Magellan Linux

Contents of /trunk/kernel26-alx/patches-2.6.20-r6/0116-2.6.20.16-all-fixes.patch

Revision 1175
Thu Oct 14 12:15:46 2010 UTC by niro
File size: 34527 bytes
-2.6.20-alx-r6 new magellan 0.5.2 kernel
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 5e47683..9bf056e 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -367,10 +367,6 @@ ENTRY(system_call)
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
- testl $TF_MASK,PT_EFLAGS(%esp)
- jz no_singlestep
- orl $_TIF_SINGLESTEP,TI_flags(%ebp)
-no_singlestep:
# system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
@@ -385,6 +381,10 @@ syscall_exit:
# setting need_resched or sigpending
# between sampling and the iret
TRACE_IRQS_OFF
+ testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
+ jz no_singlestep
+ orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+no_singlestep:
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx # current->work
jne syscall_exit_work
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index 3700eef..be4a9a8 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -131,7 +131,6 @@ static void nmi_save_registers(void * dummy)
{
int cpu = smp_processor_id();
struct op_msrs * msrs = &cpu_msrs[cpu];
- model->fill_in_addresses(msrs);
nmi_cpu_save_registers(msrs);
}

@@ -195,6 +194,7 @@ static struct notifier_block profile_exceptions_nb = {
static int nmi_setup(void)
{
int err=0;
+ int cpu;

if (!allocate_msrs())
return -ENOMEM;
@@ -207,6 +207,13 @@ static int nmi_setup(void)
/* We need to serialize save and setup for HT because the subset
* of msrs are distinct for save and setup operations
*/
+
+ /* Assume saved/restored counters are the same on all CPUs */
+ model->fill_in_addresses(&cpu_msrs[0]);
+ for_each_possible_cpu (cpu) {
+ if (cpu != 0)
+ cpu_msrs[cpu] = cpu_msrs[0];
+ }
on_each_cpu(nmi_save_registers, NULL, 0, 1);
on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
nmi_enabled = 1;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index f72e8e8..a84304e 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -177,6 +177,13 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
*/
discard_lazy_cpu_state();

+ /*
+ * Force reload of FP/VEC.
+ * This has to be done before copying stuff into current->thread.fpr/vr
+ * for the reasons explained in the previous comment.
+ */
+ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC);
+
err |= __copy_from_user(&current->thread.fpr, &sc->fp_regs, FP_REGS_SIZE);

#ifdef CONFIG_ALTIVEC
@@ -198,9 +205,6 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
current->thread.vrsave = 0;
#endif /* CONFIG_ALTIVEC */

- /* Force reload of FP/VEC */
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC);
-
return err;
}

diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 2968b90..e67cc4f 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -72,6 +72,8 @@ void show_mem(void)

for_each_online_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+ if (!pfn_valid(pgdat->node_start_pfn + i))
+ continue;
page = pfn_to_page(pgdat->node_start_pfn + i);
total++;
if (PageReserved(page))
@@ -766,3 +768,9 @@ int in_gate_area_no_task(unsigned long addr)
{
return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}
+
+void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
+{
+ return __alloc_bootmem_core(pgdat->bdata, size,
+ SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
+}
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 3ffa080..e4e0ccb 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -1102,6 +1102,7 @@ static void cyy_intr_chip(struct cyclades_card *cinfo, int chip,

if (data & info->ignore_status_mask) {
info->icount.rx++;
+ spin_unlock(&cinfo->card_lock);
return;
}
if (tty_buffer_request_room(tty, 1)) {
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index cef1287..550ac72 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -255,19 +255,25 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde

}

-static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wait)
+static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{
mdk_rdev_t *rdev;
struct list_head *tmp;
+ mddev_t *mddev = bitmap->mddev;

ITERATE_RDEV(mddev, rdev, tmp)
if (test_bit(In_sync, &rdev->flags)
- && !test_bit(Faulty, &rdev->flags))
+ && !test_bit(Faulty, &rdev->flags)) {
+ int size = PAGE_SIZE;
+ if (page->index == bitmap->file_pages-1)
+ size = roundup(bitmap->last_page_size,
+ bdev_hardsect_size(rdev->bdev));
md_super_write(mddev, rdev,
- (rdev->sb_offset<<1) + offset
+ (rdev->sb_offset<<1) + bitmap->offset
+ page->index * (PAGE_SIZE/512),
- PAGE_SIZE,
+ size,
page);
+ }

if (wait)
md_super_wait(mddev);
@@ -282,7 +288,7 @@ static int write_page(struct bitmap *bitmap, struct page *page, int wait)
struct buffer_head *bh;

if (bitmap->file == NULL)
- return write_sb_page(bitmap->mddev, bitmap->offset, page, wait);
+ return write_sb_page(bitmap, page, wait);

bh = page_buffers(page);

@@ -923,6 +929,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
}

bitmap->filemap[bitmap->file_pages++] = page;
+ bitmap->last_page_size = count;
}
paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4c2471e..b9ff4e3 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -33,7 +33,6 @@
struct crypt_io {
struct dm_target *target;
struct bio *base_bio;
- struct bio *first_clone;
struct work_struct work;
atomic_t pending;
int error;
@@ -107,6 +106,8 @@ struct crypt_config {

static struct kmem_cache *_crypt_io_pool;

+static void clone_init(struct crypt_io *, struct bio *);
+
/*
* Different IV generation algorithms:
*
@@ -378,25 +379,20 @@ static int crypt_convert(struct crypt_config *cc,
* This should never violate the device limitations
* May return a smaller bio when running out of pages
*/
-static struct bio *
-crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
- struct bio *base_bio, unsigned int *bio_vec_idx)
+static struct bio *crypt_alloc_buffer(struct crypt_io *io, unsigned int size,
+ unsigned int *bio_vec_idx)
{
+ struct crypt_config *cc = io->target->private;
struct bio *clone;
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
unsigned int i;

- if (base_bio) {
- clone = bio_alloc_bioset(GFP_NOIO, base_bio->bi_max_vecs, cc->bs);
- __bio_clone(clone, base_bio);
- } else
- clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
-
+ clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
if (!clone)
return NULL;

- clone->bi_destructor = dm_crypt_bio_destructor;
+ clone_init(io, clone);

/* if the last bio was not complete, continue where that one ended */
clone->bi_idx = *bio_vec_idx;
@@ -495,9 +491,6 @@ static void dec_pending(struct crypt_io *io, int error)
if (!atomic_dec_and_test(&io->pending))
return;

- if (io->first_clone)
- bio_put(io->first_clone);
-
bio_endio(io->base_bio, io->base_bio->bi_size, io->error);

mempool_free(io, cc->io_pool);
@@ -562,6 +555,7 @@ static void clone_init(struct crypt_io *io, struct bio *clone)
clone->bi_end_io = crypt_endio;
clone->bi_bdev = cc->dev->bdev;
clone->bi_rw = io->base_bio->bi_rw;
+ clone->bi_destructor = dm_crypt_bio_destructor;
}

static void process_read(struct crypt_io *io)
@@ -585,7 +579,6 @@ static void process_read(struct crypt_io *io)
}

clone_init(io, clone);
- clone->bi_destructor = dm_crypt_bio_destructor;
clone->bi_idx = 0;
clone->bi_vcnt = bio_segments(base_bio);
clone->bi_size = base_bio->bi_size;
@@ -615,8 +608,7 @@ static void process_write(struct crypt_io *io)
* so repeat the whole process until all the data can be handled.
*/
while (remaining) {
- clone = crypt_alloc_buffer(cc, base_bio->bi_size,
- io->first_clone, &bvec_idx);
+ clone = crypt_alloc_buffer(io, base_bio->bi_size, &bvec_idx);
if (unlikely(!clone)) {
dec_pending(io, -ENOMEM);
return;
@@ -631,31 +623,23 @@ static void process_write(struct crypt_io *io)
return;
}

- clone_init(io, clone);
clone->bi_sector = cc->start + sector;
-
- if (!io->first_clone) {
- /*
- * hold a reference to the first clone, because it
- * holds the bio_vec array and that can't be freed
- * before all other clones are released
- */
- bio_get(clone);
- io->first_clone = clone;
- }
-
remaining -= clone->bi_size;
sector += bio_sectors(clone);

- /* prevent bio_put of first_clone */
+ /* Grab another reference to the io struct
+ * before we kick off the request */
if (remaining)
atomic_inc(&io->pending);

generic_make_request(clone);

+ /* Do not reference clone after this - it
+ * may be gone already. */
+
/* out of memory -> run queues */
if (remaining)
- congestion_wait(bio_data_dir(clone), HZ/100);
+ congestion_wait(WRITE, HZ/100);
}
}

@@ -954,10 +938,12 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
struct crypt_config *cc = ti->private;
struct crypt_io *io;

+ if (bio_barrier(bio))
+ return -EOPNOTSUPP;
+
io = mempool_alloc(cc->io_pool, GFP_NOIO);
io->target = ti;
io->base_bio = bio;
- io->first_clone = NULL;
io->error = io->post_process = 0;
atomic_set(&io->pending, 0);
kcryptd_queue_io(io);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index dfe3214..2c404f7 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -415,7 +415,7 @@ static int raid0_make_request (request_queue_t *q, struct bio *bio)
raid0_conf_t *conf = mddev_to_conf(mddev);
struct strip_zone *zone;
mdk_rdev_t *tmp_dev;
- unsigned long chunk;
+ sector_t chunk;
sector_t block, rsect;
const int rw = bio_data_dir(bio);

@@ -470,7 +470,6 @@ static int raid0_make_request (request_queue_t *q, struct bio *bio)

sector_div(x, zone->nb_dev);
chunk = x;
- BUG_ON(x != (sector_t)chunk);

x = block >> chunksize_bits;
tmp_dev = zone->dev[sector_div(x, zone->nb_dev)];
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 97ee870..b20c6e9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1235,17 +1235,24 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
}
r1_bio->read_disk = primary;
for (i=0; i<mddev->raid_disks; i++)
- if (r1_bio->bios[i]->bi_end_io == end_sync_read &&
- test_bit(BIO_UPTODATE, &r1_bio->bios[i]->bi_flags)) {
+ if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
int j;
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
struct bio *pbio = r1_bio->bios[primary];
struct bio *sbio = r1_bio->bios[i];
- for (j = vcnt; j-- ; )
- if (memcmp(page_address(pbio->bi_io_vec[j].bv_page),
- page_address(sbio->bi_io_vec[j].bv_page),
- PAGE_SIZE))
- break;
+
+ if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
+ for (j = vcnt; j-- ; ) {
+ struct page *p, *s;
+ p = pbio->bi_io_vec[j].bv_page;
+ s = sbio->bi_io_vec[j].bv_page;
+ if (memcmp(page_address(p),
+ page_address(s),
+ PAGE_SIZE))
+ break;
+ }
+ } else
+ j = 0;
if (j >= 0)
mddev->resync_mismatches += r1_bio->sectors;
if (j < 0 || test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 82249a6..9eb66c1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1867,6 +1867,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
int d = r10_bio->devs[i].devnum;
bio = r10_bio->devs[i].bio;
bio->bi_end_io = NULL;
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
if (conf->mirrors[d].rdev == NULL ||
test_bit(Faulty, &conf->mirrors[d].rdev->flags))
continue;
@@ -2037,6 +2038,11 @@ static int run(mddev_t *mddev)
/* 'size' is now the number of chunks in the array */
/* calculate "used chunks per device" in 'stride' */
stride = size * conf->copies;
+
+ /* We need to round up when dividing by raid_disks to
+ * get the stride size.
+ */
+ stride += conf->raid_disks - 1;
sector_div(stride, conf->raid_disks);
mddev->size = stride << (conf->chunk_shift-1);

diff --git a/drivers/media/video/saa7134/saa7134-tvaudio.c b/drivers/media/video/saa7134/saa7134-tvaudio.c
index dd759d6..36b3fa3 100644
--- a/drivers/media/video/saa7134/saa7134-tvaudio.c
+++ b/drivers/media/video/saa7134/saa7134-tvaudio.c
@@ -1006,7 +1006,7 @@ int saa7134_tvaudio_init2(struct saa7134_dev *dev)
int saa7134_tvaudio_fini(struct saa7134_dev *dev)
{
/* shutdown tvaudio thread */
- if (dev->thread.pid >= 0) {
+ if (dev->thread.pid > 0) {
dev->thread.shutdown = 1;
wake_up_interruptible(&dev->thread.wq);
wait_for_completion(&dev->thread.exit);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index c6259c7..40bdcf9 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -1157,13 +1157,16 @@ e1000_probe(struct pci_dev *pdev,
!e1000_check_mng_mode(&adapter->hw))
e1000_get_hw_control(adapter);

- strcpy(netdev->name, "eth%d");
- if ((err = register_netdev(netdev)))
- goto err_register;
-
/* tell the stack to leave us alone until e1000_open() is called */
netif_carrier_off(netdev);
netif_stop_queue(netdev);
+#ifdef CONFIG_E1000_NAPI
+ netif_poll_disable(netdev);
+#endif
+
+ strcpy(netdev->name, "eth%d");
+ if ((err = register_netdev(netdev)))
+ goto err_register;

DPRINTK(PROBE, INFO, "Intel(R) PRO/1000 Network Connection\n");

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 38e75cf..aec8c59 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -95,7 +95,7 @@ static int disable_msi = 0;
module_param(disable_msi, int, 0);
MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");

-static int idle_timeout = 0;
+static int idle_timeout = 100;
module_param(idle_timeout, int, 0);
MODULE_PARM_DESC(idle_timeout, "Watchdog timer for lost interrupts (ms)");

@@ -2341,6 +2341,13 @@ static int sky2_poll(struct net_device *dev0, int *budget)

work_done = sky2_status_intr(hw, work_limit);
if (work_done < work_limit) {
+ /* Bug/Errata workaround?
+ * Need to kick the TX irq moderation timer.
+ */
+ if (sky2_read8(hw, STAT_TX_TIMER_CTRL) == TIM_START) {
+ sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP);
+ sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START);
+ }
netif_rx_complete(dev0);

sky2_read32(hw, B0_Y2_SP_LISR);
diff --git a/drivers/serial/mpsc.c b/drivers/serial/mpsc.c
index 3d2fcc5..64ed5ef 100644
--- a/drivers/serial/mpsc.c
+++ b/drivers/serial/mpsc.c
@@ -502,7 +502,8 @@ mpsc_sdma_intr_ack(struct mpsc_port_info *pi)

if (pi->mirror_regs)
pi->shared_regs->SDMA_INTR_CAUSE_m = 0;
- writel(0, pi->shared_regs->sdma_intr_base + SDMA_INTR_CAUSE);
+ writeb(0x00, pi->shared_regs->sdma_intr_base + SDMA_INTR_CAUSE +
+ pi->port.line);
return;
}

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 2275f27..8f820e4 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -59,6 +59,7 @@ extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
unsigned long align,
unsigned long goal,
unsigned long limit);
+extern void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size);

#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
extern void reserve_bootmem(unsigned long addr, unsigned long size);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index d37f46a..f768e10 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2235,11 +2235,11 @@
#define PCI_DEVICE_ID_INTEL_ICH8_5 0x283e
#define PCI_DEVICE_ID_INTEL_ICH8_6 0x2850
#define PCI_DEVICE_ID_INTEL_ICH9_0 0x2910
-#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2911
+#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2917
#define PCI_DEVICE_ID_INTEL_ICH9_2 0x2912
#define PCI_DEVICE_ID_INTEL_ICH9_3 0x2913
#define PCI_DEVICE_ID_INTEL_ICH9_4 0x2914
-#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2915
+#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919
#define PCI_DEVICE_ID_INTEL_ICH9_6 0x2930
#define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340
#define PCI_DEVICE_ID_INTEL_82830_HB 0x3575
diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index 6db9a4c..dd5a05d 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -232,6 +232,7 @@ struct bitmap {
struct page **filemap; /* list of cache pages for the file */
unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
unsigned long file_pages; /* number of pages in the file */
+ int last_page_size; /* bytes in the last page */

unsigned long flags;

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4463735..7a0cc67 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1137,6 +1137,7 @@ static inline void put_task_struct(struct task_struct *t)
/* Not implemented yet, only for 486*/
#define PF_STARTING 0x00000002 /* being created */
#define PF_EXITING 0x00000004 /* getting shut down */
+#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
#define PF_DUMPCORE 0x00000200 /* dumped core */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2a7b38d..1a76bda 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -162,7 +162,7 @@ extern struct workqueue_struct *__create_workqueue(const char *name,
int singlethread,
int freezeable);
#define create_workqueue(name) __create_workqueue((name), 0, 0)
-#define create_freezeable_workqueue(name) __create_workqueue((name), 0, 1)
+#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1)
#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0)

extern void destroy_workqueue(struct workqueue_struct *wq);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 9c8c232..5a75657 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -905,7 +905,7 @@ static void audit_update_watch(struct audit_parent *parent,

/* If the update involves invalidating rules, do the inode-based
* filtering now, so we don't omit records. */
- if (invalidating &&
+ if (invalidating && current->audit_context &&
audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT)
audit_set_auditable(current->audit_context);

diff --git a/kernel/exit.c b/kernel/exit.c
index fec12eb..d306845 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -883,13 +883,29 @@ fastcall NORET_TYPE void do_exit(long code)
if (unlikely(tsk->flags & PF_EXITING)) {
printk(KERN_ALERT
"Fixing recursive fault but reboot is needed!\n");
+ /*
+ * We can do this unlocked here. The futex code uses
+ * this flag just to verify whether the pi state
+ * cleanup has been done or not. In the worst case it
+ * loops once more. We pretend that the cleanup was
+ * done as there is no way to return. Either the
+ * OWNER_DIED bit is set by now or we push the blocked
+ * task into the wait for ever nirwana as well.
+ */
+ tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
exit_io_context();
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}

+ /*
+ * tsk->flags are checked in the futex code to protect against
+ * an exiting task cleaning up the robust pi futexes.
+ */
+ spin_lock_irq(&tsk->pi_lock);
tsk->flags |= PF_EXITING;
+ spin_unlock_irq(&tsk->pi_lock);

if (unlikely(in_atomic()))
printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -956,6 +972,12 @@ fastcall NORET_TYPE void do_exit(long code)
* Make sure we are holding no locks:
*/
debug_check_no_locks_held(tsk);
+ /*
+ * We can do this unlocked here. The futex code uses this flag
+ * just to verify whether the pi state cleanup has been done
+ * or not. In the worst case it loops once more.
+ */
+ tsk->flags |= PF_EXITPIDONE;

if (tsk->io_context)
exit_io_context();
diff --git a/kernel/futex.c b/kernel/futex.c
index 1df411e..99dad33 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -390,18 +390,12 @@ static struct task_struct * futex_find_get_task(pid_t pid)

rcu_read_lock();
p = find_task_by_pid(pid);
- if (!p)
- goto out_unlock;
- if ((current->euid != p->euid) && (current->euid != p->uid)) {
- p = NULL;
- goto out_unlock;
- }
- if (p->exit_state != 0) {
- p = NULL;
- goto out_unlock;
- }
- get_task_struct(p);
-out_unlock:
+
+ if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
+ p = ERR_PTR(-ESRCH);
+ else
+ get_task_struct(p);
+
rcu_read_unlock();

return p;
@@ -467,7 +461,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
struct futex_q *this, *next;
struct list_head *head;
struct task_struct *p;
- pid_t pid;
+ pid_t pid = uval & FUTEX_TID_MASK;

head = &hb->chain;

@@ -485,6 +479,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
return -EINVAL;

WARN_ON(!atomic_read(&pi_state->refcount));
+ WARN_ON(pid && pi_state->owner &&
+ pi_state->owner->pid != pid);

atomic_inc(&pi_state->refcount);
me->pi_state = pi_state;
@@ -495,15 +491,33 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)

/*
* We are the first waiter - try to look up the real owner and attach
- * the new pi_state to it, but bail out when the owner died bit is set
- * and TID = 0:
+ * the new pi_state to it, but bail out when TID = 0
*/
- pid = uval & FUTEX_TID_MASK;
- if (!pid && (uval & FUTEX_OWNER_DIED))
+ if (!pid)
return -ESRCH;
p = futex_find_get_task(pid);
- if (!p)
- return -ESRCH;
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ /*
+ * We need to look at the task state flags to figure out,
+ * whether the task is exiting. To protect against the do_exit
+ * change of the task flags, we do this protected by
+ * p->pi_lock:
+ */
+ spin_lock_irq(&p->pi_lock);
+ if (unlikely(p->flags & PF_EXITING)) {
+ /*
+ * The task is on the way out. When PF_EXITPIDONE is
+ * set, we know that the task has finished the
+ * cleanup:
+ */
+ int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+
+ spin_unlock_irq(&p->pi_lock);
+ put_task_struct(p);
+ return ret;
+ }

pi_state = alloc_pi_state();

@@ -516,7 +530,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
/* Store the key for possible exit cleanups: */
pi_state->key = me->key;

- spin_lock_irq(&p->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &p->pi_state_list);
pi_state->owner = p;
@@ -583,15 +596,22 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
* preserve the owner died bit.)
*/
if (!(uval & FUTEX_OWNER_DIED)) {
+ int ret = 0;
+
newval = FUTEX_WAITERS | new_owner->pid;

pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
pagefault_enable();
+
if (curval == -EFAULT)
- return -EFAULT;
+ ret = -EFAULT;
if (curval != uval)
- return -EINVAL;
+ ret = -EINVAL;
+ if (ret) {
+ spin_unlock(&pi_state->pi_mutex.wait_lock);
+ return ret;
+ }
}

spin_lock_irq(&pi_state->owner->pi_lock);
@@ -1149,6 +1169,7 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
if (unlikely(ret != 0))
goto out_release_sem;

+ retry_unlocked:
hb = queue_lock(&q, -1, NULL);

retry_locked:
@@ -1200,34 +1221,58 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
ret = lookup_pi_state(uval, hb, &q);

if (unlikely(ret)) {
- /*
- * There were no waiters and the owner task lookup
- * failed. When the OWNER_DIED bit is set, then we
- * know that this is a robust futex and we actually
- * take the lock. This is safe as we are protected by
- * the hash bucket lock. We also set the waiters bit
- * unconditionally here, to simplify glibc handling of
- * multiple tasks racing to acquire the lock and
- * cleanup the problems which were left by the dead
- * owner.
- */
- if (curval & FUTEX_OWNER_DIED) {
- uval = newval;
- newval = current->pid |
- FUTEX_OWNER_DIED | FUTEX_WAITERS;
+ switch (ret) {

- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
- uval, newval);
- pagefault_enable();
+ case -EAGAIN:
+ /*
+ * Task is exiting and we just wait for the
+ * exit to complete.
+ */
+ queue_unlock(&q, hb);
+ up_read(&curr->mm->mmap_sem);
+ cond_resched();
+ goto retry;

- if (unlikely(curval == -EFAULT))
+ case -ESRCH:
+ /*
+ * No owner found for this futex. Check if the
+ * OWNER_DIED bit is set to figure out whether
+ * this is a robust futex or not.
+ */
+ if (get_futex_value_locked(&curval, uaddr))
goto uaddr_faulted;
- if (unlikely(curval != uval))
- goto retry_locked;
- ret = 0;
+
+ /*
+ * There were no waiters and the owner task lookup
+ * failed. When the OWNER_DIED bit is set, then we
+ * know that this is a robust futex and we actually
+ * take the lock. This is safe as we are protected by
+ * the hash bucket lock. We also set the waiters bit
+ * unconditionally here, to simplify glibc handling of
+ * multiple tasks racing to acquire the lock and
+ * cleanup the problems which were left by the dead
+ * owner.
+ */
+ if (curval & FUTEX_OWNER_DIED) {
+ uval = newval;
+ newval = current->pid |
+ FUTEX_OWNER_DIED | FUTEX_WAITERS;
+
+ pagefault_disable();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr,
+ uval,
+ newval);
+ pagefault_enable();
+
+ if (unlikely(curval == -EFAULT))
+ goto uaddr_faulted;
+ if (unlikely(curval != uval))
+ goto retry_locked;
+ ret = 0;
+ }
+ default:
+ goto out_unlock_release_sem;
}
- goto out_unlock_release_sem;
}

/*
@@ -1279,39 +1324,52 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
list_add(&q.pi_state->list, &current->pi_state_list);
spin_unlock_irq(&current->pi_lock);

- /* Unqueue and drop the lock */
- unqueue_me_pi(&q, hb);
- up_read(&curr->mm->mmap_sem);
/*
* We own it, so we have to replace the pending owner
- * TID. This must be atomic as we have preserve the
+ * TID. This must be atomic as we have to preserve the
* owner died bit here.
*/
- ret = get_user(uval, uaddr);
+ ret = get_futex_value_locked(&uval, uaddr);
while (!ret) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
+
+ pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr,
uval, newval);
+ pagefault_enable();
+
if (curval == -EFAULT)
ret = -EFAULT;
if (curval == uval)
break;
uval = curval;
}
- } else {
+ } else if (ret) {
/*
* Catch the rare case, where the lock was released
* when we were on the way back before we locked
* the hash bucket.
*/
- if (ret && q.pi_state->owner == curr) {
- if (rt_mutex_trylock(&q.pi_state->pi_mutex))
- ret = 0;
+ if (q.pi_state->owner == curr &&
+ rt_mutex_trylock(&q.pi_state->pi_mutex)) {
+ ret = 0;
+ } else {
+ /*
+ * Paranoia check. If we did not take the lock
+ * in the trylock above, then we should not be
+ * the owner of the rtmutex, neither the real
+ * nor the pending one:
+ */
+ if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
+ printk(KERN_ERR "futex_lock_pi: ret = %d "
+ "pi-mutex: %p pi-state %p\n", ret,
+ q.pi_state->pi_mutex.owner,
+ q.pi_state->owner);
}
- /* Unqueue and drop the lock */
- unqueue_me_pi(&q, hb);
- up_read(&curr->mm->mmap_sem);
}
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(&q, hb);
+ up_read(&curr->mm->mmap_sem);

if (!detect && ret == -EDEADLK && 0)
force_sig(SIGKILL, current);
@@ -1331,16 +1389,18 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
* non-atomically. Therefore, if get_user below is not
* enough, we need to handle the fault ourselves, while
* still holding the mmap_sem.
+ *
+ * ... and hb->lock. :-) --ANK
*/
+ queue_unlock(&q, hb);
+
if (attempt++) {
- if (futex_handle_fault((unsigned long)uaddr, attempt)) {
- ret = -EFAULT;
- goto out_unlock_release_sem;
- }
- goto retry_locked;
+ ret = futex_handle_fault((unsigned long)uaddr, attempt);
+ if (ret)
+ goto out_release_sem;
+ goto retry_unlocked;
}

- queue_unlock(&q, hb);
up_read(&curr->mm->mmap_sem);

ret = get_user(uval, uaddr);
@@ -1382,9 +1442,9 @@ retry:
goto out;

hb = hash_futex(&key);
+retry_unlocked:
spin_lock(&hb->lock);

-retry_locked:
/*
* To avoid races, try to do the TID -> 0 atomic transition
* again. If it succeeds then we can return without waking
@@ -1446,16 +1506,17 @@ pi_faulted:
* non-atomically. Therefore, if get_user below is not
* enough, we need to handle the fault ourselves, while
* still holding the mmap_sem.
+ *
+ * ... and hb->lock. :-) --ANK
*/
+ spin_unlock(&hb->lock);
+
if (attempt++) {
- if (futex_handle_fault((unsigned long)uaddr, attempt)) {
- ret = -EFAULT;
- goto out_unlock;
- }
- goto retry_locked;
+ ret = futex_handle_fault((unsigned long)uaddr, attempt);
+ if (ret)
+ goto out;
+ goto retry_unlocked;
}
-
- spin_unlock(&hb->lock);
up_read(&current->mm->mmap_sem);

ret = get_user(uval, uaddr);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 4ab17da..dd5feae 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -212,6 +212,19 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
if (!waiter || !waiter->task)
goto out_unlock_pi;

+ /*
+ * Check the orig_waiter state. After we dropped the locks,
+ * the previous owner of the lock might have released the lock
+ * and made us the pending owner:
+ */
+ if (orig_waiter && !orig_waiter->task)
+ goto out_unlock_pi;
+
+ /*
+ * Drop out, when the task has no waiters. Note,
+ * top_waiter can be NULL, when we are in the deboosting
+ * mode!
+ */
if (top_waiter && (!task_has_pi_waiters(task) ||
top_waiter != task_top_pi_waiter(task)))
goto out_unlock_pi;
@@ -659,9 +672,16 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
* all over without going into schedule to try
* to get the lock now:
*/
- if (unlikely(!waiter.task))
+ if (unlikely(!waiter.task)) {
+ /*
+ * Reset the return value. We might
+ * have returned with -EDEADLK and the
+ * owner released the lock while we
+ * were walking the pi chain.
+ */
+ ret = 0;
continue;
-
+ }
if (unlikely(ret))
break;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 62db30c..907ab05 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2814,17 +2814,21 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
unsigned long next_balance = jiffies + 60 * HZ;

for_each_domain(this_cpu, sd) {
- if (sd->flags & SD_BALANCE_NEWIDLE) {
+ unsigned long interval;
+
+ if (!(sd->flags & SD_LOAD_BALANCE))
+ continue;
+
+ if (sd->flags & SD_BALANCE_NEWIDLE)
/* If we've pulled tasks over stop searching: */
pulled_task = load_balance_newidle(this_cpu,
- this_rq, sd);
- if (time_after(next_balance,
- sd->last_balance + sd->balance_interval))
- next_balance = sd->last_balance
- + sd->balance_interval;
- if (pulled_task)
- break;
- }
+ this_rq, sd);
+
+ interval = msecs_to_jiffies(sd->balance_interval);
+ if (time_after(next_balance, sd->last_balance + interval))
+ next_balance = sd->last_balance + interval;
+ if (pulled_task)
+ break;
}
if (!pulled_task)
/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 7ce69c1..c30781c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -53,24 +53,6 @@

struct kmem_cache *anon_vma_cachep;

-static inline void validate_anon_vma(struct vm_area_struct *find_vma)
-{
-#ifdef CONFIG_DEBUG_VM
- struct anon_vma *anon_vma = find_vma->anon_vma;
- struct vm_area_struct *vma;
- unsigned int mapcount = 0;
- int found = 0;
-
- list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
- mapcount++;
- BUG_ON(mapcount > 100000);
- if (vma == find_vma)
- found = 1;
- }
- BUG_ON(!found);
-#endif
-}
-
/* This must be called under the mmap_sem. */
int anon_vma_prepare(struct vm_area_struct *vma)
{
@@ -121,10 +103,8 @@ void __anon_vma_link(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;

- if (anon_vma) {
+ if (anon_vma)
list_add_tail(&vma->anon_vma_node, &anon_vma->head);
- validate_anon_vma(vma);
- }
}

void anon_vma_link(struct vm_area_struct *vma)
@@ -134,7 +114,6 @@ void anon_vma_link(struct vm_area_struct *vma)
if (anon_vma) {
spin_lock(&anon_vma->lock);
list_add_tail(&vma->anon_vma_node, &anon_vma->head);
- validate_anon_vma(vma);
spin_unlock(&anon_vma->lock);
}
}
@@ -148,7 +127,6 @@ void anon_vma_unlink(struct vm_area_struct *vma)
return;

spin_lock(&anon_vma->lock);
- validate_anon_vma(vma);
list_del(&vma->anon_vma_node);

/* We must garbage collect the anon_vma if it's empty */
diff --git a/mm/sparse.c b/mm/sparse.c
index ac26eb0..faa08e2 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -209,6 +209,12 @@ static int sparse_init_one_section(struct mem_section *ms,
return 1;
}

+__attribute__((weak))
+void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
+{
+ return NULL;
+}
+
static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
{
struct page *map;
@@ -219,6 +225,11 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
if (map)
return map;

+ map = alloc_bootmem_high_node(NODE_DATA(nid),
+ sizeof(struct page) * PAGES_PER_SECTION);
+ if (map)
+ return map;
+
map = alloc_bootmem_node(NODE_DATA(nid),
sizeof(struct page) * PAGES_PER_SECTION);
if (map)