Magellan Linux

Contents of /trunk/kernel26-xen/patches-2.6.25-r1/1011-2.6.25-xen-auto-common.patch



Revision 609
Fri May 23 17:35:37 2008 UTC by niro
File size: 88655 byte(s)
-using opensuse xen patchset, updated kernel configs

1 Subject: xen3 common
2 From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 517:d71965a78c20)
3 Patch-mainline: obsolete
4 Acked-by: jbeulich@novell.com
5
6 List of files that no longer require modification (and have hence been
7 removed from this patch), kept for reference and in case upstream wants to
8 take the forward-porting patches:
9 2.6.25/mm/highmem.c
10
11 ---
12 drivers/Makefile | 1
13 drivers/acpi/hardware/hwsleep.c | 15
14 drivers/acpi/sleep/main.c | 11
15 drivers/char/agp/intel-agp.c | 10
16 drivers/char/mem.c | 6
17 drivers/char/tpm/Makefile | 2
18 drivers/char/tpm/tpm.h | 15
19 drivers/char/tpm/tpm_vtpm.c | 542 +++++++++++++++++++++++++
20 drivers/char/tpm/tpm_vtpm.h | 55 ++
21 drivers/char/tpm/tpm_xen.c | 722 ++++++++++++++++++++++++++++++++++
22 drivers/ide/ide-lib.c | 8
23 drivers/oprofile/buffer_sync.c | 87 +++-
24 drivers/oprofile/cpu_buffer.c | 51 +-
25 drivers/oprofile/cpu_buffer.h | 9
26 drivers/oprofile/event_buffer.h | 3
27 drivers/oprofile/oprof.c | 30 +
28 drivers/oprofile/oprof.h | 3
29 drivers/oprofile/oprofile_files.c | 201 +++++++++
30 drivers/pci/bus.c | 7
31 drivers/pci/quirks.c | 34 +
32 fs/aio.c | 120 +++++
33 fs/compat_ioctl.c | 19
34 fs/splice.c | 3
35 include/asm-generic/pci.h | 2
36 include/asm-generic/pgtable.h | 4
37 include/linux/aio.h | 5
38 include/linux/interrupt.h | 6
39 include/linux/kexec.h | 17
40 include/linux/mm.h | 7
41 include/linux/oprofile.h | 12
42 include/linux/page-flags.h | 15
43 include/linux/sched.h | 5
44 include/linux/skbuff.h | 8
45 include/linux/vermagic.h | 8
46 kernel/irq/spurious.c | 2
47 kernel/kexec.c | 71 ++-
48 kernel/softlockup.c | 13
49 kernel/sysctl.c | 2
50 kernel/timer.c | 8
51 mm/memory.c | 38 +
52 mm/mprotect.c | 2
53 mm/page_alloc.c | 30 +
54 net/core/dev.c | 62 ++
55 net/core/skbuff.c | 4
56 net/ipv4/netfilter/nf_nat_proto_tcp.c | 3
57 net/ipv4/netfilter/nf_nat_proto_udp.c | 4
58 net/ipv4/xfrm4_output.c | 2
59 scripts/Makefile.build | 14
60 scripts/Makefile.lib | 6
61 49 files changed, 2226 insertions(+), 78 deletions(-)
62
63 --- a/drivers/Makefile
64 +++ b/drivers/Makefile
65 @@ -34,6 +34,7 @@
66 obj-$(CONFIG_NUBUS) += nubus/
67 obj-$(CONFIG_ATM) += atm/
68 obj-y += macintosh/
69 +obj-$(CONFIG_XEN) += xen/
70 obj-$(CONFIG_IDE) += ide/
71 obj-$(CONFIG_SCSI) += scsi/
72 obj-$(CONFIG_ATA) += ata/
73 --- a/drivers/acpi/hardware/hwsleep.c
74 +++ b/drivers/acpi/hardware/hwsleep.c
75 @@ -252,7 +252,11 @@
76 u32 PM1Bcontrol;
77 struct acpi_bit_register_info *sleep_type_reg_info;
78 struct acpi_bit_register_info *sleep_enable_reg_info;
79 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
80 u32 in_value;
81 +#else
82 + int err;
83 +#endif
84 struct acpi_object_list arg_list;
85 union acpi_object arg;
86 acpi_status status;
87 @@ -362,6 +366,7 @@
88
89 ACPI_FLUSH_CPU_CACHE();
90
91 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
92 status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
93 PM1Acontrol);
94 if (ACPI_FAILURE(status)) {
95 @@ -408,6 +413,16 @@
96 /* Spin until we wake */
97
98 } while (!in_value);
99 +#else
100 + /* PV ACPI just needs to check the hypercall return value */
101 + err = acpi_notify_hypervisor_state(sleep_state,
102 + PM1Acontrol, PM1Bcontrol);
103 + if (err) {
104 + ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
105 + "Hypervisor failure [%d]\n", err));
106 + return_ACPI_STATUS(AE_ERROR);
107 + }
108 +#endif
109
110 return_ACPI_STATUS(AE_OK);
111 }
112 --- a/drivers/acpi/sleep/main.c
113 +++ b/drivers/acpi/sleep/main.c
114 @@ -31,6 +31,7 @@
115 static int acpi_sleep_prepare(u32 acpi_state)
116 {
117 #ifdef CONFIG_ACPI_SLEEP
118 +#ifndef CONFIG_ACPI_PV_SLEEP
119 /* do we have a wakeup address for S2 and S3? */
120 if (acpi_state == ACPI_STATE_S3) {
121 if (!acpi_wakeup_address) {
122 @@ -41,6 +42,7 @@
123 acpi_wakeup_address));
124
125 }
126 +#endif
127 ACPI_FLUSH_CPU_CACHE();
128 acpi_enable_wakeup_device_prep(acpi_state);
129 #endif
130 @@ -137,7 +139,14 @@
131 break;
132
133 case ACPI_STATE_S3:
134 +#ifdef CONFIG_ACPI_PV_SLEEP
135 + /* Hyperviosr will save and restore CPU context
136 + * and then we can skip low level housekeeping here.
137 + */
138 + acpi_enter_sleep_state(acpi_state);
139 +#else
140 do_suspend_lowlevel();
141 +#endif
142 break;
143 }
144
145 @@ -187,7 +196,7 @@
146
147 acpi_target_sleep_state = ACPI_STATE_S0;
148
149 -#ifdef CONFIG_X86
150 +#if defined(CONFIG_X86) && !defined(CONFIG_ACPI_PV_SLEEP)
151 if (init_8259A_after_S1) {
152 printk("Broken toshiba laptop -> kicking interrupts\n");
153 init_8259A(0);
154 --- a/drivers/char/agp/intel-agp.c
155 +++ b/drivers/char/agp/intel-agp.c
156 @@ -230,6 +230,13 @@
157 if (page == NULL)
158 return NULL;
159
160 +#ifdef CONFIG_XEN
161 + if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) {
162 + __free_pages(page, 2);
163 + return NULL;
164 + }
165 +#endif
166 +
167 if (set_pages_uc(page, 4) < 0) {
168 set_pages_wb(page, 4);
169 __free_pages(page, 2);
170 @@ -249,6 +256,9 @@
171
172 page = virt_to_page(addr);
173 set_pages_wb(page, 4);
174 +#ifdef CONFIG_XEN
175 + xen_destroy_contiguous_region((unsigned long)page_address(page), 2);
176 +#endif
177 put_page(page);
178 __free_pages(page, 2);
179 atomic_dec(&agp_bridge->current_memory_agp);
180 --- a/drivers/char/mem.c
181 +++ b/drivers/char/mem.c
182 @@ -108,6 +108,7 @@
183 }
184 #endif
185
186 +#ifndef ARCH_HAS_DEV_MEM
187 /*
188 * This funcion reads the *physical* memory. The f_pos points directly to the
189 * memory location.
190 @@ -230,6 +231,7 @@
191 *ppos += written;
192 return written;
193 }
194 +#endif
195
196 #ifndef __HAVE_PHYS_MEM_ACCESS_PROT
197 static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
198 @@ -725,6 +727,7 @@
199 #define open_kmem open_mem
200 #define open_oldmem open_mem
201
202 +#ifndef ARCH_HAS_DEV_MEM
203 static const struct file_operations mem_fops = {
204 .llseek = memory_lseek,
205 .read = read_mem,
206 @@ -733,6 +736,9 @@
207 .open = open_mem,
208 .get_unmapped_area = get_unmapped_area_mem,
209 };
210 +#else
211 +extern const struct file_operations mem_fops;
212 +#endif
213
214 static const struct file_operations kmem_fops = {
215 .llseek = memory_lseek,
216 --- a/drivers/char/tpm/Makefile
217 +++ b/drivers/char/tpm/Makefile
218 @@ -9,3 +9,5 @@
219 obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
220 obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
221 obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
222 +obj-$(CONFIG_TCG_XEN) += tpm_xenu.o
223 +tpm_xenu-y = tpm_xen.o tpm_vtpm.o
224 --- a/drivers/char/tpm/tpm.h
225 +++ b/drivers/char/tpm/tpm.h
226 @@ -107,6 +107,9 @@
227 struct dentry **bios_dir;
228
229 struct list_head list;
230 +#ifdef CONFIG_XEN
231 + void *priv;
232 +#endif
233 void (*release) (struct device *);
234 };
235
236 @@ -124,6 +127,18 @@
237 outb(value & 0xFF, base+1);
238 }
239
240 +#ifdef CONFIG_XEN
241 +static inline void *chip_get_private(const struct tpm_chip *chip)
242 +{
243 + return chip->priv;
244 +}
245 +
246 +static inline void chip_set_private(struct tpm_chip *chip, void *priv)
247 +{
248 + chip->priv = priv;
249 +}
250 +#endif
251 +
252 extern void tpm_get_timeouts(struct tpm_chip *);
253 extern void tpm_gen_interrupt(struct tpm_chip *);
254 extern void tpm_continue_selftest(struct tpm_chip *);
255 --- /dev/null
256 +++ b/drivers/char/tpm/tpm_vtpm.c
257 @@ -0,0 +1,542 @@
258 +/*
259 + * Copyright (C) 2006 IBM Corporation
260 + *
261 + * Authors:
262 + * Stefan Berger <stefanb@us.ibm.com>
263 + *
264 + * Generic device driver part for device drivers in a virtualized
265 + * environment.
266 + *
267 + * This program is free software; you can redistribute it and/or
268 + * modify it under the terms of the GNU General Public License as
269 + * published by the Free Software Foundation, version 2 of the
270 + * License.
271 + *
272 + */
273 +
274 +#include <asm/uaccess.h>
275 +#include <linux/list.h>
276 +#include <linux/device.h>
277 +#include <linux/interrupt.h>
278 +#include <linux/platform_device.h>
279 +#include "tpm.h"
280 +#include "tpm_vtpm.h"
281 +
282 +/* read status bits */
283 +enum {
284 + STATUS_BUSY = 0x01,
285 + STATUS_DATA_AVAIL = 0x02,
286 + STATUS_READY = 0x04
287 +};
288 +
289 +struct transmission {
290 + struct list_head next;
291 +
292 + unsigned char *request;
293 + size_t request_len;
294 + size_t request_buflen;
295 +
296 + unsigned char *response;
297 + size_t response_len;
298 + size_t response_buflen;
299 +
300 + unsigned int flags;
301 +};
302 +
303 +enum {
304 + TRANSMISSION_FLAG_WAS_QUEUED = 0x1
305 +};
306 +
307 +
308 +enum {
309 + DATAEX_FLAG_QUEUED_ONLY = 0x1
310 +};
311 +
312 +
313 +/* local variables */
314 +
315 +/* local function prototypes */
316 +static int _vtpm_send_queued(struct tpm_chip *chip);
317 +
318 +
319 +/* =============================================================
320 + * Some utility functions
321 + * =============================================================
322 + */
323 +static void vtpm_state_init(struct vtpm_state *vtpms)
324 +{
325 + vtpms->current_request = NULL;
326 + spin_lock_init(&vtpms->req_list_lock);
327 + init_waitqueue_head(&vtpms->req_wait_queue);
328 + INIT_LIST_HEAD(&vtpms->queued_requests);
329 +
330 + vtpms->current_response = NULL;
331 + spin_lock_init(&vtpms->resp_list_lock);
332 + init_waitqueue_head(&vtpms->resp_wait_queue);
333 +
334 + vtpms->disconnect_time = jiffies;
335 +}
336 +
337 +
338 +static inline struct transmission *transmission_alloc(void)
339 +{
340 + return kzalloc(sizeof(struct transmission), GFP_ATOMIC);
341 +}
342 +
343 +static unsigned char *
344 +transmission_set_req_buffer(struct transmission *t,
345 + unsigned char *buffer, size_t len)
346 +{
347 + if (t->request_buflen < len) {
348 + kfree(t->request);
349 + t->request = kmalloc(len, GFP_KERNEL);
350 + if (!t->request) {
351 + t->request_buflen = 0;
352 + return NULL;
353 + }
354 + t->request_buflen = len;
355 + }
356 +
357 + memcpy(t->request, buffer, len);
358 + t->request_len = len;
359 +
360 + return t->request;
361 +}
362 +
363 +static unsigned char *
364 +transmission_set_res_buffer(struct transmission *t,
365 + const unsigned char *buffer, size_t len)
366 +{
367 + if (t->response_buflen < len) {
368 + kfree(t->response);
369 + t->response = kmalloc(len, GFP_ATOMIC);
370 + if (!t->response) {
371 + t->response_buflen = 0;
372 + return NULL;
373 + }
374 + t->response_buflen = len;
375 + }
376 +
377 + memcpy(t->response, buffer, len);
378 + t->response_len = len;
379 +
380 + return t->response;
381 +}
382 +
383 +static inline void transmission_free(struct transmission *t)
384 +{
385 + kfree(t->request);
386 + kfree(t->response);
387 + kfree(t);
388 +}
389 +
390 +/* =============================================================
391 + * Interface with the lower layer driver
392 + * =============================================================
393 + */
394 +/*
395 + * Lower layer uses this function to make a response available.
396 + */
397 +int vtpm_vd_recv(const struct tpm_chip *chip,
398 + const unsigned char *buffer, size_t count,
399 + void *ptr)
400 +{
401 + unsigned long flags;
402 + int ret_size = 0;
403 + struct transmission *t;
404 + struct vtpm_state *vtpms;
405 +
406 + vtpms = (struct vtpm_state *)chip_get_private(chip);
407 +
408 + /*
409 + * The list with requests must contain one request
410 + * only and the element there must be the one that
411 + * was passed to me from the front-end.
412 + */
413 + spin_lock_irqsave(&vtpms->resp_list_lock, flags);
414 + if (vtpms->current_request != ptr) {
415 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
416 + return 0;
417 + }
418 +
419 + if ((t = vtpms->current_request)) {
420 + transmission_free(t);
421 + vtpms->current_request = NULL;
422 + }
423 +
424 + t = transmission_alloc();
425 + if (t) {
426 + if (!transmission_set_res_buffer(t, buffer, count)) {
427 + transmission_free(t);
428 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
429 + return -ENOMEM;
430 + }
431 + ret_size = count;
432 + vtpms->current_response = t;
433 + wake_up_interruptible(&vtpms->resp_wait_queue);
434 + }
435 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
436 +
437 + return ret_size;
438 +}
439 +
440 +
441 +/*
442 + * Lower layer indicates its status (connected/disconnected)
443 + */
444 +void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status)
445 +{
446 + struct vtpm_state *vtpms;
447 +
448 + vtpms = (struct vtpm_state *)chip_get_private(chip);
449 +
450 + vtpms->vd_status = vd_status;
451 + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
452 + vtpms->disconnect_time = jiffies;
453 + }
454 +}
455 +
456 +/* =============================================================
457 + * Interface with the generic TPM driver
458 + * =============================================================
459 + */
460 +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
461 +{
462 + int rc = 0;
463 + unsigned long flags;
464 + struct vtpm_state *vtpms;
465 +
466 + vtpms = (struct vtpm_state *)chip_get_private(chip);
467 +
468 + /*
469 + * Check if the previous operation only queued the command
470 + * In this case there won't be a response, so I just
471 + * return from here and reset that flag. In any other
472 + * case I should receive a response from the back-end.
473 + */
474 + spin_lock_irqsave(&vtpms->resp_list_lock, flags);
475 + if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) {
476 + vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY;
477 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
478 + /*
479 + * The first few commands (measurements) must be
480 + * queued since it might not be possible to talk to the
481 + * TPM, yet.
482 + * Return a response of up to 30 '0's.
483 + */
484 +
485 + count = min_t(size_t, count, 30);
486 + memset(buf, 0x0, count);
487 + return count;
488 + }
489 + /*
490 + * Check whether something is in the responselist and if
491 + * there's nothing in the list wait for something to appear.
492 + */
493 +
494 + if (!vtpms->current_response) {
495 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
496 + interruptible_sleep_on_timeout(&vtpms->resp_wait_queue,
497 + 1000);
498 + spin_lock_irqsave(&vtpms->resp_list_lock ,flags);
499 + }
500 +
501 + if (vtpms->current_response) {
502 + struct transmission *t = vtpms->current_response;
503 + vtpms->current_response = NULL;
504 + rc = min(count, t->response_len);
505 + memcpy(buf, t->response, rc);
506 + transmission_free(t);
507 + }
508 +
509 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
510 + return rc;
511 +}
512 +
513 +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
514 +{
515 + int rc = 0;
516 + unsigned long flags;
517 + struct transmission *t = transmission_alloc();
518 + struct vtpm_state *vtpms;
519 +
520 + vtpms = (struct vtpm_state *)chip_get_private(chip);
521 +
522 + if (!t)
523 + return -ENOMEM;
524 + /*
525 + * If there's a current request, it must be the
526 + * previous request that has timed out.
527 + */
528 + spin_lock_irqsave(&vtpms->req_list_lock, flags);
529 + if (vtpms->current_request != NULL) {
530 + printk("WARNING: Sending although there is a request outstanding.\n"
531 + " Previous request must have timed out.\n");
532 + transmission_free(vtpms->current_request);
533 + vtpms->current_request = NULL;
534 + }
535 + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
536 +
537 + /*
538 + * Queue the packet if the driver below is not
539 + * ready, yet, or there is any packet already
540 + * in the queue.
541 + * If the driver below is ready, unqueue all
542 + * packets first before sending our current
543 + * packet.
544 + * For each unqueued packet, except for the
545 + * last (=current) packet, call the function
546 + * tpm_xen_recv to wait for the response to come
547 + * back.
548 + */
549 + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
550 + if (time_after(jiffies,
551 + vtpms->disconnect_time + HZ * 10)) {
552 + rc = -ENOENT;
553 + } else {
554 + goto queue_it;
555 + }
556 + } else {
557 + /*
558 + * Send all queued packets.
559 + */
560 + if (_vtpm_send_queued(chip) == 0) {
561 +
562 + vtpms->current_request = t;
563 +
564 + rc = vtpm_vd_send(vtpms->tpm_private,
565 + buf,
566 + count,
567 + t);
568 + /*
569 + * The generic TPM driver will call
570 + * the function to receive the response.
571 + */
572 + if (rc < 0) {
573 + vtpms->current_request = NULL;
574 + goto queue_it;
575 + }
576 + } else {
577 +queue_it:
578 + if (!transmission_set_req_buffer(t, buf, count)) {
579 + transmission_free(t);
580 + rc = -ENOMEM;
581 + goto exit;
582 + }
583 + /*
584 + * An error occurred. Don't event try
585 + * to send the current request. Just
586 + * queue it.
587 + */
588 + spin_lock_irqsave(&vtpms->req_list_lock, flags);
589 + vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY;
590 + list_add_tail(&t->next, &vtpms->queued_requests);
591 + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
592 + }
593 + }
594 +
595 +exit:
596 + return rc;
597 +}
598 +
599 +
600 +/*
601 + * Send all queued requests.
602 + */
603 +static int _vtpm_send_queued(struct tpm_chip *chip)
604 +{
605 + int rc;
606 + int error = 0;
607 + long flags;
608 + unsigned char buffer[1];
609 + struct vtpm_state *vtpms;
610 + vtpms = (struct vtpm_state *)chip_get_private(chip);
611 +
612 + spin_lock_irqsave(&vtpms->req_list_lock, flags);
613 +
614 + while (!list_empty(&vtpms->queued_requests)) {
615 + /*
616 + * Need to dequeue them.
617 + * Read the result into a dummy buffer.
618 + */
619 + struct transmission *qt = (struct transmission *)
620 + vtpms->queued_requests.next;
621 + list_del(&qt->next);
622 + vtpms->current_request = qt;
623 + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
624 +
625 + rc = vtpm_vd_send(vtpms->tpm_private,
626 + qt->request,
627 + qt->request_len,
628 + qt);
629 +
630 + if (rc < 0) {
631 + spin_lock_irqsave(&vtpms->req_list_lock, flags);
632 + if ((qt = vtpms->current_request) != NULL) {
633 + /*
634 + * requeue it at the beginning
635 + * of the list
636 + */
637 + list_add(&qt->next,
638 + &vtpms->queued_requests);
639 + }
640 + vtpms->current_request = NULL;
641 + error = 1;
642 + break;
643 + }
644 + /*
645 + * After this point qt is not valid anymore!
646 + * It is freed when the front-end is delivering
647 + * the data by calling tpm_recv
648 + */
649 + /*
650 + * Receive response into provided dummy buffer
651 + */
652 + rc = vtpm_recv(chip, buffer, sizeof(buffer));
653 + spin_lock_irqsave(&vtpms->req_list_lock, flags);
654 + }
655 +
656 + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
657 +
658 + return error;
659 +}
660 +
661 +static void vtpm_cancel(struct tpm_chip *chip)
662 +{
663 + unsigned long flags;
664 + struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip);
665 +
666 + spin_lock_irqsave(&vtpms->resp_list_lock,flags);
667 +
668 + if (!vtpms->current_response && vtpms->current_request) {
669 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
670 + interruptible_sleep_on(&vtpms->resp_wait_queue);
671 + spin_lock_irqsave(&vtpms->resp_list_lock,flags);
672 + }
673 +
674 + if (vtpms->current_response) {
675 + struct transmission *t = vtpms->current_response;
676 + vtpms->current_response = NULL;
677 + transmission_free(t);
678 + }
679 +
680 + spin_unlock_irqrestore(&vtpms->resp_list_lock,flags);
681 +}
682 +
683 +static u8 vtpm_status(struct tpm_chip *chip)
684 +{
685 + u8 rc = 0;
686 + unsigned long flags;
687 + struct vtpm_state *vtpms;
688 +
689 + vtpms = (struct vtpm_state *)chip_get_private(chip);
690 +
691 + spin_lock_irqsave(&vtpms->resp_list_lock, flags);
692 + /*
693 + * Data are available if:
694 + * - there's a current response
695 + * - the last packet was queued only (this is fake, but necessary to
696 + * get the generic TPM layer to call the receive function.)
697 + */
698 + if (vtpms->current_response ||
699 + 0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) {
700 + rc = STATUS_DATA_AVAIL;
701 + } else if (!vtpms->current_response && !vtpms->current_request) {
702 + rc = STATUS_READY;
703 + }
704 +
705 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
706 + return rc;
707 +}
708 +
709 +static struct file_operations vtpm_ops = {
710 + .owner = THIS_MODULE,
711 + .llseek = no_llseek,
712 + .open = tpm_open,
713 + .read = tpm_read,
714 + .write = tpm_write,
715 + .release = tpm_release,
716 +};
717 +
718 +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
719 +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
720 +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
721 +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
722 +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
723 +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
724 + NULL);
725 +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
726 +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel);
727 +
728 +static struct attribute *vtpm_attrs[] = {
729 + &dev_attr_pubek.attr,
730 + &dev_attr_pcrs.attr,
731 + &dev_attr_enabled.attr,
732 + &dev_attr_active.attr,
733 + &dev_attr_owned.attr,
734 + &dev_attr_temp_deactivated.attr,
735 + &dev_attr_caps.attr,
736 + &dev_attr_cancel.attr,
737 + NULL,
738 +};
739 +
740 +static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs };
741 +
742 +#define TPM_LONG_TIMEOUT (10 * 60 * HZ)
743 +
744 +static struct tpm_vendor_specific tpm_vtpm = {
745 + .recv = vtpm_recv,
746 + .send = vtpm_send,
747 + .cancel = vtpm_cancel,
748 + .status = vtpm_status,
749 + .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL,
750 + .req_complete_val = STATUS_DATA_AVAIL,
751 + .req_canceled = STATUS_READY,
752 + .attr_group = &vtpm_attr_grp,
753 + .miscdev = {
754 + .fops = &vtpm_ops,
755 + },
756 + .duration = {
757 + TPM_LONG_TIMEOUT,
758 + TPM_LONG_TIMEOUT,
759 + TPM_LONG_TIMEOUT,
760 + },
761 +};
762 +
763 +struct tpm_chip *init_vtpm(struct device *dev,
764 + struct tpm_private *tp)
765 +{
766 + long rc;
767 + struct tpm_chip *chip;
768 + struct vtpm_state *vtpms;
769 +
770 + vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL);
771 + if (!vtpms)
772 + return ERR_PTR(-ENOMEM);
773 +
774 + vtpm_state_init(vtpms);
775 + vtpms->tpm_private = tp;
776 +
777 + chip = tpm_register_hardware(dev, &tpm_vtpm);
778 + if (!chip) {
779 + rc = -ENODEV;
780 + goto err_free_mem;
781 + }
782 +
783 + chip_set_private(chip, vtpms);
784 +
785 + return chip;
786 +
787 +err_free_mem:
788 + kfree(vtpms);
789 +
790 + return ERR_PTR(rc);
791 +}
792 +
793 +void cleanup_vtpm(struct device *dev)
794 +{
795 + struct tpm_chip *chip = dev_get_drvdata(dev);
796 + struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip);
797 + tpm_remove_hardware(dev);
798 + kfree(vtpms);
799 +}
800 --- /dev/null
801 +++ b/drivers/char/tpm/tpm_vtpm.h
802 @@ -0,0 +1,55 @@
803 +#ifndef TPM_VTPM_H
804 +#define TPM_VTPM_H
805 +
806 +struct tpm_chip;
807 +struct tpm_private;
808 +
809 +struct vtpm_state {
810 + struct transmission *current_request;
811 + spinlock_t req_list_lock;
812 + wait_queue_head_t req_wait_queue;
813 +
814 + struct list_head queued_requests;
815 +
816 + struct transmission *current_response;
817 + spinlock_t resp_list_lock;
818 + wait_queue_head_t resp_wait_queue; // processes waiting for responses
819 +
820 + u8 vd_status;
821 + u8 flags;
822 +
823 + unsigned long disconnect_time;
824 +
825 + /*
826 + * The following is a private structure of the underlying
827 + * driver. It is passed as parameter in the send function.
828 + */
829 + struct tpm_private *tpm_private;
830 +};
831 +
832 +
833 +enum vdev_status {
834 + TPM_VD_STATUS_DISCONNECTED = 0x0,
835 + TPM_VD_STATUS_CONNECTED = 0x1
836 +};
837 +
838 +/* this function is called from tpm_vtpm.c */
839 +int vtpm_vd_send(struct tpm_private * tp,
840 + const u8 * buf, size_t count, void *ptr);
841 +
842 +/* these functions are offered by tpm_vtpm.c */
843 +struct tpm_chip *init_vtpm(struct device *,
844 + struct tpm_private *);
845 +void cleanup_vtpm(struct device *);
846 +int vtpm_vd_recv(const struct tpm_chip* chip,
847 + const unsigned char *buffer, size_t count, void *ptr);
848 +void vtpm_vd_status(const struct tpm_chip *, u8 status);
849 +
850 +static inline struct tpm_private *tpm_private_from_dev(struct device *dev)
851 +{
852 + struct tpm_chip *chip = dev_get_drvdata(dev);
853 + struct vtpm_state *vtpms = chip_get_private(chip);
854 + return vtpms->tpm_private;
855 +}
856 +
857 +#endif
858 --- /dev/null
859 +++ b/drivers/char/tpm/tpm_xen.c
860 @@ -0,0 +1,722 @@
861 +/*
862 + * Copyright (c) 2005, IBM Corporation
863 + *
864 + * Author: Stefan Berger, stefanb@us.ibm.com
865 + * Grant table support: Mahadevan Gomathisankaran
866 + *
867 + * This code has been derived from drivers/xen/netfront/netfront.c
868 + *
869 + * Copyright (c) 2002-2004, K A Fraser
870 + *
871 + * This program is free software; you can redistribute it and/or
872 + * modify it under the terms of the GNU General Public License version 2
873 + * as published by the Free Software Foundation; or, when distributed
874 + * separately from the Linux kernel or incorporated into other
875 + * software packages, subject to the following license:
876 + *
877 + * Permission is hereby granted, free of charge, to any person obtaining a copy
878 + * of this source file (the "Software"), to deal in the Software without
879 + * restriction, including without limitation the rights to use, copy, modify,
880 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
881 + * and to permit persons to whom the Software is furnished to do so, subject to
882 + * the following conditions:
883 + *
884 + * The above copyright notice and this permission notice shall be included in
885 + * all copies or substantial portions of the Software.
886 + *
887 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
888 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
889 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
890 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
891 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
892 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
893 + * IN THE SOFTWARE.
894 + */
895 +
896 +#include <linux/errno.h>
897 +#include <linux/err.h>
898 +#include <linux/interrupt.h>
899 +#include <linux/mutex.h>
900 +#include <asm/uaccess.h>
901 +#include <xen/evtchn.h>
902 +#include <xen/interface/grant_table.h>
903 +#include <xen/interface/io/tpmif.h>
904 +#include <xen/gnttab.h>
905 +#include <xen/xenbus.h>
906 +#include "tpm.h"
907 +#include "tpm_vtpm.h"
908 +
909 +#undef DEBUG
910 +
911 +/* local structures */
912 +struct tpm_private {
913 + struct tpm_chip *chip;
914 +
915 + tpmif_tx_interface_t *tx;
916 + atomic_t refcnt;
917 + unsigned int irq;
918 + u8 is_connected;
919 + u8 is_suspended;
920 +
921 + spinlock_t tx_lock;
922 +
923 + struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE];
924 +
925 + atomic_t tx_busy;
926 + void *tx_remember;
927 +
928 + domid_t backend_id;
929 + wait_queue_head_t wait_q;
930 +
931 + struct xenbus_device *dev;
932 + int ring_ref;
933 +};
934 +
935 +struct tx_buffer {
936 + unsigned int size; // available space in data
937 + unsigned int len; // used space in data
938 + unsigned char *data; // pointer to a page
939 +};
940 +
941 +
942 +/* locally visible variables */
943 +static grant_ref_t gref_head;
944 +static struct tpm_private *my_priv;
945 +
946 +/* local function prototypes */
947 +static irqreturn_t tpmif_int(int irq,
948 + void *tpm_priv,
949 + struct pt_regs *ptregs);
950 +static void tpmif_rx_action(unsigned long unused);
951 +static int tpmif_connect(struct xenbus_device *dev,
952 + struct tpm_private *tp,
953 + domid_t domid);
954 +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0);
955 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp);
956 +static void tpmif_free_tx_buffers(struct tpm_private *tp);
957 +static void tpmif_set_connected_state(struct tpm_private *tp,
958 + u8 newstate);
959 +static int tpm_xmit(struct tpm_private *tp,
960 + const u8 * buf, size_t count, int userbuffer,
961 + void *remember);
962 +static void destroy_tpmring(struct tpm_private *tp);
963 +void __exit tpmif_exit(void);
964 +
965 +#define DPRINTK(fmt, args...) \
966 + pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
967 +#define IPRINTK(fmt, args...) \
968 + printk(KERN_INFO "xen_tpm_fr: " fmt, ##args)
969 +#define WPRINTK(fmt, args...) \
970 + printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args)
971 +
972 +#define GRANT_INVALID_REF 0
973 +
974 +
975 +static inline int
976 +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
977 + int isuserbuffer)
978 +{
979 + int copied = len;
980 +
981 + if (len > txb->size)
982 + copied = txb->size;
983 + if (isuserbuffer) {
984 + if (copy_from_user(txb->data, src, copied))
985 + return -EFAULT;
986 + } else {
987 + memcpy(txb->data, src, copied);
988 + }
989 + txb->len = len;
990 + return copied;
991 +}
992 +
993 +static inline struct tx_buffer *tx_buffer_alloc(void)
994 +{
995 + struct tx_buffer *txb;
996 +
997 + txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
998 + if (!txb)
999 + return NULL;
1000 +
1001 + txb->len = 0;
1002 + txb->size = PAGE_SIZE;
1003 + txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
1004 + if (txb->data == NULL) {
1005 + kfree(txb);
1006 + txb = NULL;
1007 + }
1008 +
1009 + return txb;
1010 +}
1011 +
1012 +
1013 +static inline void tx_buffer_free(struct tx_buffer *txb)
1014 +{
1015 + if (txb) {
1016 + free_page((long)txb->data);
1017 + kfree(txb);
1018 + }
1019 +}
1020 +
1021 +/**************************************************************
1022 + Utility function for the tpm_private structure
1023 +**************************************************************/
1024 +static void tpm_private_init(struct tpm_private *tp)
1025 +{
1026 + spin_lock_init(&tp->tx_lock);
1027 + init_waitqueue_head(&tp->wait_q);
1028 + atomic_set(&tp->refcnt, 1);
1029 +}
1030 +
1031 +static void tpm_private_put(void)
1032 +{
1033 + if (!atomic_dec_and_test(&my_priv->refcnt))
1034 + return;
1035 +
1036 + tpmif_free_tx_buffers(my_priv);
1037 + kfree(my_priv);
1038 + my_priv = NULL;
1039 +}
1040 +
1041 +static struct tpm_private *tpm_private_get(void)
1042 +{
1043 + int err;
1044 +
1045 + if (my_priv) {
1046 + atomic_inc(&my_priv->refcnt);
1047 + return my_priv;
1048 + }
1049 +
1050 + my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
1051 + if (!my_priv)
1052 + return NULL;
1053 +
1054 + tpm_private_init(my_priv);
1055 + err = tpmif_allocate_tx_buffers(my_priv);
1056 + if (err < 0)
1057 + tpm_private_put();
1058 +
1059 + return my_priv;
1060 +}
1061 +
1062 +/**************************************************************
1063 +
1064 + The interface to let the tpm plugin register its callback
1065 + function and send data to another partition using this module
1066 +
1067 +**************************************************************/
1068 +
1069 +static DEFINE_MUTEX(suspend_lock);
1070 +/*
1071 + * Send data via this module by calling this function
1072 + */
1073 +int vtpm_vd_send(struct tpm_private *tp,
1074 + const u8 * buf, size_t count, void *ptr)
1075 +{
1076 + int sent;
1077 +
1078 + mutex_lock(&suspend_lock);
1079 + sent = tpm_xmit(tp, buf, count, 0, ptr);
1080 + mutex_unlock(&suspend_lock);
1081 +
1082 + return sent;
1083 +}
1084 +
1085 +/**************************************************************
1086 + XENBUS support code
1087 +**************************************************************/
1088 +
1089 +static int setup_tpmring(struct xenbus_device *dev,
1090 + struct tpm_private *tp)
1091 +{
1092 + tpmif_tx_interface_t *sring;
1093 + int err;
1094 +
1095 + tp->ring_ref = GRANT_INVALID_REF;
1096 +
1097 + sring = (void *)__get_free_page(GFP_KERNEL);
1098 + if (!sring) {
1099 + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
1100 + return -ENOMEM;
1101 + }
1102 + tp->tx = sring;
1103 +
1104 + err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx));
1105 + if (err < 0) {
1106 + free_page((unsigned long)sring);
1107 + tp->tx = NULL;
1108 + xenbus_dev_fatal(dev, err, "allocating grant reference");
1109 + goto fail;
1110 + }
1111 + tp->ring_ref = err;
1112 +
1113 + err = tpmif_connect(dev, tp, dev->otherend_id);
1114 + if (err)
1115 + goto fail;
1116 +
1117 + return 0;
1118 +fail:
1119 + destroy_tpmring(tp);
1120 + return err;
1121 +}
1122 +
1123 +
1124 +static void destroy_tpmring(struct tpm_private *tp)
1125 +{
1126 + tpmif_set_connected_state(tp, 0);
1127 +
1128 + if (tp->ring_ref != GRANT_INVALID_REF) {
1129 + gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx);
1130 + tp->ring_ref = GRANT_INVALID_REF;
1131 + tp->tx = NULL;
1132 + }
1133 +
1134 + if (tp->irq)
1135 + unbind_from_irqhandler(tp->irq, tp);
1136 +
1137 + tp->irq = 0;
1138 +}
1139 +
1140 +
1141 +static int talk_to_backend(struct xenbus_device *dev,
1142 + struct tpm_private *tp)
1143 +{
1144 + const char *message = NULL;
1145 + int err;
1146 + struct xenbus_transaction xbt;
1147 +
1148 + err = setup_tpmring(dev, tp);
1149 + if (err) {
1150 + xenbus_dev_fatal(dev, err, "setting up ring");
1151 + goto out;
1152 + }
1153 +
1154 +again:
1155 + err = xenbus_transaction_start(&xbt);
1156 + if (err) {
1157 + xenbus_dev_fatal(dev, err, "starting transaction");
1158 + goto destroy_tpmring;
1159 + }
1160 +
1161 + err = xenbus_printf(xbt, dev->nodename,
1162 + "ring-ref","%u", tp->ring_ref);
1163 + if (err) {
1164 + message = "writing ring-ref";
1165 + goto abort_transaction;
1166 + }
1167 +
1168 + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
1169 + irq_to_evtchn_port(tp->irq));
1170 + if (err) {
1171 + message = "writing event-channel";
1172 + goto abort_transaction;
1173 + }
1174 +
1175 + err = xenbus_transaction_end(xbt, 0);
1176 + if (err == -EAGAIN)
1177 + goto again;
1178 + if (err) {
1179 + xenbus_dev_fatal(dev, err, "completing transaction");
1180 + goto destroy_tpmring;
1181 + }
1182 +
1183 + xenbus_switch_state(dev, XenbusStateConnected);
1184 +
1185 + return 0;
1186 +
1187 +abort_transaction:
1188 + xenbus_transaction_end(xbt, 1);
1189 + if (message)
1190 + xenbus_dev_error(dev, err, "%s", message);
1191 +destroy_tpmring:
1192 + destroy_tpmring(tp);
1193 +out:
1194 + return err;
1195 +}
1196 +
1197 +/**
1198 + * Callback received when the backend's state changes.
1199 + */
1200 +static void backend_changed(struct xenbus_device *dev,
1201 + enum xenbus_state backend_state)
1202 +{
1203 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1204 + DPRINTK("\n");
1205 +
1206 + switch (backend_state) {
1207 + case XenbusStateInitialising:
1208 + case XenbusStateInitWait:
1209 + case XenbusStateInitialised:
1210 + case XenbusStateReconfiguring:
1211 + case XenbusStateReconfigured:
1212 + case XenbusStateUnknown:
1213 + break;
1214 +
1215 + case XenbusStateConnected:
1216 + tpmif_set_connected_state(tp, 1);
1217 + break;
1218 +
1219 + case XenbusStateClosing:
1220 + tpmif_set_connected_state(tp, 0);
1221 + xenbus_frontend_closed(dev);
1222 + break;
1223 +
1224 + case XenbusStateClosed:
1225 + tpmif_set_connected_state(tp, 0);
1226 + if (tp->is_suspended == 0)
1227 + device_unregister(&dev->dev);
1228 + xenbus_frontend_closed(dev);
1229 + break;
1230 + }
1231 +}
1232 +
1233 +static int tpmfront_probe(struct xenbus_device *dev,
1234 + const struct xenbus_device_id *id)
1235 +{
1236 + int err;
1237 + int handle;
1238 + struct tpm_private *tp = tpm_private_get();
1239 +
1240 + if (!tp)
1241 + return -ENOMEM;
1242 +
1243 + tp->chip = init_vtpm(&dev->dev, tp);
1244 + if (IS_ERR(tp->chip))
1245 + return PTR_ERR(tp->chip);
1246 +
1247 + err = xenbus_scanf(XBT_NIL, dev->nodename,
1248 + "handle", "%i", &handle);
1249 + if (XENBUS_EXIST_ERR(err))
1250 + return err;
1251 +
1252 + if (err < 0) {
1253 + xenbus_dev_fatal(dev,err,"reading virtual-device");
1254 + return err;
1255 + }
1256 +
1257 + tp->dev = dev;
1258 +
1259 + err = talk_to_backend(dev, tp);
1260 + if (err) {
1261 + tpm_private_put();
1262 + return err;
1263 + }
1264 +
1265 + return 0;
1266 +}
1267 +
1268 +
1269 +static int tpmfront_remove(struct xenbus_device *dev)
1270 +{
1271 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1272 + destroy_tpmring(tp);
1273 + cleanup_vtpm(&dev->dev);
1274 + return 0;
1275 +}
1276 +
1277 +static int tpmfront_suspend(struct xenbus_device *dev)
1278 +{
1279 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1280 + u32 ctr;
1281 +
1282 + /* Take the lock, preventing any application from sending. */
1283 + mutex_lock(&suspend_lock);
1284 + tp->is_suspended = 1;
1285 +
1286 + for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
1287 + if ((ctr % 10) == 0)
1288 + printk("TPM-FE [INFO]: Waiting for outstanding "
1289 + "request.\n");
1290 + /* Wait for a request to be responded to. */
1291 + interruptible_sleep_on_timeout(&tp->wait_q, 100);
1292 + }
1293 +
1294 + return 0;
1295 +}
1296 +
1297 +static int tpmfront_suspend_finish(struct tpm_private *tp)
1298 +{
1299 + tp->is_suspended = 0;
1300 + /* Allow applications to send again. */
1301 + mutex_unlock(&suspend_lock);
1302 + return 0;
1303 +}
1304 +
1305 +static int tpmfront_suspend_cancel(struct xenbus_device *dev)
1306 +{
1307 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1308 + return tpmfront_suspend_finish(tp);
1309 +}
1310 +
1311 +static int tpmfront_resume(struct xenbus_device *dev)
1312 +{
1313 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1314 + destroy_tpmring(tp);
1315 + return talk_to_backend(dev, tp);
1316 +}
1317 +
1318 +static int tpmif_connect(struct xenbus_device *dev,
1319 + struct tpm_private *tp,
1320 + domid_t domid)
1321 +{
1322 + int err;
1323 +
1324 + tp->backend_id = domid;
1325 +
1326 + err = bind_listening_port_to_irqhandler(
1327 + domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
1328 + if (err <= 0) {
1329 + WPRINTK("bind_listening_port_to_irqhandler failed "
1330 + "(err=%d)\n", err);
1331 + return err;
1332 + }
1333 + tp->irq = err;
1334 +
1335 + return 0;
1336 +}
1337 +
1338 +static struct xenbus_device_id tpmfront_ids[] = {
1339 + { "vtpm" },
1340 + { "" }
1341 +};
1342 +
1343 +static struct xenbus_driver tpmfront = {
1344 + .name = "vtpm",
1345 + .owner = THIS_MODULE,
1346 + .ids = tpmfront_ids,
1347 + .probe = tpmfront_probe,
1348 + .remove = tpmfront_remove,
1349 + .resume = tpmfront_resume,
1350 + .otherend_changed = backend_changed,
1351 + .suspend = tpmfront_suspend,
1352 + .suspend_cancel = tpmfront_suspend_cancel,
1353 +};
1354 +
1355 +static void __init init_tpm_xenbus(void)
1356 +{
1357 + xenbus_register_frontend(&tpmfront);
1358 +}
1359 +
1360 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
1361 +{
1362 + unsigned int i;
1363 +
1364 + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
1365 + tp->tx_buffers[i] = tx_buffer_alloc();
1366 + if (!tp->tx_buffers[i]) {
1367 + tpmif_free_tx_buffers(tp);
1368 + return -ENOMEM;
1369 + }
1370 + }
1371 + return 0;
1372 +}
1373 +
1374 +static void tpmif_free_tx_buffers(struct tpm_private *tp)
1375 +{
1376 + unsigned int i;
1377 +
1378 + for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
1379 + tx_buffer_free(tp->tx_buffers[i]);
1380 +}
1381 +
1382 +static void tpmif_rx_action(unsigned long priv)
1383 +{
1384 + struct tpm_private *tp = (struct tpm_private *)priv;
1385 + int i = 0;
1386 + unsigned int received;
1387 + unsigned int offset = 0;
1388 + u8 *buffer;
1389 + tpmif_tx_request_t *tx = &tp->tx->ring[i].req;
1390 +
1391 + atomic_set(&tp->tx_busy, 0);
1392 + wake_up_interruptible(&tp->wait_q);
1393 +
1394 + received = tx->size;
1395 +
1396 + buffer = kmalloc(received, GFP_ATOMIC);
1397 + if (!buffer)
1398 + return;
1399 +
1400 + for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
1401 + struct tx_buffer *txb = tp->tx_buffers[i];
1402 + tpmif_tx_request_t *tx;
1403 + unsigned int tocopy;
1404 +
1405 + tx = &tp->tx->ring[i].req;
1406 + tocopy = tx->size;
1407 + if (tocopy > PAGE_SIZE)
1408 + tocopy = PAGE_SIZE;
1409 +
1410 + memcpy(&buffer[offset], txb->data, tocopy);
1411 +
1412 + gnttab_release_grant_reference(&gref_head, tx->ref);
1413 +
1414 + offset += tocopy;
1415 + }
1416 +
1417 + vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember);
1418 + kfree(buffer);
1419 +}
1420 +
1421 +
1422 +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
1423 +{
1424 + struct tpm_private *tp = tpm_priv;
1425 + unsigned long flags;
1426 +
1427 + spin_lock_irqsave(&tp->tx_lock, flags);
1428 + tpmif_rx_tasklet.data = (unsigned long)tp;
1429 + tasklet_schedule(&tpmif_rx_tasklet);
1430 + spin_unlock_irqrestore(&tp->tx_lock, flags);
1431 +
1432 + return IRQ_HANDLED;
1433 +}
1434 +
1435 +
1436 +static int tpm_xmit(struct tpm_private *tp,
1437 + const u8 * buf, size_t count, int isuserbuffer,
1438 + void *remember)
1439 +{
1440 + tpmif_tx_request_t *tx;
1441 + TPMIF_RING_IDX i;
1442 + unsigned int offset = 0;
1443 +
1444 + spin_lock_irq(&tp->tx_lock);
1445 +
1446 + if (unlikely(atomic_read(&tp->tx_busy))) {
1447 + printk("tpm_xmit: There's an outstanding request/response "
1448 + "on the way!\n");
1449 + spin_unlock_irq(&tp->tx_lock);
1450 + return -EBUSY;
1451 + }
1452 +
1453 + if (tp->is_connected != 1) {
1454 + spin_unlock_irq(&tp->tx_lock);
1455 + return -EIO;
1456 + }
1457 +
1458 + for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) {
1459 + struct tx_buffer *txb = tp->tx_buffers[i];
1460 + int copied;
1461 +
1462 + if (!txb) {
1463 + DPRINTK("txb (i=%d) is NULL. buffers initialized?\n"
1464 + "Not transmitting anything!\n", i);
1465 + spin_unlock_irq(&tp->tx_lock);
1466 + return -EFAULT;
1467 + }
1468 +
1469 + copied = tx_buffer_copy(txb, &buf[offset], count,
1470 + isuserbuffer);
1471 + if (copied < 0) {
1472 + /* An error occurred */
1473 + spin_unlock_irq(&tp->tx_lock);
1474 + return copied;
1475 + }
1476 + count -= copied;
1477 + offset += copied;
1478 +
1479 + tx = &tp->tx->ring[i].req;
1480 + tx->addr = virt_to_machine(txb->data);
1481 + tx->size = txb->len;
1482 + tx->unused = 0;
1483 +
1484 + DPRINTK("First 4 characters sent by TPM-FE are "
1485 + "0x%02x 0x%02x 0x%02x 0x%02x\n",
1486 + txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
1487 +
1488 + /* Get the granttable reference for this page. */
1489 + tx->ref = gnttab_claim_grant_reference(&gref_head);
1490 + if (tx->ref == -ENOSPC) {
1491 + spin_unlock_irq(&tp->tx_lock);
1492 + DPRINTK("Grant table claim reference failed in "
1493 + "func:%s line:%d file:%s\n",
1494 + __FUNCTION__, __LINE__, __FILE__);
1495 + return -ENOSPC;
1496 + }
1497 + gnttab_grant_foreign_access_ref(tx->ref,
1498 + tp->backend_id,
1499 + virt_to_mfn(txb->data),
1500 + 0 /*RW*/);
1501 + wmb();
1502 + }
1503 +
1504 + atomic_set(&tp->tx_busy, 1);
1505 + tp->tx_remember = remember;
1506 +
1507 + mb();
1508 +
1509 + notify_remote_via_irq(tp->irq);
1510 +
1511 + spin_unlock_irq(&tp->tx_lock);
1512 + return offset;
1513 +}
1514 +
1515 +
1516 +static void tpmif_notify_upperlayer(struct tpm_private *tp)
1517 +{
1518 + /* Notify upper layer about the state of the connection to the BE. */
1519 + vtpm_vd_status(tp->chip, (tp->is_connected
1520 + ? TPM_VD_STATUS_CONNECTED
1521 + : TPM_VD_STATUS_DISCONNECTED));
1522 +}
1523 +
1524 +
1525 +static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected)
1526 +{
1527 + /*
1528 + * Don't notify upper layer if we are in suspend mode and
1529 + * should disconnect - assumption is that we will resume
1530 + * The mutex keeps apps from sending.
1531 + */
1532 + if (is_connected == 0 && tp->is_suspended == 1)
1533 + return;
1534 +
1535 + /*
1536 + * Unlock the mutex if we are connected again
1537 + * after being suspended - now resuming.
1538 + * This also removes the suspend state.
1539 + */
1540 + if (is_connected == 1 && tp->is_suspended == 1)
1541 + tpmfront_suspend_finish(tp);
1542 +
1543 + if (is_connected != tp->is_connected) {
1544 + tp->is_connected = is_connected;
1545 + tpmif_notify_upperlayer(tp);
1546 + }
1547 +}
1548 +
1549 +
1550 +
1551 +/* =================================================================
1552 + * Initialization function.
1553 + * =================================================================
1554 + */
1555 +
1556 +
1557 +static int __init tpmif_init(void)
1558 +{
1559 + struct tpm_private *tp;
1560 +
1561 + if (is_initial_xendomain())
1562 + return -EPERM;
1563 +
1564 + tp = tpm_private_get();
1565 + if (!tp)
1566 + return -ENOMEM;
1567 +
1568 + IPRINTK("Initialising the vTPM driver.\n");
1569 + if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
1570 + &gref_head) < 0) {
1571 + tpm_private_put();
1572 + return -EFAULT;
1573 + }
1574 +
1575 + init_tpm_xenbus();
1576 + return 0;
1577 +}
1578 +
1579 +
1580 +module_init(tpmif_init);
1581 +
1582 +MODULE_LICENSE("Dual BSD/GPL");
1583 --- a/drivers/ide/ide-lib.c
1584 +++ b/drivers/ide/ide-lib.c
1585 @@ -336,12 +336,12 @@
1586 {
1587 u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
1588
1589 - if (!PCI_DMA_BUS_IS_PHYS) {
1590 - addr = BLK_BOUNCE_ANY;
1591 - } else if (on && drive->media == ide_disk) {
1592 + if (on && drive->media == ide_disk) {
1593 struct device *dev = drive->hwif->dev;
1594
1595 - if (dev && dev->dma_mask)
1596 + if (!PCI_DMA_BUS_IS_PHYS)
1597 + addr = BLK_BOUNCE_ANY;
1598 + else if (dev && dev->dma_mask)
1599 addr = *dev->dma_mask;
1600 }
1601
1602 --- a/drivers/oprofile/buffer_sync.c
1603 +++ b/drivers/oprofile/buffer_sync.c
1604 @@ -6,6 +6,10 @@
1605 *
1606 * @author John Levon <levon@movementarian.org>
1607 *
1608 + * Modified by Aravind Menon for Xen
1609 + * These modifications are:
1610 + * Copyright (C) 2005 Hewlett-Packard Co.
1611 + *
1612 * This is the core of the buffer management. Each
1613 * CPU buffer is processed and entered into the
1614 * global event buffer. Such processing is necessary
1615 @@ -40,6 +44,7 @@
1616 static DEFINE_SPINLOCK(task_mortuary);
1617 static void process_task_mortuary(void);
1618
1619 +static int cpu_current_domain[NR_CPUS];
1620
1621 /* Take ownership of the task struct and place it on the
1622 * list for processing. Only after two full buffer syncs
1623 @@ -148,6 +153,11 @@
1624 int sync_start(void)
1625 {
1626 int err;
1627 + int i;
1628 +
1629 + for (i = 0; i < NR_CPUS; i++) {
1630 + cpu_current_domain[i] = COORDINATOR_DOMAIN;
1631 + }
1632
1633 start_cpu_work();
1634
1635 @@ -274,15 +284,31 @@
1636 last_cookie = INVALID_COOKIE;
1637 }
1638
1639 -static void add_kernel_ctx_switch(unsigned int in_kernel)
1640 +static void add_cpu_mode_switch(unsigned int cpu_mode)
1641 {
1642 add_event_entry(ESCAPE_CODE);
1643 - if (in_kernel)
1644 - add_event_entry(KERNEL_ENTER_SWITCH_CODE);
1645 - else
1646 - add_event_entry(KERNEL_EXIT_SWITCH_CODE);
1647 + switch (cpu_mode) {
1648 + case CPU_MODE_USER:
1649 + add_event_entry(USER_ENTER_SWITCH_CODE);
1650 + break;
1651 + case CPU_MODE_KERNEL:
1652 + add_event_entry(KERNEL_ENTER_SWITCH_CODE);
1653 + break;
1654 + case CPU_MODE_XEN:
1655 + add_event_entry(XEN_ENTER_SWITCH_CODE);
1656 + break;
1657 + default:
1658 + break;
1659 + }
1660 }
1661 -
1662 +
1663 +static void add_domain_switch(unsigned long domain_id)
1664 +{
1665 + add_event_entry(ESCAPE_CODE);
1666 + add_event_entry(DOMAIN_SWITCH_CODE);
1667 + add_event_entry(domain_id);
1668 +}
1669 +
1670 static void
1671 add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
1672 {
1673 @@ -347,9 +373,9 @@
1674 * for later lookup from userspace.
1675 */
1676 static int
1677 -add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
1678 +add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
1679 {
1680 - if (in_kernel) {
1681 + if (cpu_mode >= CPU_MODE_KERNEL) {
1682 add_sample_entry(s->eip, s->event);
1683 return 1;
1684 } else if (mm) {
1685 @@ -495,15 +521,21 @@
1686 struct mm_struct *mm = NULL;
1687 struct task_struct * new;
1688 unsigned long cookie = 0;
1689 - int in_kernel = 1;
1690 + int cpu_mode = 1;
1691 unsigned int i;
1692 sync_buffer_state state = sb_buffer_start;
1693 unsigned long available;
1694 + int domain_switch = 0;
1695
1696 mutex_lock(&buffer_mutex);
1697
1698 add_cpu_switch(cpu);
1699
1700 + /* We need to assign the first samples in this CPU buffer to the
1701 + same domain that we were processing at the last sync_buffer */
1702 + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
1703 + add_domain_switch(cpu_current_domain[cpu]);
1704 + }
1705 /* Remember, only we can modify tail_pos */
1706
1707 available = get_slots(cpu_buf);
1708 @@ -511,16 +543,18 @@
1709 for (i = 0; i < available; ++i) {
1710 struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
1711
1712 - if (is_code(s->eip)) {
1713 - if (s->event <= CPU_IS_KERNEL) {
1714 - /* kernel/userspace switch */
1715 - in_kernel = s->event;
1716 + if (is_code(s->eip) && !domain_switch) {
1717 + if (s->event <= CPU_MODE_XEN) {
1718 + /* xen/kernel/userspace switch */
1719 + cpu_mode = s->event;
1720 if (state == sb_buffer_start)
1721 state = sb_sample_start;
1722 - add_kernel_ctx_switch(s->event);
1723 + add_cpu_mode_switch(s->event);
1724 } else if (s->event == CPU_TRACE_BEGIN) {
1725 state = sb_bt_start;
1726 add_trace_begin();
1727 + } else if (s->event == CPU_DOMAIN_SWITCH) {
1728 + domain_switch = 1;
1729 } else {
1730 struct mm_struct * oldmm = mm;
1731
1732 @@ -534,11 +568,21 @@
1733 add_user_ctx_switch(new, cookie);
1734 }
1735 } else {
1736 - if (state >= sb_bt_start &&
1737 - !add_sample(mm, s, in_kernel)) {
1738 - if (state == sb_bt_start) {
1739 - state = sb_bt_ignore;
1740 - atomic_inc(&oprofile_stats.bt_lost_no_mapping);
1741 + if (domain_switch) {
1742 + cpu_current_domain[cpu] = s->eip;
1743 + add_domain_switch(s->eip);
1744 + domain_switch = 0;
1745 + } else {
1746 + if (cpu_current_domain[cpu] !=
1747 + COORDINATOR_DOMAIN) {
1748 + add_sample_entry(s->eip, s->event);
1749 + }
1750 + else if (state >= sb_bt_start &&
1751 + !add_sample(mm, s, cpu_mode)) {
1752 + if (state == sb_bt_start) {
1753 + state = sb_bt_ignore;
1754 + atomic_inc(&oprofile_stats.bt_lost_no_mapping);
1755 + }
1756 }
1757 }
1758 }
1759 @@ -547,6 +591,11 @@
1760 }
1761 release_mm(mm);
1762
1763 + /* We reset domain to COORDINATOR at each CPU switch */
1764 + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
1765 + add_domain_switch(COORDINATOR_DOMAIN);
1766 + }
1767 +
1768 mark_done(cpu);
1769
1770 mutex_unlock(&buffer_mutex);
1771 --- a/drivers/oprofile/cpu_buffer.c
1772 +++ b/drivers/oprofile/cpu_buffer.c
1773 @@ -6,6 +6,10 @@
1774 *
1775 * @author John Levon <levon@movementarian.org>
1776 *
1777 + * Modified by Aravind Menon for Xen
1778 + * These modifications are:
1779 + * Copyright (C) 2005 Hewlett-Packard Co.
1780 + *
1781 * Each CPU has a local buffer that stores PC value/event
1782 * pairs. We also log context switches when we notice them.
1783 * Eventually each CPU's buffer is processed into the global
1784 @@ -34,6 +38,8 @@
1785 #define DEFAULT_TIMER_EXPIRE (HZ / 10)
1786 static int work_enabled;
1787
1788 +static int32_t current_domain = COORDINATOR_DOMAIN;
1789 +
1790 void free_cpu_buffers(void)
1791 {
1792 int i;
1793 @@ -57,7 +63,7 @@
1794 goto fail;
1795
1796 b->last_task = NULL;
1797 - b->last_is_kernel = -1;
1798 + b->last_cpu_mode = -1;
1799 b->tracing = 0;
1800 b->buffer_size = buffer_size;
1801 b->tail_pos = 0;
1802 @@ -115,7 +121,7 @@
1803 * collected will populate the buffer with proper
1804 * values to initialize the buffer
1805 */
1806 - cpu_buf->last_is_kernel = -1;
1807 + cpu_buf->last_cpu_mode = -1;
1808 cpu_buf->last_task = NULL;
1809 }
1810
1811 @@ -165,13 +171,13 @@
1812 * because of the head/tail separation of the writer and reader
1813 * of the CPU buffer.
1814 *
1815 - * is_kernel is needed because on some architectures you cannot
1816 + * cpu_mode is needed because on some architectures you cannot
1817 * tell if you are in kernel or user space simply by looking at
1818 - * pc. We tag this in the buffer by generating kernel enter/exit
1819 - * events whenever is_kernel changes
1820 + * pc. We tag this in the buffer by generating kernel/user (and xen)
1821 + * enter events whenever cpu_mode changes
1822 */
1823 static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
1824 - int is_kernel, unsigned long event)
1825 + int cpu_mode, unsigned long event)
1826 {
1827 struct task_struct * task;
1828
1829 @@ -187,18 +193,18 @@
1830 return 0;
1831 }
1832
1833 - is_kernel = !!is_kernel;
1834 -
1835 task = current;
1836
1837 /* notice a switch from user->kernel or vice versa */
1838 - if (cpu_buf->last_is_kernel != is_kernel) {
1839 - cpu_buf->last_is_kernel = is_kernel;
1840 - add_code(cpu_buf, is_kernel);
1841 + if (cpu_buf->last_cpu_mode != cpu_mode) {
1842 + cpu_buf->last_cpu_mode = cpu_mode;
1843 + add_code(cpu_buf, cpu_mode);
1844 }
1845 -
1846 +
1847 /* notice a task switch */
1848 - if (cpu_buf->last_task != task) {
1849 + /* if not processing other domain samples */
1850 + if ((cpu_buf->last_task != task) &&
1851 + (current_domain == COORDINATOR_DOMAIN)) {
1852 cpu_buf->last_task = task;
1853 add_code(cpu_buf, (unsigned long)task);
1854 }
1855 @@ -282,6 +288,25 @@
1856 add_sample(cpu_buf, pc, 0);
1857 }
1858
1859 +int oprofile_add_domain_switch(int32_t domain_id)
1860 +{
1861 + struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
1862 +
1863 + /* should have space for switching into and out of domain
1864 + (2 slots each) plus one sample and one cpu mode switch */
1865 + if (((nr_available_slots(cpu_buf) < 6) &&
1866 + (domain_id != COORDINATOR_DOMAIN)) ||
1867 + (nr_available_slots(cpu_buf) < 2))
1868 + return 0;
1869 +
1870 + add_code(cpu_buf, CPU_DOMAIN_SWITCH);
1871 + add_sample(cpu_buf, domain_id, 0);
1872 +
1873 + current_domain = domain_id;
1874 +
1875 + return 1;
1876 +}
1877 +
1878 /*
1879 * This serves to avoid cpu buffer overflow, and makes sure
1880 * the task mortuary progresses
1881 --- a/drivers/oprofile/cpu_buffer.h
1882 +++ b/drivers/oprofile/cpu_buffer.h
1883 @@ -36,7 +36,7 @@
1884 volatile unsigned long tail_pos;
1885 unsigned long buffer_size;
1886 struct task_struct * last_task;
1887 - int last_is_kernel;
1888 + int last_cpu_mode;
1889 int tracing;
1890 struct op_sample * buffer;
1891 unsigned long sample_received;
1892 @@ -52,7 +52,10 @@
1893 void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
1894
1895 /* transient events for the CPU buffer -> event buffer */
1896 -#define CPU_IS_KERNEL 1
1897 -#define CPU_TRACE_BEGIN 2
1898 +#define CPU_MODE_USER 0
1899 +#define CPU_MODE_KERNEL 1
1900 +#define CPU_MODE_XEN 2
1901 +#define CPU_TRACE_BEGIN 3
1902 +#define CPU_DOMAIN_SWITCH 4
1903
1904 #endif /* OPROFILE_CPU_BUFFER_H */
1905 --- a/drivers/oprofile/event_buffer.h
1906 +++ b/drivers/oprofile/event_buffer.h
1907 @@ -23,6 +23,9 @@
1908 #define INVALID_COOKIE ~0UL
1909 #define NO_COOKIE 0UL
1910
1911 +/* Constant used to refer to coordinator domain (Xen) */
1912 +#define COORDINATOR_DOMAIN -1
1913 +
1914 extern const struct file_operations event_buffer_fops;
1915
1916 /* mutex between sync_cpu_buffers() and the
1917 --- a/drivers/oprofile/oprof.c
1918 +++ b/drivers/oprofile/oprof.c
1919 @@ -5,6 +5,10 @@
1920 * @remark Read the file COPYING
1921 *
1922 * @author John Levon <levon@movementarian.org>
1923 + *
1924 + * Modified by Aravind Menon for Xen
1925 + * These modifications are:
1926 + * Copyright (C) 2005 Hewlett-Packard Co.
1927 */
1928
1929 #include <linux/kernel.h>
1930 @@ -33,6 +37,32 @@
1931 */
1932 static int timer = 0;
1933
1934 +int oprofile_set_active(int active_domains[], unsigned int adomains)
1935 +{
1936 + int err;
1937 +
1938 + if (!oprofile_ops.set_active)
1939 + return -EINVAL;
1940 +
1941 + mutex_lock(&start_mutex);
1942 + err = oprofile_ops.set_active(active_domains, adomains);
1943 + mutex_unlock(&start_mutex);
1944 + return err;
1945 +}
1946 +
1947 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
1948 +{
1949 + int err;
1950 +
1951 + if (!oprofile_ops.set_passive)
1952 + return -EINVAL;
1953 +
1954 + mutex_lock(&start_mutex);
1955 + err = oprofile_ops.set_passive(passive_domains, pdomains);
1956 + mutex_unlock(&start_mutex);
1957 + return err;
1958 +}
1959 +
1960 int oprofile_setup(void)
1961 {
1962 int err;
1963 --- a/drivers/oprofile/oprof.h
1964 +++ b/drivers/oprofile/oprof.h
1965 @@ -35,5 +35,8 @@
1966 void oprofile_timer_init(struct oprofile_operations * ops);
1967
1968 int oprofile_set_backtrace(unsigned long depth);
1969 +
1970 +int oprofile_set_active(int active_domains[], unsigned int adomains);
1971 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
1972
1973 #endif /* OPROF_H */
1974 --- a/drivers/oprofile/oprofile_files.c
1975 +++ b/drivers/oprofile/oprofile_files.c
1976 @@ -5,15 +5,21 @@
1977 * @remark Read the file COPYING
1978 *
1979 * @author John Levon <levon@movementarian.org>
1980 + *
1981 + * Modified by Aravind Menon for Xen
1982 + * These modifications are:
1983 + * Copyright (C) 2005 Hewlett-Packard Co.
1984 */
1985
1986 #include <linux/fs.h>
1987 #include <linux/oprofile.h>
1988 +#include <asm/uaccess.h>
1989 +#include <linux/ctype.h>
1990
1991 #include "event_buffer.h"
1992 #include "oprofile_stats.h"
1993 #include "oprof.h"
1994 -
1995 +
1996 unsigned long fs_buffer_size = 131072;
1997 unsigned long fs_cpu_buffer_size = 8192;
1998 unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
1999 @@ -117,11 +123,202 @@
2000 static const struct file_operations dump_fops = {
2001 .write = dump_write,
2002 };
2003 -
2004 +
2005 +#define TMPBUFSIZE 512
2006 +
2007 +static unsigned int adomains = 0;
2008 +static int active_domains[MAX_OPROF_DOMAINS + 1];
2009 +static DEFINE_MUTEX(adom_mutex);
2010 +
2011 +static ssize_t adomain_write(struct file * file, char const __user * buf,
2012 + size_t count, loff_t * offset)
2013 +{
2014 + char *tmpbuf;
2015 + char *startp, *endp;
2016 + int i;
2017 + unsigned long val;
2018 + ssize_t retval = count;
2019 +
2020 + if (*offset)
2021 + return -EINVAL;
2022 + if (count > TMPBUFSIZE - 1)
2023 + return -EINVAL;
2024 +
2025 + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2026 + return -ENOMEM;
2027 +
2028 + if (copy_from_user(tmpbuf, buf, count)) {
2029 + kfree(tmpbuf);
2030 + return -EFAULT;
2031 + }
2032 + tmpbuf[count] = 0;
2033 +
2034 + mutex_lock(&adom_mutex);
2035 +
2036 + startp = tmpbuf;
2037 + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2038 + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2039 + val = simple_strtoul(startp, &endp, 0);
2040 + if (endp == startp)
2041 + break;
2042 + while (ispunct(*endp) || isspace(*endp))
2043 + endp++;
2044 + active_domains[i] = val;
2045 + if (active_domains[i] != val)
2046 + /* Overflow, force error below */
2047 + i = MAX_OPROF_DOMAINS + 1;
2048 + startp = endp;
2049 + }
2050 + /* Force error on trailing junk */
2051 + adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2052 +
2053 + kfree(tmpbuf);
2054 +
2055 + if (adomains > MAX_OPROF_DOMAINS
2056 + || oprofile_set_active(active_domains, adomains)) {
2057 + adomains = 0;
2058 + retval = -EINVAL;
2059 + }
2060 +
2061 + mutex_unlock(&adom_mutex);
2062 + return retval;
2063 +}
2064 +
2065 +static ssize_t adomain_read(struct file * file, char __user * buf,
2066 + size_t count, loff_t * offset)
2067 +{
2068 + char * tmpbuf;
2069 + size_t len;
2070 + int i;
2071 + ssize_t retval;
2072 +
2073 + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2074 + return -ENOMEM;
2075 +
2076 + mutex_lock(&adom_mutex);
2077 +
2078 + len = 0;
2079 + for (i = 0; i < adomains; i++)
2080 + len += snprintf(tmpbuf + len,
2081 + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2082 + "%u ", active_domains[i]);
2083 + WARN_ON(len > TMPBUFSIZE);
2084 + if (len != 0 && len <= TMPBUFSIZE)
2085 + tmpbuf[len-1] = '\n';
2086 +
2087 + mutex_unlock(&adom_mutex);
2088 +
2089 + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2090 +
2091 + kfree(tmpbuf);
2092 + return retval;
2093 +}
2094 +
2095 +
2096 +static struct file_operations active_domain_ops = {
2097 + .read = adomain_read,
2098 + .write = adomain_write,
2099 +};
2100 +
2101 +static unsigned int pdomains = 0;
2102 +static int passive_domains[MAX_OPROF_DOMAINS];
2103 +static DEFINE_MUTEX(pdom_mutex);
2104 +
2105 +static ssize_t pdomain_write(struct file * file, char const __user * buf,
2106 + size_t count, loff_t * offset)
2107 +{
2108 + char *tmpbuf;
2109 + char *startp, *endp;
2110 + int i;
2111 + unsigned long val;
2112 + ssize_t retval = count;
2113 +
2114 + if (*offset)
2115 + return -EINVAL;
2116 + if (count > TMPBUFSIZE - 1)
2117 + return -EINVAL;
2118 +
2119 + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2120 + return -ENOMEM;
2121 +
2122 + if (copy_from_user(tmpbuf, buf, count)) {
2123 + kfree(tmpbuf);
2124 + return -EFAULT;
2125 + }
2126 + tmpbuf[count] = 0;
2127 +
2128 + mutex_lock(&pdom_mutex);
2129 +
2130 + startp = tmpbuf;
2131 + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2132 + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2133 + val = simple_strtoul(startp, &endp, 0);
2134 + if (endp == startp)
2135 + break;
2136 + while (ispunct(*endp) || isspace(*endp))
2137 + endp++;
2138 + passive_domains[i] = val;
2139 + if (passive_domains[i] != val)
2140 + /* Overflow, force error below */
2141 + i = MAX_OPROF_DOMAINS + 1;
2142 + startp = endp;
2143 + }
2144 + /* Force error on trailing junk */
2145 + pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2146 +
2147 + kfree(tmpbuf);
2148 +
2149 + if (pdomains > MAX_OPROF_DOMAINS
2150 + || oprofile_set_passive(passive_domains, pdomains)) {
2151 + pdomains = 0;
2152 + retval = -EINVAL;
2153 + }
2154 +
2155 + mutex_unlock(&pdom_mutex);
2156 + return retval;
2157 +}
2158 +
2159 +static ssize_t pdomain_read(struct file * file, char __user * buf,
2160 + size_t count, loff_t * offset)
2161 +{
2162 + char * tmpbuf;
2163 + size_t len;
2164 + int i;
2165 + ssize_t retval;
2166 +
2167 + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2168 + return -ENOMEM;
2169 +
2170 + mutex_lock(&pdom_mutex);
2171 +
2172 + len = 0;
2173 + for (i = 0; i < pdomains; i++)
2174 + len += snprintf(tmpbuf + len,
2175 + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2176 + "%u ", passive_domains[i]);
2177 + WARN_ON(len > TMPBUFSIZE);
2178 + if (len != 0 && len <= TMPBUFSIZE)
2179 + tmpbuf[len-1] = '\n';
2180 +
2181 + mutex_unlock(&pdom_mutex);
2182 +
2183 + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2184 +
2185 + kfree(tmpbuf);
2186 + return retval;
2187 +}
2188 +
2189 +static struct file_operations passive_domain_ops = {
2190 + .read = pdomain_read,
2191 + .write = pdomain_write,
2192 +};
2193 +
2194 void oprofile_create_files(struct super_block * sb, struct dentry * root)
2195 {
2196 oprofilefs_create_file(sb, root, "enable", &enable_fops);
2197 oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
2198 + oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
2199 + oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
2200 oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
2201 oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
2202 oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
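The active_domains and passive_domains files created by the hunk above accept a list of Xen domain IDs separated by whitespace or punctuation, written in a single write at offset 0; adomain_write()/pdomain_write() parse the list and pass it to oprofile_set_active()/oprofile_set_passive(). A minimal user-space sketch of programming the active list, assuming oprofilefs is mounted at /dev/oprofile (the mount point and the domain IDs are illustrative only, not part of the patch):

/* Illustrative only: program the domain list parsed by adomain_write() above.
 * Assumes oprofilefs is mounted at /dev/oprofile. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *domains = "0 1 3\n";	/* dom0 plus two guest domains */
	int fd = open("/dev/oprofile/active_domains", O_WRONLY);

	if (fd < 0) {
		perror("open /dev/oprofile/active_domains");
		return 1;
	}
	/* adomain_write() expects the whole list in one write at offset 0. */
	if (write(fd, domains, strlen(domains)) < 0)
		perror("write");
	close(fd);
	return 0;
}

Reading the same file back returns the list currently in effect, space-separated and newline-terminated, as produced by adomain_read().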
2203 --- a/drivers/pci/bus.c
2204 +++ b/drivers/pci/bus.c
2205 @@ -17,6 +17,8 @@
2206
2207 #include "pci.h"
2208
2209 +extern int pci_mem_align;
2210 +
2211 /**
2212 * pci_bus_alloc_resource - allocate a resource from a parent bus
2213 * @bus: PCI bus
2214 @@ -44,6 +46,11 @@
2215
2216 type_mask |= IORESOURCE_IO | IORESOURCE_MEM;
2217
2218 + /* If the boot parameter 'pci-mem-align' was specified then we need to
2219 + align the memory addresses to page size. */
2220 + if (pci_mem_align && (align < (PAGE_SIZE-1)))
2221 + align = PAGE_SIZE - 1;
2222 +
2223 for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
2224 struct resource *r = bus->resource[i];
2225 if (!r)
2226 --- a/drivers/pci/quirks.c
2227 +++ b/drivers/pci/quirks.c
2228 @@ -24,6 +24,40 @@
2229 #include <linux/kallsyms.h>
2230 #include "pci.h"
2231
2232 +/* A global flag which signals if we should page-align PCI mem windows. */
2233 +int pci_mem_align = 0;
2234 +
2235 +static int __init set_pci_mem_align(char *str)
2236 +{
2237 + pci_mem_align = 1;
2238 + return 1;
2239 +}
2240 +__setup("pci-mem-align", set_pci_mem_align);
2241 +
2242 +/* This quirk function enables us to force all memory resources which are
2243 + * assigned to PCI devices, to be page-aligned.
2244 + */
2245 +static void __devinit quirk_align_mem_resources(struct pci_dev *dev)
2246 +{
2247 + int i;
2248 + struct resource *r;
2249 + resource_size_t old_start;
2250 +
2251 + if (!pci_mem_align)
2252 + return;
2253 +
2254 + for (i=0; i < DEVICE_COUNT_RESOURCE; i++) {
2255 + r = &dev->resource[i];
2256 + if ((r == NULL) || !(r->flags & IORESOURCE_MEM))
2257 + continue;
2258 +
2259 + old_start = r->start;
2260 + r->start = (r->start + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
2261 + r->end = r->end - (old_start - r->start);
2262 + }
2263 +}
2264 +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_align_mem_resources);
2265 +
2266 /* The Mellanox Tavor device gives false positive parity errors
2267 * Mark this device with a broken_parity_status, to allow
2268 * PCI scanning code to "skip" this now blacklisted device.
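The quirk_align_mem_resources() hunk above rounds each memory resource's start up to the next page boundary and shifts the end by the same amount, so the window becomes page-aligned while keeping its original size. A stand-alone sketch of that arithmetic, with PAGE_SIZE assumed to be 4096 purely for the example:

/* Stand-alone illustration of the rounding done by quirk_align_mem_resources()
 * above: start is rounded up to a page boundary and end is shifted by the same
 * amount, so the resource size is unchanged. PAGE_SIZE is assumed to be 4096
 * here purely for the example. */
#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long start = 0xf0001200UL, end = 0xf00012ffUL;	/* 256-byte window */
	unsigned long old_start = start;

	start = (start + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
	end = end - (old_start - start);	/* old_start - start wraps negative: end grows */

	printf("start=%#lx end=%#lx size=%lu\n", start, end, end - start + 1);
	return 0;
}

This prints start=0xf0002000 end=0xf00020ff size=256, i.e. the same 256-byte window moved up to a page boundary.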
2269 --- a/fs/aio.c
2270 +++ b/fs/aio.c
2271 @@ -36,6 +36,11 @@
2272 #include <asm/uaccess.h>
2273 #include <asm/mmu_context.h>
2274
2275 +#ifdef CONFIG_EPOLL
2276 +#include <linux/poll.h>
2277 +#include <linux/eventpoll.h>
2278 +#endif
2279 +
2280 #if DEBUG > 1
2281 #define dprintk printk
2282 #else
2283 @@ -1008,6 +1013,11 @@
2284 if (waitqueue_active(&ctx->wait))
2285 wake_up(&ctx->wait);
2286
2287 +#ifdef CONFIG_EPOLL
2288 + if (ctx->file && waitqueue_active(&ctx->poll_wait))
2289 + wake_up(&ctx->poll_wait);
2290 +#endif
2291 +
2292 spin_unlock_irqrestore(&ctx->ctx_lock, flags);
2293 return ret;
2294 }
2295 @@ -1015,6 +1025,8 @@
2296 /* aio_read_evt
2297 * Pull an event off of the ioctx's event ring. Returns the number of
2298 * events fetched (0 or 1 ;-)
2299 + * If the ent parameter is 0, just returns the number of events that would
2300 + * be fetched.
2301 * FIXME: make this use cmpxchg.
2302 * TODO: make the ringbuffer user mmap()able (requires FIXME).
2303 */
2304 @@ -1037,13 +1049,18 @@
2305
2306 head = ring->head % info->nr;
2307 if (head != ring->tail) {
2308 - struct io_event *evp = aio_ring_event(info, head, KM_USER1);
2309 - *ent = *evp;
2310 - head = (head + 1) % info->nr;
2311 - smp_mb(); /* finish reading the event before updatng the head */
2312 - ring->head = head;
2313 - ret = 1;
2314 - put_aio_ring_event(evp, KM_USER1);
2315 + if (ent) { /* event requested */
2316 + struct io_event *evp =
2317 + aio_ring_event(info, head, KM_USER1);
2318 + *ent = *evp;
2319 + head = (head + 1) % info->nr;
2320 + /* finish reading the event before updating the head */
2321 + smp_mb();
2322 + ring->head = head;
2323 + ret = 1;
2324 + put_aio_ring_event(evp, KM_USER1);
2325 + } else /* only need to know availability */
2326 + ret = 1;
2327 }
2328 spin_unlock(&info->ring_lock);
2329
2330 @@ -1234,6 +1251,13 @@
2331
2332 aio_cancel_all(ioctx);
2333 wait_for_all_aios(ioctx);
2334 +#ifdef CONFIG_EPOLL
2335 + /* forget the poll file, but it's up to the user to close it */
2336 + if (ioctx->file) {
2337 + ioctx->file->private_data = 0;
2338 + ioctx->file = 0;
2339 + }
2340 +#endif
2341
2342 /*
2343 * Wake up any waiters. The setting of ctx->dead must be seen
2344 @@ -1244,6 +1268,68 @@
2345 put_ioctx(ioctx); /* once for the lookup */
2346 }
2347
2348 +#ifdef CONFIG_EPOLL
2349 +
2350 +static int aio_queue_fd_close(struct inode *inode, struct file *file)
2351 +{
2352 + struct kioctx *ioctx = file->private_data;
2353 + if (ioctx) {
2354 + file->private_data = 0;
2355 + spin_lock_irq(&ioctx->ctx_lock);
2356 + ioctx->file = 0;
2357 + spin_unlock_irq(&ioctx->ctx_lock);
2358 + }
2359 + return 0;
2360 +}
2361 +
2362 +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
2363 +{ unsigned int pollflags = 0;
2364 + struct kioctx *ioctx = file->private_data;
2365 +
2366 + if (ioctx) {
2367 +
2368 + spin_lock_irq(&ioctx->ctx_lock);
2369 + /* Insert inside our poll wait queue */
2370 + poll_wait(file, &ioctx->poll_wait, wait);
2371 +
2372 + /* Check our condition */
2373 + if (aio_read_evt(ioctx, 0))
2374 + pollflags = POLLIN | POLLRDNORM;
2375 + spin_unlock_irq(&ioctx->ctx_lock);
2376 + }
2377 +
2378 + return pollflags;
2379 +}
2380 +
2381 +static const struct file_operations aioq_fops = {
2382 + .release = aio_queue_fd_close,
2383 + .poll = aio_queue_fd_poll
2384 +};
2385 +
2386 +/* make_aio_fd:
2387 + * Create a file descriptor that can be used to poll the event queue.
2388 + * Based and piggybacked on the excellent epoll code.
2389 + */
2390 +
2391 +static int make_aio_fd(struct kioctx *ioctx)
2392 +{
2393 + int error, fd;
2394 + struct inode *inode;
2395 + struct file *file;
2396 +
2397 + error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
2398 + if (error)
2399 + return error;
2400 +
2401 + /* associate the file with the IO context */
2402 + file->private_data = ioctx;
2403 + ioctx->file = file;
2404 + init_waitqueue_head(&ioctx->poll_wait);
2405 + return fd;
2406 +}
2407 +#endif
2408 +
2409 +
2410 /* sys_io_setup:
2411 * Create an aio_context capable of receiving at least nr_events.
2412 * ctxp must not point to an aio_context that already exists, and
2413 @@ -1256,18 +1342,30 @@
2414 * resources are available. May fail with -EFAULT if an invalid
2415 * pointer is passed for ctxp. Will fail with -ENOSYS if not
2416 * implemented.
2417 + *
2418 + * To request a selectable fd, the user context has to be initialized
2419 + * to 1, instead of 0, and the return value is the fd.
2420 + * This keeps the system call compatible, since a non-zero value
2421 + * was not allowed so far.
2422 */
2423 asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
2424 {
2425 struct kioctx *ioctx = NULL;
2426 unsigned long ctx;
2427 long ret;
2428 + int make_fd = 0;
2429
2430 ret = get_user(ctx, ctxp);
2431 if (unlikely(ret))
2432 goto out;
2433
2434 ret = -EINVAL;
2435 +#ifdef CONFIG_EPOLL
2436 + if (ctx == 1) {
2437 + make_fd = 1;
2438 + ctx = 0;
2439 + }
2440 +#endif
2441 if (unlikely(ctx || nr_events == 0)) {
2442 pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
2443 ctx, nr_events);
2444 @@ -1278,8 +1376,12 @@
2445 ret = PTR_ERR(ioctx);
2446 if (!IS_ERR(ioctx)) {
2447 ret = put_user(ioctx->user_id, ctxp);
2448 - if (!ret)
2449 - return 0;
2450 +#ifdef CONFIG_EPOLL
2451 + if (make_fd && ret >= 0)
2452 + ret = make_aio_fd(ioctx);
2453 +#endif
2454 + if (ret >= 0)
2455 + return ret;
2456
2457 get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
2458 io_destroy(ioctx);
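The comment added to sys_io_setup() above describes the extended calling convention: seed the context handle with 1 instead of 0 and, on a CONFIG_EPOLL kernel carrying this patch, the syscall returns a file descriptor that becomes readable once completion events are queued. A hedged user-space sketch using raw syscall numbers (the libaio wrapper assumes a zero-initialized context); it only works on a kernel built with this patch:

/* Illustrative only: the selectable-fd convention described in the
 * sys_io_setup() comment above. Requires a kernel with this patch and
 * CONFIG_EPOLL; raw syscalls are used because the libaio wrapper expects a
 * zero-initialized context. */
#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>

int main(void)
{
	aio_context_t ctx = 1;			/* 1 requests the pollable fd */
	long fd = syscall(__NR_io_setup, 128, &ctx);

	if (fd < 0) {
		perror("io_setup");
		return 1;
	}
	/* ctx now holds the real context id; fd is the descriptor from make_aio_fd(). */
	struct pollfd pfd = { .fd = (int)fd, .events = POLLIN };
	int n = poll(&pfd, 1, 1000);		/* wait up to 1s for queued events */

	printf("poll returned %d revents=%#x\n", n, n > 0 ? pfd.revents : 0);

	syscall(__NR_io_destroy, ctx);
	close((int)fd);				/* the patch leaves closing the fd to user space */
	return 0;
}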
2459 --- a/fs/compat_ioctl.c
2460 +++ b/fs/compat_ioctl.c
2461 @@ -114,6 +114,13 @@
2462 #include <asm/fbio.h>
2463 #endif
2464
2465 +#ifdef CONFIG_XEN
2466 +#include <xen/interface/xen.h>
2467 +#include <xen/public/evtchn.h>
2468 +#include <xen/public/privcmd.h>
2469 +#include <xen/compat_ioctl.h>
2470 +#endif
2471 +
2472 static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
2473 unsigned long arg, struct file *f)
2474 {
2475 @@ -2834,6 +2841,18 @@
2476 IGNORE_IOCTL(FBIOSCURSOR32)
2477 IGNORE_IOCTL(FBIOGCURSOR32)
2478 #endif
2479 +
2480 +#ifdef CONFIG_XEN
2481 +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32)
2482 +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32)
2483 +COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL)
2484 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ)
2485 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN)
2486 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_UNBOUND_PORT)
2487 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_UNBIND)
2488 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_NOTIFY)
2489 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_RESET)
2490 +#endif
2491 };
2492
2493 #define IOCTL_HASHSIZE 256
2494 --- a/fs/splice.c
2495 +++ b/fs/splice.c
2496 @@ -1218,6 +1218,9 @@
2497 if (!access_ok(VERIFY_READ, base, len))
2498 break;
2499
2500 + if (unlikely(!access_ok(VERIFY_READ, base, len)))
2501 + break;
2502 +
2503 /*
2504 * Get this base offset and number of pages, then map
2505 * in the user pages.
2506 --- a/include/asm-generic/pci.h
2507 +++ b/include/asm-generic/pci.h
2508 @@ -43,7 +43,9 @@
2509 return root;
2510 }
2511
2512 +#ifndef pcibios_scan_all_fns
2513 #define pcibios_scan_all_fns(a, b) 0
2514 +#endif
2515
2516 #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
2517 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
2518 --- a/include/asm-generic/pgtable.h
2519 +++ b/include/asm-generic/pgtable.h
2520 @@ -99,6 +99,10 @@
2521 }
2522 #endif
2523
2524 +#ifndef arch_change_pte_range
2525 +#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
2526 +#endif
2527 +
2528 #ifndef __HAVE_ARCH_PTE_SAME
2529 #define pte_same(A,B) (pte_val(A) == pte_val(B))
2530 #endif
2531 --- a/include/linux/aio.h
2532 +++ b/include/linux/aio.h
2533 @@ -200,6 +200,11 @@
2534 struct aio_ring_info ring_info;
2535
2536 struct delayed_work wq;
2537 +#ifdef CONFIG_EPOLL
2538 + // poll integration
2539 + wait_queue_head_t poll_wait;
2540 + struct file *file;
2541 +#endif
2542 };
2543
2544 /* prototypes */
2545 --- a/include/linux/interrupt.h
2546 +++ b/include/linux/interrupt.h
2547 @@ -194,6 +194,12 @@
2548 }
2549 #endif /* CONFIG_GENERIC_HARDIRQS */
2550
2551 +#ifdef CONFIG_HAVE_IRQ_IGNORE_UNHANDLED
2552 +int irq_ignore_unhandled(unsigned int irq);
2553 +#else
2554 +#define irq_ignore_unhandled(irq) 0
2555 +#endif
2556 +
2557 #ifndef __ARCH_SET_SOFTIRQ_PENDING
2558 #define set_softirq_pending(x) (local_softirq_pending() = (x))
2559 #define or_softirq_pending(x) (local_softirq_pending() |= (x))
2560 --- a/include/linux/kexec.h
2561 +++ b/include/linux/kexec.h
2562 @@ -46,6 +46,13 @@
2563 KEXEC_CORE_NOTE_NAME_BYTES + \
2564 KEXEC_CORE_NOTE_DESC_BYTES )
2565
2566 +#ifndef KEXEC_ARCH_HAS_PAGE_MACROS
2567 +#define kexec_page_to_pfn(page) page_to_pfn(page)
2568 +#define kexec_pfn_to_page(pfn) pfn_to_page(pfn)
2569 +#define kexec_virt_to_phys(addr) virt_to_phys(addr)
2570 +#define kexec_phys_to_virt(addr) phys_to_virt(addr)
2571 +#endif
2572 +
2573 /*
2574 * This structure is used to hold the arguments that are used when loading
2575 * kernel binaries.
2576 @@ -106,6 +113,12 @@
2577 extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
2578 extern int machine_kexec_prepare(struct kimage *image);
2579 extern void machine_kexec_cleanup(struct kimage *image);
2580 +#ifdef CONFIG_XEN
2581 +extern int xen_machine_kexec_load(struct kimage *image);
2582 +extern void xen_machine_kexec_unload(struct kimage *image);
2583 +extern void xen_machine_kexec_setup_resources(void);
2584 +extern void xen_machine_kexec_register_resources(struct resource *res);
2585 +#endif
2586 extern asmlinkage long sys_kexec_load(unsigned long entry,
2587 unsigned long nr_segments,
2588 struct kexec_segment __user *segments,
2589 @@ -154,6 +167,10 @@
2590
2591 #ifndef kexec_flush_icache_page
2592 #define kexec_flush_icache_page(page)
2593 +#endif
2594 +
2595 +#ifndef kexec_flush_icache_page
2596 +#define kexec_flush_icache_page(page)
2597 #endif
2598
2599 #define KEXEC_ON_CRASH 0x00000001
2600 --- a/include/linux/mm.h
2601 +++ b/include/linux/mm.h
2602 @@ -100,6 +100,9 @@
2603 #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
2604 #define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */
2605 #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
2606 +#ifdef CONFIG_XEN
2607 +#define VM_FOREIGN 0x00200000 /* Has pages belonging to another VM */
2608 +#endif
2609 #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
2610 #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
2611 #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
2612 @@ -172,6 +175,10 @@
2613 /* notification that a previously read-only page is about to become
2614 * writable, if an error is returned it will cause a SIGBUS */
2615 int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
2616 + /* Area-specific function for clearing the PTE at @ptep. Returns the
2617 + * original value of @ptep. */
2618 + pte_t (*zap_pte)(struct vm_area_struct *vma,
2619 + unsigned long addr, pte_t *ptep, int is_fullmm);
2620 #ifdef CONFIG_NUMA
2621 int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
2622 struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
2623 --- a/include/linux/oprofile.h
2624 +++ b/include/linux/oprofile.h
2625 @@ -16,6 +16,8 @@
2626 #include <linux/types.h>
2627 #include <linux/spinlock.h>
2628 #include <asm/atomic.h>
2629 +
2630 +#include <xen/interface/xenoprof.h>
2631
2632 /* Each escaped entry is prefixed by ESCAPE_CODE
2633 * then one of the following codes, then the
2634 @@ -28,7 +30,7 @@
2635 #define CPU_SWITCH_CODE 2
2636 #define COOKIE_SWITCH_CODE 3
2637 #define KERNEL_ENTER_SWITCH_CODE 4
2638 -#define KERNEL_EXIT_SWITCH_CODE 5
2639 +#define USER_ENTER_SWITCH_CODE 5
2640 #define MODULE_LOADED_CODE 6
2641 #define CTX_TGID_CODE 7
2642 #define TRACE_BEGIN_CODE 8
2643 @@ -36,6 +38,7 @@
2644 #define XEN_ENTER_SWITCH_CODE 10
2645 #define SPU_PROFILING_CODE 11
2646 #define SPU_CTX_SWITCH_CODE 12
2647 +#define DOMAIN_SWITCH_CODE 13
2648
2649 struct super_block;
2650 struct dentry;
2651 @@ -47,6 +50,11 @@
2652 /* create any necessary configuration files in the oprofile fs.
2653 * Optional. */
2654 int (*create_files)(struct super_block * sb, struct dentry * root);
2655 + /* setup active domains with Xen */
2656 + int (*set_active)(int *active_domains, unsigned int adomains);
2657 + /* setup passive domains with Xen */
2658 + int (*set_passive)(int *passive_domains, unsigned int pdomains);
2659 +
2660 /* Do any necessary interrupt setup. Optional. */
2661 int (*setup)(void);
2662 /* Do any necessary interrupt shutdown. Optional. */
2663 @@ -113,6 +121,8 @@
2664 /* add a backtrace entry, to be called from the ->backtrace callback */
2665 void oprofile_add_trace(unsigned long eip);
2666
2667 +/* add a domain switch entry */
2668 +int oprofile_add_domain_switch(int32_t domain_id);
2669
2670 /**
2671 * Create a file of the given name as a child of the given root, with
2672 --- a/include/linux/page-flags.h
2673 +++ b/include/linux/page-flags.h
2674 @@ -97,6 +97,8 @@
2675 #define PG_checked PG_owner_priv_1 /* Used by some filesystems */
2676 #define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */
2677
2678 +#define PG_foreign 20 /* Page is owned by foreign allocator. */
2679 +
2680 #if (BITS_PER_LONG > 32)
2681 /*
2682 * 64-bit-only flags build down from bit 31
2683 @@ -296,6 +298,19 @@
2684 #define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags)
2685 #define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags)
2686
2687 +#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
2688 +#define SetPageForeign(_page, dtor) do { \
2689 + set_bit(PG_foreign, &(_page)->flags); \
2690 + BUG_ON((dtor) == (void (*)(struct page *))0); \
2691 + (_page)->index = (long)(dtor); \
2692 +} while (0)
2693 +#define ClearPageForeign(page) do { \
2694 + clear_bit(PG_foreign, &(page)->flags); \
2695 + (page)->index = 0; \
2696 +} while (0)
2697 +#define PageForeignDestructor(_page) \
2698 + ((void (*)(struct page *))(_page)->index)(_page)
2699 +
2700 struct page; /* forward declaration */
2701
2702 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
2703 --- a/include/linux/sched.h
2704 +++ b/include/linux/sched.h
2705 @@ -290,6 +290,7 @@
2706 extern void sched_show_task(struct task_struct *p);
2707
2708 #ifdef CONFIG_DETECT_SOFTLOCKUP
2709 +extern unsigned long softlockup_get_next_event(void);
2710 extern void softlockup_tick(void);
2711 extern void spawn_softlockup_task(void);
2712 extern void touch_softlockup_watchdog(void);
2713 @@ -299,6 +300,10 @@
2714 extern unsigned long sysctl_hung_task_timeout_secs;
2715 extern unsigned long sysctl_hung_task_warnings;
2716 #else
2717 +static inline unsigned long softlockup_get_next_event(void)
2718 +{
2719 + return MAX_JIFFY_OFFSET;
2720 +}
2721 static inline void softlockup_tick(void)
2722 {
2723 }
2724 --- a/include/linux/skbuff.h
2725 +++ b/include/linux/skbuff.h
2726 @@ -217,6 +217,8 @@
2727 * @local_df: allow local fragmentation
2728 * @cloned: Head may be cloned (check refcnt to be sure)
2729 * @nohdr: Payload reference only, must not modify header
2730 + * @proto_data_valid: Protocol data validated since arriving at localhost
2731 + * @proto_csum_blank: Protocol csum must be added before leaving localhost
2732 * @pkt_type: Packet class
2733 * @fclone: skbuff clone status
2734 * @ip_summed: Driver fed us an IP checksum
2735 @@ -310,7 +312,13 @@
2736 __u16 tc_verd; /* traffic control verdict */
2737 #endif
2738 #endif
2739 +#ifndef CONFIG_XEN
2740 /* 2 byte hole */
2741 +#else
2742 + __u8 proto_data_valid:1,
2743 + proto_csum_blank:1;
2744 + /* 1 byte hole */
2745 +#endif
2746
2747 #ifdef CONFIG_NET_DMA
2748 dma_cookie_t dma_cookie;
2749 --- a/include/linux/vermagic.h
2750 +++ b/include/linux/vermagic.h
2751 @@ -17,6 +17,11 @@
2752 #else
2753 #define MODULE_VERMAGIC_MODULE_UNLOAD ""
2754 #endif
2755 +#ifdef CONFIG_XEN
2756 +#define MODULE_VERMAGIC_XEN "Xen "
2757 +#else
2758 +#define MODULE_VERMAGIC_XEN
2759 +#endif
2760 #ifndef MODULE_ARCH_VERMAGIC
2761 #define MODULE_ARCH_VERMAGIC ""
2762 #endif
2763 @@ -24,5 +29,6 @@
2764 #define VERMAGIC_STRING \
2765 UTS_RELEASE " " \
2766 MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \
2767 - MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC
2768 + MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_XEN \
2769 + MODULE_ARCH_VERMAGIC
2770
2771 --- a/kernel/irq/spurious.c
2772 +++ b/kernel/irq/spurious.c
2773 @@ -182,7 +182,7 @@
2774 */
2775 if (time_after(jiffies, desc->last_unhandled + HZ/10))
2776 desc->irqs_unhandled = 1;
2777 - else
2778 + else if (!irq_ignore_unhandled(irq))
2779 desc->irqs_unhandled++;
2780 desc->last_unhandled = jiffies;
2781 if (unlikely(action_ret != IRQ_NONE))
2782 --- a/kernel/kexec.c
2783 +++ b/kernel/kexec.c
2784 @@ -340,13 +340,26 @@
2785 return 0;
2786 }
2787
2788 -static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
2789 +static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order, unsigned long limit)
2790 {
2791 struct page *pages;
2792
2793 pages = alloc_pages(gfp_mask, order);
2794 if (pages) {
2795 unsigned int count, i;
2796 +#ifdef CONFIG_XEN
2797 + int address_bits;
2798 +
2799 + if (limit == ~0UL)
2800 + address_bits = BITS_PER_LONG;
2801 + else
2802 + address_bits = long_log2(limit);
2803 +
2804 + if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) {
2805 + __free_pages(pages, order);
2806 + return NULL;
2807 + }
2808 +#endif
2809 pages->mapping = NULL;
2810 set_page_private(pages, order);
2811 count = 1 << order;
2812 @@ -365,6 +378,9 @@
2813 count = 1 << order;
2814 for (i = 0; i < count; i++)
2815 ClearPageReserved(page + i);
2816 +#ifdef CONFIG_XEN
2817 + xen_destroy_contiguous_region((unsigned long)page_address(page), order);
2818 +#endif
2819 __free_pages(page, order);
2820 }
2821
2822 @@ -410,10 +426,10 @@
2823 do {
2824 unsigned long pfn, epfn, addr, eaddr;
2825
2826 - pages = kimage_alloc_pages(GFP_KERNEL, order);
2827 + pages = kimage_alloc_pages(GFP_KERNEL, order, KEXEC_CONTROL_MEMORY_LIMIT);
2828 if (!pages)
2829 break;
2830 - pfn = page_to_pfn(pages);
2831 + pfn = kexec_page_to_pfn(pages);
2832 epfn = pfn + count;
2833 addr = pfn << PAGE_SHIFT;
2834 eaddr = epfn << PAGE_SHIFT;
2835 @@ -447,6 +463,7 @@
2836 return pages;
2837 }
2838
2839 +#ifndef CONFIG_XEN
2840 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
2841 unsigned int order)
2842 {
2843 @@ -500,7 +517,7 @@
2844 }
2845 /* If I don't overlap any segments I have found my hole! */
2846 if (i == image->nr_segments) {
2847 - pages = pfn_to_page(hole_start >> PAGE_SHIFT);
2848 + pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT);
2849 break;
2850 }
2851 }
2852 @@ -527,6 +544,13 @@
2853
2854 return pages;
2855 }
2856 +#else /* !CONFIG_XEN */
2857 +struct page *kimage_alloc_control_pages(struct kimage *image,
2858 + unsigned int order)
2859 +{
2860 + return kimage_alloc_normal_control_pages(image, order);
2861 +}
2862 +#endif
2863
2864 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
2865 {
2866 @@ -542,7 +566,7 @@
2867 return -ENOMEM;
2868
2869 ind_page = page_address(page);
2870 - *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
2871 + *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION;
2872 image->entry = ind_page;
2873 image->last_entry = ind_page +
2874 ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
2875 @@ -603,13 +627,13 @@
2876 #define for_each_kimage_entry(image, ptr, entry) \
2877 for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
2878 ptr = (entry & IND_INDIRECTION)? \
2879 - phys_to_virt((entry & PAGE_MASK)): ptr +1)
2880 + kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1)
2881
2882 static void kimage_free_entry(kimage_entry_t entry)
2883 {
2884 struct page *page;
2885
2886 - page = pfn_to_page(entry >> PAGE_SHIFT);
2887 + page = kexec_pfn_to_page(entry >> PAGE_SHIFT);
2888 kimage_free_pages(page);
2889 }
2890
2891 @@ -621,6 +645,10 @@
2892 if (!image)
2893 return;
2894
2895 +#ifdef CONFIG_XEN
2896 + xen_machine_kexec_unload(image);
2897 +#endif
2898 +
2899 kimage_free_extra_pages(image);
2900 for_each_kimage_entry(image, ptr, entry) {
2901 if (entry & IND_INDIRECTION) {
2902 @@ -696,7 +724,7 @@
2903 * have a match.
2904 */
2905 list_for_each_entry(page, &image->dest_pages, lru) {
2906 - addr = page_to_pfn(page) << PAGE_SHIFT;
2907 + addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
2908 if (addr == destination) {
2909 list_del(&page->lru);
2910 return page;
2911 @@ -707,16 +735,16 @@
2912 kimage_entry_t *old;
2913
2914 /* Allocate a page, if we run out of memory give up */
2915 - page = kimage_alloc_pages(gfp_mask, 0);
2916 + page = kimage_alloc_pages(gfp_mask, 0, KEXEC_SOURCE_MEMORY_LIMIT);
2917 if (!page)
2918 return NULL;
2919 /* If the page cannot be used file it away */
2920 - if (page_to_pfn(page) >
2921 + if (kexec_page_to_pfn(page) >
2922 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
2923 list_add(&page->lru, &image->unuseable_pages);
2924 continue;
2925 }
2926 - addr = page_to_pfn(page) << PAGE_SHIFT;
2927 + addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
2928
2929 /* If it is the destination page we want use it */
2930 if (addr == destination)
2931 @@ -739,7 +767,7 @@
2932 struct page *old_page;
2933
2934 old_addr = *old & PAGE_MASK;
2935 - old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
2936 + old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT);
2937 copy_highpage(page, old_page);
2938 *old = addr | (*old & ~PAGE_MASK);
2939
2940 @@ -789,7 +817,7 @@
2941 result = -ENOMEM;
2942 goto out;
2943 }
2944 - result = kimage_add_page(image, page_to_pfn(page)
2945 + result = kimage_add_page(image, kexec_page_to_pfn(page)
2946 << PAGE_SHIFT);
2947 if (result < 0)
2948 goto out;
2949 @@ -821,6 +849,7 @@
2950 return result;
2951 }
2952
2953 +#ifndef CONFIG_XEN
2954 static int kimage_load_crash_segment(struct kimage *image,
2955 struct kexec_segment *segment)
2956 {
2957 @@ -843,7 +872,7 @@
2958 char *ptr;
2959 size_t uchunk, mchunk;
2960
2961 - page = pfn_to_page(maddr >> PAGE_SHIFT);
2962 + page = kexec_pfn_to_page(maddr >> PAGE_SHIFT);
2963 if (!page) {
2964 result = -ENOMEM;
2965 goto out;
2966 @@ -892,6 +921,13 @@
2967
2968 return result;
2969 }
2970 +#else /* CONFIG_XEN */
2971 +static int kimage_load_segment(struct kimage *image,
2972 + struct kexec_segment *segment)
2973 +{
2974 + return kimage_load_normal_segment(image, segment);
2975 +}
2976 +#endif
2977
2978 /*
2979 * Exec Kernel system call: for obvious reasons only root may call it.
2980 @@ -1002,6 +1038,13 @@
2981 if (result)
2982 goto out;
2983 }
2984 +#ifdef CONFIG_XEN
2985 + if (image) {
2986 + result = xen_machine_kexec_load(image);
2987 + if (result)
2988 + goto out;
2989 + }
2990 +#endif
2991 /* Install the new kernel, and Uninstall the old */
2992 image = xchg(dest_image, image);
2993
2994 --- a/kernel/softlockup.c
2995 +++ b/kernel/softlockup.c
2996 @@ -39,6 +39,19 @@
2997 .notifier_call = softlock_panic,
2998 };
2999
3000 +unsigned long softlockup_get_next_event(void)
3001 +{
3002 + int this_cpu = smp_processor_id();
3003 + unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
3004 +
3005 + if (per_cpu(print_timestamp, this_cpu) == touch_timestamp ||
3006 + did_panic ||
3007 + !per_cpu(watchdog_task, this_cpu))
3008 + return MAX_JIFFY_OFFSET;
3009 +
3010 + return max_t(long, 0, touch_timestamp + HZ - jiffies);
3011 +}
3012 +
3013 /*
3014 * Returns seconds, approximately. We don't need nanosecond
3015 * resolution, and we don't need to waste time with a big divide when
3016 --- a/kernel/sysctl.c
3017 +++ b/kernel/sysctl.c
3018 @@ -742,7 +742,7 @@
3019 .proc_handler = &proc_dointvec,
3020 },
3021 #endif
3022 -#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
3023 +#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) && !defined(CONFIG_ACPI_PV_SLEEP)
3024 {
3025 .procname = "acpi_video_flags",
3026 .data = &acpi_realmode_flags,
3027 --- a/kernel/timer.c
3028 +++ b/kernel/timer.c
3029 @@ -802,7 +802,7 @@
3030 unsigned long get_next_timer_interrupt(unsigned long now)
3031 {
3032 struct tvec_base *base = __get_cpu_var(tvec_bases);
3033 - unsigned long expires;
3034 + unsigned long expires, sl_next;
3035
3036 spin_lock(&base->lock);
3037 expires = __next_timer_interrupt(base);
3038 @@ -811,7 +811,11 @@
3039 if (time_before_eq(expires, now))
3040 return now;
3041
3042 - return cmp_next_hrtimer_event(now, expires);
3043 + expires = cmp_next_hrtimer_event(now, expires);
3044 + sl_next = softlockup_get_next_event();
3045 +
3046 + return expires <= now || expires - now < sl_next
3047 + ? expires : now + sl_next;
3048 }
3049
3050 #ifdef CONFIG_NO_IDLE_HZ
3051 --- a/mm/memory.c
3052 +++ b/mm/memory.c
3053 @@ -402,6 +402,12 @@
3054 return NULL;
3055 }
3056
3057 +#if defined(CONFIG_XEN) && defined(CONFIG_X86)
3058 + /* XEN: Covers user-space grant mappings (even of local pages). */
3059 + if (unlikely(vma->vm_flags & VM_FOREIGN))
3060 + return NULL;
3061 +#endif
3062 +
3063 #ifdef CONFIG_DEBUG_VM
3064 /*
3065 * Add some anal sanity checks for now. Eventually,
3066 @@ -410,7 +416,8 @@
3067 * and that the resulting page looks ok.
3068 */
3069 if (unlikely(!pfn_valid(pfn))) {
3070 - print_bad_pte(vma, pte, addr);
3071 + if (!(vma->vm_flags & VM_RESERVED))
3072 + print_bad_pte(vma, pte, addr);
3073 return NULL;
3074 }
3075 #endif
3076 @@ -668,8 +675,12 @@
3077 page->index > details->last_index))
3078 continue;
3079 }
3080 - ptent = ptep_get_and_clear_full(mm, addr, pte,
3081 - tlb->fullmm);
3082 + if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte))
3083 + ptent = vma->vm_ops->zap_pte(vma, addr, pte,
3084 + tlb->fullmm);
3085 + else
3086 + ptent = ptep_get_and_clear_full(mm, addr, pte,
3087 + tlb->fullmm);
3088 tlb_remove_tlb_entry(tlb, pte, addr);
3089 if (unlikely(!page))
3090 continue;
3091 @@ -902,6 +913,7 @@
3092 tlb_finish_mmu(tlb, address, end);
3093 return end;
3094 }
3095 +EXPORT_SYMBOL(zap_page_range);
3096
3097 /*
3098 * Do a quick page-table lookup for a single page.
3099 @@ -1043,6 +1055,26 @@
3100 continue;
3101 }
3102
3103 +#ifdef CONFIG_XEN
3104 + if (vma && (vma->vm_flags & VM_FOREIGN)) {
3105 + struct page **map = vma->vm_private_data;
3106 + int offset = (start - vma->vm_start) >> PAGE_SHIFT;
3107 + if (map[offset] != NULL) {
3108 + if (pages) {
3109 + struct page *page = map[offset];
3110 +
3111 + pages[i] = page;
3112 + get_page(page);
3113 + }
3114 + if (vmas)
3115 + vmas[i] = vma;
3116 + i++;
3117 + start += PAGE_SIZE;
3118 + len--;
3119 + continue;
3120 + }
3121 + }
3122 +#endif
3123 if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
3124 || !(vm_flags & vma->vm_flags))
3125 return i ? : -EFAULT;
3126 --- a/mm/mprotect.c
3127 +++ b/mm/mprotect.c
3128 @@ -86,6 +86,8 @@
3129 next = pmd_addr_end(addr, end);
3130 if (pmd_none_or_clear_bad(pmd))
3131 continue;
3132 + if (arch_change_pte_range(mm, pmd, addr, next, newprot))
3133 + continue;
3134 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
3135 } while (pmd++, addr = next, addr != end);
3136 }
3137 --- a/mm/page_alloc.c
3138 +++ b/mm/page_alloc.c
3139 @@ -245,7 +245,11 @@
3140 1 << PG_slab |
3141 1 << PG_swapcache |
3142 1 << PG_writeback |
3143 - 1 << PG_buddy );
3144 + 1 << PG_buddy |
3145 +#ifdef CONFIG_X86_XEN
3146 + 1 << PG_pinned |
3147 +#endif
3148 + 1 << PG_foreign );
3149 set_page_count(page, 0);
3150 reset_page_mapcount(page);
3151 page->mapping = NULL;
3152 @@ -471,7 +475,11 @@
3153 1 << PG_swapcache |
3154 1 << PG_writeback |
3155 1 << PG_reserved |
3156 - 1 << PG_buddy ))))
3157 + 1 << PG_buddy |
3158 +#ifdef CONFIG_X86_XEN
3159 + 1 << PG_pinned |
3160 +#endif
3161 + 1 << PG_foreign ))))
3162 bad_page(page);
3163 if (PageDirty(page))
3164 __ClearPageDirty(page);
3165 @@ -527,6 +535,12 @@
3166 int i;
3167 int reserved = 0;
3168
3169 +#ifdef CONFIG_XEN
3170 + if (PageForeign(page)) {
3171 + PageForeignDestructor(page);
3172 + return;
3173 + }
3174 +#endif
3175 for (i = 0 ; i < (1 << order) ; ++i)
3176 reserved += free_pages_check(page + i);
3177 if (reserved)
3178 @@ -622,7 +636,11 @@
3179 1 << PG_swapcache |
3180 1 << PG_writeback |
3181 1 << PG_reserved |
3182 - 1 << PG_buddy ))))
3183 + 1 << PG_buddy |
3184 +#ifdef CONFIG_X86_XEN
3185 + 1 << PG_pinned |
3186 +#endif
3187 + 1 << PG_foreign ))))
3188 bad_page(page);
3189
3190 /*
3191 @@ -990,6 +1008,12 @@
3192 struct per_cpu_pages *pcp;
3193 unsigned long flags;
3194
3195 +#ifdef CONFIG_XEN
3196 + if (PageForeign(page)) {
3197 + PageForeignDestructor(page);
3198 + return;
3199 + }
3200 +#endif
3201 if (PageAnon(page))
3202 page->mapping = NULL;
3203 if (free_pages_check(page))
3204 --- a/net/core/dev.c
3205 +++ b/net/core/dev.c
3206 @@ -122,6 +122,12 @@
3207
3208 #include "net-sysfs.h"
3209
3210 +#ifdef CONFIG_XEN
3211 +#include <net/ip.h>
3212 +#include <linux/tcp.h>
3213 +#include <linux/udp.h>
3214 +#endif
3215 +
3216 /*
3217 * The list of packet types we will receive (as opposed to discard)
3218 * and the routines to invoke.
3219 @@ -1580,6 +1586,42 @@
3220 return 0;
3221 }
3222
3223 +#ifdef CONFIG_XEN
3224 +inline int skb_checksum_setup(struct sk_buff *skb)
3225 +{
3226 + if (skb->proto_csum_blank) {
3227 + if (skb->protocol != htons(ETH_P_IP))
3228 + goto out;
3229 + skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
3230 + if (skb->h.raw >= skb->tail)
3231 + goto out;
3232 + switch (skb->nh.iph->protocol) {
3233 + case IPPROTO_TCP:
3234 + skb->csum = offsetof(struct tcphdr, check);
3235 + break;
3236 + case IPPROTO_UDP:
3237 + skb->csum = offsetof(struct udphdr, check);
3238 + break;
3239 + default:
3240 + if (net_ratelimit())
3241 + printk(KERN_ERR "Attempting to checksum a non-"
3242 + "TCP/UDP packet, dropping a protocol"
3243 + " %d packet", skb->nh.iph->protocol);
3244 + goto out;
3245 + }
3246 + if ((skb->h.raw + skb->csum + 2) > skb->tail)
3247 + goto out;
3248 + skb->ip_summed = CHECKSUM_HW;
3249 + skb->proto_csum_blank = 0;
3250 + }
3251 + return 0;
3252 +out:
3253 + return -EPROTO;
3254 +}
3255 +#else
3256 +inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
3257 +#endif
3258 +
3259 /**
3260 * dev_queue_xmit - transmit a buffer
3261 * @skb: buffer to transmit
3262 @@ -1612,6 +1654,12 @@
3263 struct Qdisc *q;
3264 int rc = -ENOMEM;
3265
3266 + /* If a checksum-deferred packet is forwarded to a device that needs a
3267 + * checksum, correct the pointers and force checksumming.
3268 + */
3269 + if (skb_checksum_setup(skb))
3270 + goto out_kfree_skb;
3271 +
3272 /* GSO will handle the following emulations directly. */
3273 if (netif_needs_gso(dev, skb))
3274 goto gso;
3275 @@ -2062,6 +2110,19 @@
3276 }
3277 #endif
3278
3279 +#ifdef CONFIG_XEN
3280 + switch (skb->ip_summed) {
3281 + case CHECKSUM_UNNECESSARY:
3282 + skb->proto_data_valid = 1;
3283 + break;
3284 + case CHECKSUM_HW:
3285 + /* XXX Implement me. */
3286 + default:
3287 + skb->proto_data_valid = 0;
3288 + break;
3289 + }
3290 +#endif
3291 +
3292 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3293 if (!ptype->dev || ptype->dev == skb->dev) {
3294 if (pt_prev)
3295 @@ -4587,6 +4648,7 @@
3296 EXPORT_SYMBOL(net_enable_timestamp);
3297 EXPORT_SYMBOL(net_disable_timestamp);
3298 EXPORT_SYMBOL(dev_get_flags);
3299 +EXPORT_SYMBOL(skb_checksum_setup);
3300
3301 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3302 EXPORT_SYMBOL(br_handle_frame_hook);
3303 --- a/net/core/skbuff.c
3304 +++ b/net/core/skbuff.c
3305 @@ -454,6 +454,10 @@
3306 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
3307 n->cloned = 1;
3308 n->nohdr = 0;
3309 +#ifdef CONFIG_XEN
3310 + C(proto_data_valid);
3311 + C(proto_csum_blank);
3312 +#endif
3313 n->destructor = NULL;
3314 C(iif);
3315 C(tail);
3316 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
3317 +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
3318 @@ -132,6 +132,9 @@
3319 if (hdrsize < sizeof(*hdr))
3320 return 1;
3321
3322 + if (skb_checksum_setup(skb))
3323 + return 0;
3324 +
3325 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
3326 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
3327 return 1;
3328 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c
3329 +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
3330 @@ -116,6 +116,10 @@
3331 newport = tuple->dst.u.udp.port;
3332 portptr = &hdr->dest;
3333 }
3334 +
3335 + if (skb_checksum_setup(skb))
3336 + return 0;
3337 +
3338 if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
3339 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
3340 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
3341 --- a/net/ipv4/xfrm4_output.c
3342 +++ b/net/ipv4/xfrm4_output.c
3343 @@ -81,7 +81,7 @@
3344 #endif
3345
3346 skb->protocol = htons(ETH_P_IP);
3347 - return xfrm_output(skb);
3348 + return skb_checksum_setup(skb) ?: xfrm_output(skb);
3349 }
3350
3351 int xfrm4_output(struct sk_buff *skb)
3352 --- a/scripts/Makefile.build
3353 +++ b/scripts/Makefile.build
3354 @@ -73,6 +73,20 @@
3355 $(warning kbuild: Makefile.build is included improperly)
3356 endif
3357
3358 +ifeq ($(CONFIG_XEN),y)
3359 +$(objtree)/scripts/Makefile.xen: $(srctree)/scripts/Makefile.xen.awk $(srctree)/scripts/Makefile.build
3360 + @echo ' Updating $@'
3361 + $(if $(shell echo a | $(AWK) '{ print gensub(/a/, "AA", "g"); }'),\
3362 + ,$(error 'Your awk program does not define gensub. Use gawk or another awk with gensub'))
3363 + @$(AWK) -f $< $(filter-out $<,$^) >$@
3364 +
3365 +xen-src-single-used-m := $(patsubst $(srctree)/%,%,$(wildcard $(addprefix $(srctree)/,$(single-used-m:.o=-xen.c))))
3366 +xen-single-used-m := $(xen-src-single-used-m:-xen.c=.o)
3367 +single-used-m := $(filter-out $(xen-single-used-m),$(single-used-m))
3368 +
3369 +-include $(objtree)/scripts/Makefile.xen
3370 +endif
3371 +
3372 # ===========================================================================
3373
3374 ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),)
3375 --- a/scripts/Makefile.lib
3376 +++ b/scripts/Makefile.lib
3377 @@ -17,6 +17,12 @@
3378
3379 lib-y := $(filter-out $(obj-y), $(sort $(lib-y) $(lib-m)))
3380
3381 +# Remove objects forcibly disabled
3382 +
3383 +obj-y := $(filter-out $(disabled-obj-y),$(obj-y))
3384 +obj-m := $(filter-out $(disabled-obj-y),$(obj-m))
3385 +lib-y := $(filter-out $(disabled-obj-y),$(lib-y))
3386 +
3387
3388 # Handle objects in subdirs
3389 # ---------------------------------------------------------------------------