Magellan Linux

Annotation of /trunk/kernel26-xen/patches-2.6.25-r1/1011-2.6.25-xen-auto-common.patch



Revision 609
Fri May 23 17:35:37 2008 UTC by niro
File size: 88655 bytes
- using the openSUSE Xen patchset, updated kernel configs

1 niro 609 Subject: xen3 common
2     From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 517:d71965a78c20)
3     Patch-mainline: obsolete
4     Acked-by: jbeulich@novell.com
5    
6     List of files that no longer require modification (and hence were
7     removed from this patch), kept for reference and in case upstream
8     wants to take the forward-porting patches:
9     2.6.25/mm/highmem.c
10    
11     ---
12     drivers/Makefile | 1
13     drivers/acpi/hardware/hwsleep.c | 15
14     drivers/acpi/sleep/main.c | 11
15     drivers/char/agp/intel-agp.c | 10
16     drivers/char/mem.c | 6
17     drivers/char/tpm/Makefile | 2
18     drivers/char/tpm/tpm.h | 15
19     drivers/char/tpm/tpm_vtpm.c | 542 +++++++++++++++++++++++++
20     drivers/char/tpm/tpm_vtpm.h | 55 ++
21     drivers/char/tpm/tpm_xen.c | 722 ++++++++++++++++++++++++++++++++++
22     drivers/ide/ide-lib.c | 8
23     drivers/oprofile/buffer_sync.c | 87 +++-
24     drivers/oprofile/cpu_buffer.c | 51 +-
25     drivers/oprofile/cpu_buffer.h | 9
26     drivers/oprofile/event_buffer.h | 3
27     drivers/oprofile/oprof.c | 30 +
28     drivers/oprofile/oprof.h | 3
29     drivers/oprofile/oprofile_files.c | 201 +++++++++
30     drivers/pci/bus.c | 7
31     drivers/pci/quirks.c | 34 +
32     fs/aio.c | 120 +++++
33     fs/compat_ioctl.c | 19
34     fs/splice.c | 3
35     include/asm-generic/pci.h | 2
36     include/asm-generic/pgtable.h | 4
37     include/linux/aio.h | 5
38     include/linux/interrupt.h | 6
39     include/linux/kexec.h | 17
40     include/linux/mm.h | 7
41     include/linux/oprofile.h | 12
42     include/linux/page-flags.h | 15
43     include/linux/sched.h | 5
44     include/linux/skbuff.h | 8
45     include/linux/vermagic.h | 8
46     kernel/irq/spurious.c | 2
47     kernel/kexec.c | 71 ++-
48     kernel/softlockup.c | 13
49     kernel/sysctl.c | 2
50     kernel/timer.c | 8
51     mm/memory.c | 38 +
52     mm/mprotect.c | 2
53     mm/page_alloc.c | 30 +
54     net/core/dev.c | 62 ++
55     net/core/skbuff.c | 4
56     net/ipv4/netfilter/nf_nat_proto_tcp.c | 3
57     net/ipv4/netfilter/nf_nat_proto_udp.c | 4
58     net/ipv4/xfrm4_output.c | 2
59     scripts/Makefile.build | 14
60     scripts/Makefile.lib | 6
61     49 files changed, 2226 insertions(+), 78 deletions(-)
62    
63     --- a/drivers/Makefile
64     +++ b/drivers/Makefile
65     @@ -34,6 +34,7 @@
66     obj-$(CONFIG_NUBUS) += nubus/
67     obj-$(CONFIG_ATM) += atm/
68     obj-y += macintosh/
69     +obj-$(CONFIG_XEN) += xen/
70     obj-$(CONFIG_IDE) += ide/
71     obj-$(CONFIG_SCSI) += scsi/
72     obj-$(CONFIG_ATA) += ata/
73     --- a/drivers/acpi/hardware/hwsleep.c
74     +++ b/drivers/acpi/hardware/hwsleep.c
75     @@ -252,7 +252,11 @@
76     u32 PM1Bcontrol;
77     struct acpi_bit_register_info *sleep_type_reg_info;
78     struct acpi_bit_register_info *sleep_enable_reg_info;
79     +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
80     u32 in_value;
81     +#else
82     + int err;
83     +#endif
84     struct acpi_object_list arg_list;
85     union acpi_object arg;
86     acpi_status status;
87     @@ -362,6 +366,7 @@
88    
89     ACPI_FLUSH_CPU_CACHE();
90    
91     +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
92     status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
93     PM1Acontrol);
94     if (ACPI_FAILURE(status)) {
95     @@ -408,6 +413,16 @@
96     /* Spin until we wake */
97    
98     } while (!in_value);
99     +#else
99     + /* PV ACPI just needs to check the hypercall return value */
101     + err = acpi_notify_hypervisor_state(sleep_state,
102     + PM1Acontrol, PM1Bcontrol);
103     + if (err) {
104     + ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
105     + "Hypervisor failure [%d]\n", err));
106     + return_ACPI_STATUS(AE_ERROR);
107     + }
108     +#endif
109    
110     return_ACPI_STATUS(AE_OK);
111     }
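
The hunk above swaps the native S-state entry (write the PM1A/PM1B control registers, then spin on the wake status bit) for a single hypervisor notification when the kernel runs as a Xen PV guest on x86, which is why in_value and err are declared under opposite sides of the same #if. A minimal user-space C sketch of that control flow, with acpi_notify_hypervisor_state() stubbed out (the stub and its return value are illustrative; the real call is provided by the Xen support code):

    #include <stdio.h>

    /* Illustrative stub; the real call traps into the hypervisor. */
    static int acpi_notify_hypervisor_state(int state, unsigned pm1a,
                                            unsigned pm1b)
    {
        return 0; /* 0 means the hypervisor accepted the sleep request */
    }

    static int pv_enter_sleep(int state, unsigned pm1a, unsigned pm1b)
    {
        /* PV path: no register writes and no spinning on WAK_STS;
         * one call, then check the error code. */
        int err = acpi_notify_hypervisor_state(state, pm1a, pm1b);
        if (err) {
            fprintf(stderr, "Hypervisor failure [%d]\n", err);
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        return pv_enter_sleep(3 /* S3 */, 0, 0);
    }
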
112     --- a/drivers/acpi/sleep/main.c
113     +++ b/drivers/acpi/sleep/main.c
114     @@ -31,6 +31,7 @@
115     static int acpi_sleep_prepare(u32 acpi_state)
116     {
117     #ifdef CONFIG_ACPI_SLEEP
118     +#ifndef CONFIG_ACPI_PV_SLEEP
119     /* do we have a wakeup address for S2 and S3? */
120     if (acpi_state == ACPI_STATE_S3) {
121     if (!acpi_wakeup_address) {
122     @@ -41,6 +42,7 @@
123     acpi_wakeup_address));
124    
125     }
126     +#endif
127     ACPI_FLUSH_CPU_CACHE();
128     acpi_enable_wakeup_device_prep(acpi_state);
129     #endif
130     @@ -137,7 +139,14 @@
131     break;
132    
133     case ACPI_STATE_S3:
134     +#ifdef CONFIG_ACPI_PV_SLEEP
135     + /* Hypervisor will save and restore CPU context
136     + * and then we can skip the low-level housekeeping here.
137     + */
138     + acpi_enter_sleep_state(acpi_state);
139     +#else
140     do_suspend_lowlevel();
141     +#endif
142     break;
143     }
144    
145     @@ -187,7 +196,7 @@
146    
147     acpi_target_sleep_state = ACPI_STATE_S0;
148    
149     -#ifdef CONFIG_X86
150     +#if defined(CONFIG_X86) && !defined(CONFIG_ACPI_PV_SLEEP)
151     if (init_8259A_after_S1) {
152     printk("Broken toshiba laptop -> kicking interrupts\n");
153     init_8259A(0);
154     --- a/drivers/char/agp/intel-agp.c
155     +++ b/drivers/char/agp/intel-agp.c
156     @@ -230,6 +230,13 @@
157     if (page == NULL)
158     return NULL;
159    
160     +#ifdef CONFIG_XEN
161     + if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) {
162     + __free_pages(page, 2);
163     + return NULL;
164     + }
165     +#endif
166     +
167     if (set_pages_uc(page, 4) < 0) {
168     set_pages_wb(page, 4);
169     __free_pages(page, 2);
170     @@ -249,6 +256,9 @@
171    
172     page = virt_to_page(addr);
173     set_pages_wb(page, 4);
174     +#ifdef CONFIG_XEN
175     + xen_destroy_contiguous_region((unsigned long)page_address(page), 2);
176     +#endif
177     put_page(page);
178     __free_pages(page, 2);
179     atomic_dec(&agp_bridge->current_memory_agp);
180     --- a/drivers/char/mem.c
181     +++ b/drivers/char/mem.c
182     @@ -108,6 +108,7 @@
183     }
184     #endif
185    
186     +#ifndef ARCH_HAS_DEV_MEM
187     /*
188     * This funcion reads the *physical* memory. The f_pos points directly to the
189     * memory location.
190     @@ -230,6 +231,7 @@
191     *ppos += written;
192     return written;
193     }
194     +#endif
195    
196     #ifndef __HAVE_PHYS_MEM_ACCESS_PROT
197     static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
198     @@ -725,6 +727,7 @@
199     #define open_kmem open_mem
200     #define open_oldmem open_mem
201    
202     +#ifndef ARCH_HAS_DEV_MEM
203     static const struct file_operations mem_fops = {
204     .llseek = memory_lseek,
205     .read = read_mem,
206     @@ -733,6 +736,9 @@
207     .open = open_mem,
208     .get_unmapped_area = get_unmapped_area_mem,
209     };
210     +#else
211     +extern const struct file_operations mem_fops;
212     +#endif
213    
214     static const struct file_operations kmem_fops = {
215     .llseek = memory_lseek,
216     --- a/drivers/char/tpm/Makefile
217     +++ b/drivers/char/tpm/Makefile
218     @@ -9,3 +9,5 @@
219     obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
220     obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
221     obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
222     +obj-$(CONFIG_TCG_XEN) += tpm_xenu.o
223     +tpm_xenu-y = tpm_xen.o tpm_vtpm.o
224     --- a/drivers/char/tpm/tpm.h
225     +++ b/drivers/char/tpm/tpm.h
226     @@ -107,6 +107,9 @@
227     struct dentry **bios_dir;
228    
229     struct list_head list;
230     +#ifdef CONFIG_XEN
231     + void *priv;
232     +#endif
233     void (*release) (struct device *);
234     };
235    
236     @@ -124,6 +127,18 @@
237     outb(value & 0xFF, base+1);
238     }
239    
240     +#ifdef CONFIG_XEN
241     +static inline void *chip_get_private(const struct tpm_chip *chip)
242     +{
243     + return chip->priv;
244     +}
245     +
246     +static inline void chip_set_private(struct tpm_chip *chip, void *priv)
247     +{
248     + chip->priv = priv;
249     +}
250     +#endif
251     +
252     extern void tpm_get_timeouts(struct tpm_chip *);
253     extern void tpm_gen_interrupt(struct tpm_chip *);
254     extern void tpm_continue_selftest(struct tpm_chip *);
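
chip_get_private()/chip_set_private() are thin accessors around the new chip->priv field, which lets the Xen frontend hang its own state off a generic tpm_chip without the core driver knowing its type. The same pattern in a self-contained user-space sketch (the struct names here are illustrative, not from the patch):

    #include <stdio.h>
    #include <stdlib.h>

    struct chip {
        void *priv;              /* opaque per-chip driver state */
    };

    struct my_state {
        int connected;
    };

    static void *chip_get_private(const struct chip *chip)
    {
        return chip->priv;
    }

    static void chip_set_private(struct chip *chip, void *priv)
    {
        chip->priv = priv;
    }

    int main(void)
    {
        struct chip c = { 0 };
        struct my_state *s = calloc(1, sizeof(*s));

        chip_set_private(&c, s);       /* done once, at probe time */

        /* Each later callback recovers its state from the chip. */
        struct my_state *back = chip_get_private(&c);
        printf("connected=%d\n", back->connected);

        free(s);
        return 0;
    }
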
255     --- /dev/null
256     +++ b/drivers/char/tpm/tpm_vtpm.c
257     @@ -0,0 +1,542 @@
258     +/*
259     + * Copyright (C) 2006 IBM Corporation
260     + *
261     + * Authors:
262     + * Stefan Berger <stefanb@us.ibm.com>
263     + *
264     + * Generic device driver part for device drivers in a virtualized
265     + * environment.
266     + *
267     + * This program is free software; you can redistribute it and/or
268     + * modify it under the terms of the GNU General Public License as
269     + * published by the Free Software Foundation, version 2 of the
270     + * License.
271     + *
272     + */
273     +
274     +#include <asm/uaccess.h>
275     +#include <linux/list.h>
276     +#include <linux/device.h>
277     +#include <linux/interrupt.h>
278     +#include <linux/platform_device.h>
279     +#include "tpm.h"
280     +#include "tpm_vtpm.h"
281     +
282     +/* read status bits */
283     +enum {
284     + STATUS_BUSY = 0x01,
285     + STATUS_DATA_AVAIL = 0x02,
286     + STATUS_READY = 0x04
287     +};
288     +
289     +struct transmission {
290     + struct list_head next;
291     +
292     + unsigned char *request;
293     + size_t request_len;
294     + size_t request_buflen;
295     +
296     + unsigned char *response;
297     + size_t response_len;
298     + size_t response_buflen;
299     +
300     + unsigned int flags;
301     +};
302     +
303     +enum {
304     + TRANSMISSION_FLAG_WAS_QUEUED = 0x1
305     +};
306     +
307     +
308     +enum {
309     + DATAEX_FLAG_QUEUED_ONLY = 0x1
310     +};
311     +
312     +
313     +/* local variables */
314     +
315     +/* local function prototypes */
316     +static int _vtpm_send_queued(struct tpm_chip *chip);
317     +
318     +
319     +/* =============================================================
320     + * Some utility functions
321     + * =============================================================
322     + */
323     +static void vtpm_state_init(struct vtpm_state *vtpms)
324     +{
325     + vtpms->current_request = NULL;
326     + spin_lock_init(&vtpms->req_list_lock);
327     + init_waitqueue_head(&vtpms->req_wait_queue);
328     + INIT_LIST_HEAD(&vtpms->queued_requests);
329     +
330     + vtpms->current_response = NULL;
331     + spin_lock_init(&vtpms->resp_list_lock);
332     + init_waitqueue_head(&vtpms->resp_wait_queue);
333     +
334     + vtpms->disconnect_time = jiffies;
335     +}
336     +
337     +
338     +static inline struct transmission *transmission_alloc(void)
339     +{
340     + return kzalloc(sizeof(struct transmission), GFP_ATOMIC);
341     +}
342     +
343     +static unsigned char *
344     +transmission_set_req_buffer(struct transmission *t,
345     + unsigned char *buffer, size_t len)
346     +{
347     + if (t->request_buflen < len) {
348     + kfree(t->request);
349     + t->request = kmalloc(len, GFP_KERNEL);
350     + if (!t->request) {
351     + t->request_buflen = 0;
352     + return NULL;
353     + }
354     + t->request_buflen = len;
355     + }
356     +
357     + memcpy(t->request, buffer, len);
358     + t->request_len = len;
359     +
360     + return t->request;
361     +}
362     +
363     +static unsigned char *
364     +transmission_set_res_buffer(struct transmission *t,
365     + const unsigned char *buffer, size_t len)
366     +{
367     + if (t->response_buflen < len) {
368     + kfree(t->response);
369     + t->response = kmalloc(len, GFP_ATOMIC);
370     + if (!t->response) {
371     + t->response_buflen = 0;
372     + return NULL;
373     + }
374     + t->response_buflen = len;
375     + }
376     +
377     + memcpy(t->response, buffer, len);
378     + t->response_len = len;
379     +
380     + return t->response;
381     +}
382     +
383     +static inline void transmission_free(struct transmission *t)
384     +{
385     + kfree(t->request);
386     + kfree(t->response);
387     + kfree(t);
388     +}
389     +
390     +/* =============================================================
391     + * Interface with the lower layer driver
392     + * =============================================================
393     + */
394     +/*
395     + * Lower layer uses this function to make a response available.
396     + */
397     +int vtpm_vd_recv(const struct tpm_chip *chip,
398     + const unsigned char *buffer, size_t count,
399     + void *ptr)
400     +{
401     + unsigned long flags;
402     + int ret_size = 0;
403     + struct transmission *t;
404     + struct vtpm_state *vtpms;
405     +
406     + vtpms = (struct vtpm_state *)chip_get_private(chip);
407     +
408     + /*
409     + * The list with requests must contain one request
410     + * only and the element there must be the one that
411     + * was passed to me from the front-end.
412     + */
413     + spin_lock_irqsave(&vtpms->resp_list_lock, flags);
414     + if (vtpms->current_request != ptr) {
415     + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
416     + return 0;
417     + }
418     +
419     + if ((t = vtpms->current_request)) {
420     + transmission_free(t);
421     + vtpms->current_request = NULL;
422     + }
423     +
424     + t = transmission_alloc();
425     + if (t) {
426     + if (!transmission_set_res_buffer(t, buffer, count)) {
427     + transmission_free(t);
428     + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
429     + return -ENOMEM;
430     + }
431     + ret_size = count;
432     + vtpms->current_response = t;
433     + wake_up_interruptible(&vtpms->resp_wait_queue);
434     + }
435     + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
436     +
437     + return ret_size;
438     +}
439     +
440     +
441     +/*
442     + * Lower layer indicates its status (connected/disconnected)
443     + */
444     +void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status)
445     +{
446     + struct vtpm_state *vtpms;
447     +
448     + vtpms = (struct vtpm_state *)chip_get_private(chip);
449     +
450     + vtpms->vd_status = vd_status;
451     + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
452     + vtpms->disconnect_time = jiffies;
453     + }
454     +}
455     +
456     +/* =============================================================
457     + * Interface with the generic TPM driver
458     + * =============================================================
459     + */
460     +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
461     +{
462     + int rc = 0;
463     + unsigned long flags;
464     + struct vtpm_state *vtpms;
465     +
466     + vtpms = (struct vtpm_state *)chip_get_private(chip);
467     +
468     + /*
469     + * Check if the previous operation only queued the command
470     + * In this case there won't be a response, so I just
471     + * return from here and reset that flag. In any other
472     + * case I should receive a response from the back-end.
473     + */
474     + spin_lock_irqsave(&vtpms->resp_list_lock, flags);
475     + if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) {
476     + vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY;
477     + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
478     + /*
479     + * The first few commands (measurements) must be
480     + * queued since it might not be possible to talk to the
481     + * TPM, yet.
482     + * Return a response of up to 30 '0's.
483     + */
484     +
485     + count = min_t(size_t, count, 30);
486     + memset(buf, 0x0, count);
487     + return count;
488     + }
489     + /*
490     + * Check whether something is in the responselist and if
491     + * there's nothing in the list wait for something to appear.
492     + */
493     +
494     + if (!vtpms->current_response) {
495     + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
496     + interruptible_sleep_on_timeout(&vtpms->resp_wait_queue,
497     + 1000);
498     + spin_lock_irqsave(&vtpms->resp_list_lock ,flags);
499     + }
500     +
501     + if (vtpms->current_response) {
502     + struct transmission *t = vtpms->current_response;
503     + vtpms->current_response = NULL;
504     + rc = min(count, t->response_len);
505     + memcpy(buf, t->response, rc);
506     + transmission_free(t);
507     + }
508     +
509     + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
510     + return rc;
511     +}
512     +
513     +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
514     +{
515     + int rc = 0;
516     + unsigned long flags;
517     + struct transmission *t = transmission_alloc();
518     + struct vtpm_state *vtpms;
519     +
520     + vtpms = (struct vtpm_state *)chip_get_private(chip);
521     +
522     + if (!t)
523     + return -ENOMEM;
524     + /*
525     + * If there's a current request, it must be the
526     + * previous request that has timed out.
527     + */
528     + spin_lock_irqsave(&vtpms->req_list_lock, flags);
529     + if (vtpms->current_request != NULL) {
530     + printk("WARNING: Sending although there is a request outstanding.\n"
531     + " Previous request must have timed out.\n");
532     + transmission_free(vtpms->current_request);
533     + vtpms->current_request = NULL;
534     + }
535     + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
536     +
537     + /*
538     + * Queue the packet if the driver below is not
539     + * yet ready, or there is any packet already
540     + * in the queue.
541     + * If the driver below is ready, unqueue all
542     + * packets first before sending our current
543     + * packet.
544     + * For each unqueued packet, except for the
545     + * last (=current) packet, call the function
546     + * vtpm_recv to wait for the response to come
547     + * back.
548     + */
549     + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
550     + if (time_after(jiffies,
551     + vtpms->disconnect_time + HZ * 10)) {
552     + rc = -ENOENT;
553     + } else {
554     + goto queue_it;
555     + }
556     + } else {
557     + /*
558     + * Send all queued packets.
559     + */
560     + if (_vtpm_send_queued(chip) == 0) {
561     +
562     + vtpms->current_request = t;
563     +
564     + rc = vtpm_vd_send(vtpms->tpm_private,
565     + buf,
566     + count,
567     + t);
568     + /*
569     + * The generic TPM driver will call
570     + * the function to receive the response.
571     + */
572     + if (rc < 0) {
573     + vtpms->current_request = NULL;
574     + goto queue_it;
575     + }
576     + } else {
577     +queue_it:
578     + if (!transmission_set_req_buffer(t, buf, count)) {
579     + transmission_free(t);
580     + rc = -ENOMEM;
581     + goto exit;
582     + }
583     + /*
584     + * An error occurred. Don't even try
585     + * to send the current request. Just
586     + * queue it.
587     + */
588     + spin_lock_irqsave(&vtpms->req_list_lock, flags);
589     + vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY;
590     + list_add_tail(&t->next, &vtpms->queued_requests);
591     + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
592     + }
593     + }
594     +
595     +exit:
596     + return rc;
597     +}
598     +
599     +
600     +/*
601     + * Send all queued requests.
602     + */
603     +static int _vtpm_send_queued(struct tpm_chip *chip)
604     +{
605     + int rc;
606     + int error = 0;
607     + long flags;
608     + unsigned char buffer[1];
609     + struct vtpm_state *vtpms;
610     + vtpms = (struct vtpm_state *)chip_get_private(chip);
611     +
612     + spin_lock_irqsave(&vtpms->req_list_lock, flags);
613     +
614     + while (!list_empty(&vtpms->queued_requests)) {
615     + /*
616     + * Need to dequeue them.
617     + * Read the result into a dummy buffer.
618     + */
619     + struct transmission *qt = (struct transmission *)
620     + vtpms->queued_requests.next;
621     + list_del(&qt->next);
622     + vtpms->current_request = qt;
623     + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
624     +
625     + rc = vtpm_vd_send(vtpms->tpm_private,
626     + qt->request,
627     + qt->request_len,
628     + qt);
629     +
630     + if (rc < 0) {
631     + spin_lock_irqsave(&vtpms->req_list_lock, flags);
632     + if ((qt = vtpms->current_request) != NULL) {
633     + /*
634     + * requeue it at the beginning
635     + * of the list
636     + */
637     + list_add(&qt->next,
638     + &vtpms->queued_requests);
639     + }
640     + vtpms->current_request = NULL;
641     + error = 1;
642     + break;
643     + }
644     + /*
645     + * After this point qt is not valid anymore!
646     + * It is freed when the front-end is delivering
647     + * the data by calling tpm_recv
648     + */
649     + /*
650     + * Receive response into provided dummy buffer
651     + */
652     + rc = vtpm_recv(chip, buffer, sizeof(buffer));
653     + spin_lock_irqsave(&vtpms->req_list_lock, flags);
654     + }
655     +
656     + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
657     +
658     + return error;
659     +}
660     +
661     +static void vtpm_cancel(struct tpm_chip *chip)
662     +{
663     + unsigned long flags;
664     + struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip);
665     +
666     + spin_lock_irqsave(&vtpms->resp_list_lock,flags);
667     +
668     + if (!vtpms->current_response && vtpms->current_request) {
669     + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
670     + interruptible_sleep_on(&vtpms->resp_wait_queue);
671     + spin_lock_irqsave(&vtpms->resp_list_lock,flags);
672     + }
673     +
674     + if (vtpms->current_response) {
675     + struct transmission *t = vtpms->current_response;
676     + vtpms->current_response = NULL;
677     + transmission_free(t);
678     + }
679     +
680     + spin_unlock_irqrestore(&vtpms->resp_list_lock,flags);
681     +}
682     +
683     +static u8 vtpm_status(struct tpm_chip *chip)
684     +{
685     + u8 rc = 0;
686     + unsigned long flags;
687     + struct vtpm_state *vtpms;
688     +
689     + vtpms = (struct vtpm_state *)chip_get_private(chip);
690     +
691     + spin_lock_irqsave(&vtpms->resp_list_lock, flags);
692     + /*
693     + * Data are available if:
694     + * - there's a current response
695     + * - the last packet was queued only (this is fake, but necessary to
696     + * get the generic TPM layer to call the receive function.)
697     + */
698     + if (vtpms->current_response ||
699     + 0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) {
700     + rc = STATUS_DATA_AVAIL;
701     + } else if (!vtpms->current_response && !vtpms->current_request) {
702     + rc = STATUS_READY;
703     + }
704     +
705     + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
706     + return rc;
707     +}
708     +
709     +static struct file_operations vtpm_ops = {
710     + .owner = THIS_MODULE,
711     + .llseek = no_llseek,
712     + .open = tpm_open,
713     + .read = tpm_read,
714     + .write = tpm_write,
715     + .release = tpm_release,
716     +};
717     +
718     +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
719     +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
720     +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
721     +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
722     +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
723     +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
724     + NULL);
725     +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
726     +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel);
727     +
728     +static struct attribute *vtpm_attrs[] = {
729     + &dev_attr_pubek.attr,
730     + &dev_attr_pcrs.attr,
731     + &dev_attr_enabled.attr,
732     + &dev_attr_active.attr,
733     + &dev_attr_owned.attr,
734     + &dev_attr_temp_deactivated.attr,
735     + &dev_attr_caps.attr,
736     + &dev_attr_cancel.attr,
737     + NULL,
738     +};
739     +
740     +static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs };
741     +
742     +#define TPM_LONG_TIMEOUT (10 * 60 * HZ)
743     +
744     +static struct tpm_vendor_specific tpm_vtpm = {
745     + .recv = vtpm_recv,
746     + .send = vtpm_send,
747     + .cancel = vtpm_cancel,
748     + .status = vtpm_status,
749     + .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL,
750     + .req_complete_val = STATUS_DATA_AVAIL,
751     + .req_canceled = STATUS_READY,
752     + .attr_group = &vtpm_attr_grp,
753     + .miscdev = {
754     + .fops = &vtpm_ops,
755     + },
756     + .duration = {
757     + TPM_LONG_TIMEOUT,
758     + TPM_LONG_TIMEOUT,
759     + TPM_LONG_TIMEOUT,
760     + },
761     +};
762     +
763     +struct tpm_chip *init_vtpm(struct device *dev,
764     + struct tpm_private *tp)
765     +{
766     + long rc;
767     + struct tpm_chip *chip;
768     + struct vtpm_state *vtpms;
769     +
770     + vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL);
771     + if (!vtpms)
772     + return ERR_PTR(-ENOMEM);
773     +
774     + vtpm_state_init(vtpms);
775     + vtpms->tpm_private = tp;
776     +
777     + chip = tpm_register_hardware(dev, &tpm_vtpm);
778     + if (!chip) {
779     + rc = -ENODEV;
780     + goto err_free_mem;
781     + }
782     +
783     + chip_set_private(chip, vtpms);
784     +
785     + return chip;
786     +
787     +err_free_mem:
788     + kfree(vtpms);
789     +
790     + return ERR_PTR(rc);
791     +}
792     +
793     +void cleanup_vtpm(struct device *dev)
794     +{
795     + struct tpm_chip *chip = dev_get_drvdata(dev);
796     + struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip);
797     + tpm_remove_hardware(dev);
798     + kfree(vtpms);
799     +}
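
The send path in tpm_vtpm.c above follows a "queue until the backend connects, then flush in arrival order" policy: the first requests (early measurements) are parked on queued_requests, and once the backend reports connected they are drained before the current packet goes out. A compact user-space C sketch of that policy, with locking omitted and send_to_backend() standing in for vtpm_vd_send():

    #include <stdio.h>
    #include <stdlib.h>

    struct req { struct req *next; char data[32]; };

    static struct req *queue_head, **queue_tail = &queue_head;
    static int connected;

    /* Hypothetical stand-in for vtpm_vd_send(); returns 0 on success. */
    static int send_to_backend(const char *data) { puts(data); return 0; }

    static void flush_queue(void)
    {
        while (queue_head) {
            struct req *r = queue_head;
            queue_head = r->next;
            if (!queue_head)
                queue_tail = &queue_head;
            send_to_backend(r->data);
            free(r);
        }
    }

    static void submit(const char *data)
    {
        if (connected) {
            flush_queue();            /* drain older requests first */
            send_to_backend(data);
            return;
        }
        /* Backend not ready yet: queue the request in arrival order. */
        struct req *r = calloc(1, sizeof(*r));
        snprintf(r->data, sizeof(r->data), "%s", data);
        *queue_tail = r;
        queue_tail = &r->next;
    }

    int main(void)
    {
        submit("measurement-1");      /* queued */
        submit("measurement-2");      /* queued */
        connected = 1;
        submit("command-3");          /* flushes 1 and 2, then sends 3 */
        return 0;
    }
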
800     --- /dev/null
801     +++ b/drivers/char/tpm/tpm_vtpm.h
802     @@ -0,0 +1,55 @@
803     +#ifndef TPM_VTPM_H
804     +#define TPM_VTPM_H
805     +
806     +struct tpm_chip;
807     +struct tpm_private;
808     +
809     +struct vtpm_state {
810     + struct transmission *current_request;
811     + spinlock_t req_list_lock;
812     + wait_queue_head_t req_wait_queue;
813     +
814     + struct list_head queued_requests;
815     +
816     + struct transmission *current_response;
817     + spinlock_t resp_list_lock;
818     + wait_queue_head_t resp_wait_queue; // processes waiting for responses
819     +
820     + u8 vd_status;
821     + u8 flags;
822     +
823     + unsigned long disconnect_time;
824     +
825     + /*
826     + * The following is a private structure of the underlying
827     + * driver. It is passed as parameter in the send function.
828     + */
829     + struct tpm_private *tpm_private;
830     +};
831     +
832     +
833     +enum vdev_status {
834     + TPM_VD_STATUS_DISCONNECTED = 0x0,
835     + TPM_VD_STATUS_CONNECTED = 0x1
836     +};
837     +
838     +/* this function is called from tpm_vtpm.c */
839     +int vtpm_vd_send(struct tpm_private * tp,
840     + const u8 * buf, size_t count, void *ptr);
841     +
842     +/* these functions are offered by tpm_vtpm.c */
843     +struct tpm_chip *init_vtpm(struct device *,
844     + struct tpm_private *);
845     +void cleanup_vtpm(struct device *);
846     +int vtpm_vd_recv(const struct tpm_chip* chip,
847     + const unsigned char *buffer, size_t count, void *ptr);
848     +void vtpm_vd_status(const struct tpm_chip *, u8 status);
849     +
850     +static inline struct tpm_private *tpm_private_from_dev(struct device *dev)
851     +{
852     + struct tpm_chip *chip = dev_get_drvdata(dev);
853     + struct vtpm_state *vtpms = chip_get_private(chip);
854     + return vtpms->tpm_private;
855     +}
856     +
857     +#endif
858     --- /dev/null
859     +++ b/drivers/char/tpm/tpm_xen.c
860     @@ -0,0 +1,722 @@
861     +/*
862     + * Copyright (c) 2005, IBM Corporation
863     + *
864     + * Author: Stefan Berger, stefanb@us.ibm.com
865     + * Grant table support: Mahadevan Gomathisankaran
866     + *
867     + * This code has been derived from drivers/xen/netfront/netfront.c
868     + *
869     + * Copyright (c) 2002-2004, K A Fraser
870     + *
871     + * This program is free software; you can redistribute it and/or
872     + * modify it under the terms of the GNU General Public License version 2
873     + * as published by the Free Software Foundation; or, when distributed
874     + * separately from the Linux kernel or incorporated into other
875     + * software packages, subject to the following license:
876     + *
877     + * Permission is hereby granted, free of charge, to any person obtaining a copy
878     + * of this source file (the "Software"), to deal in the Software without
879     + * restriction, including without limitation the rights to use, copy, modify,
880     + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
881     + * and to permit persons to whom the Software is furnished to do so, subject to
882     + * the following conditions:
883     + *
884     + * The above copyright notice and this permission notice shall be included in
885     + * all copies or substantial portions of the Software.
886     + *
887     + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
888     + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
889     + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
890     + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
891     + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
892     + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
893     + * IN THE SOFTWARE.
894     + */
895     +
896     +#include <linux/errno.h>
897     +#include <linux/err.h>
898     +#include <linux/interrupt.h>
899     +#include <linux/mutex.h>
900     +#include <asm/uaccess.h>
901     +#include <xen/evtchn.h>
902     +#include <xen/interface/grant_table.h>
903     +#include <xen/interface/io/tpmif.h>
904     +#include <xen/gnttab.h>
905     +#include <xen/xenbus.h>
906     +#include "tpm.h"
907     +#include "tpm_vtpm.h"
908     +
909     +#undef DEBUG
910     +
911     +/* local structures */
912     +struct tpm_private {
913     + struct tpm_chip *chip;
914     +
915     + tpmif_tx_interface_t *tx;
916     + atomic_t refcnt;
917     + unsigned int irq;
918     + u8 is_connected;
919     + u8 is_suspended;
920     +
921     + spinlock_t tx_lock;
922     +
923     + struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE];
924     +
925     + atomic_t tx_busy;
926     + void *tx_remember;
927     +
928     + domid_t backend_id;
929     + wait_queue_head_t wait_q;
930     +
931     + struct xenbus_device *dev;
932     + int ring_ref;
933     +};
934     +
935     +struct tx_buffer {
936     + unsigned int size; // available space in data
937     + unsigned int len; // used space in data
938     + unsigned char *data; // pointer to a page
939     +};
940     +
941     +
942     +/* locally visible variables */
943     +static grant_ref_t gref_head;
944     +static struct tpm_private *my_priv;
945     +
946     +/* local function prototypes */
947     +static irqreturn_t tpmif_int(int irq,
948     + void *tpm_priv,
949     + struct pt_regs *ptregs);
950     +static void tpmif_rx_action(unsigned long unused);
951     +static int tpmif_connect(struct xenbus_device *dev,
952     + struct tpm_private *tp,
953     + domid_t domid);
954     +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0);
955     +static int tpmif_allocate_tx_buffers(struct tpm_private *tp);
956     +static void tpmif_free_tx_buffers(struct tpm_private *tp);
957     +static void tpmif_set_connected_state(struct tpm_private *tp,
958     + u8 newstate);
959     +static int tpm_xmit(struct tpm_private *tp,
960     + const u8 * buf, size_t count, int userbuffer,
961     + void *remember);
962     +static void destroy_tpmring(struct tpm_private *tp);
963     +void __exit tpmif_exit(void);
964     +
965     +#define DPRINTK(fmt, args...) \
966     + pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
967     +#define IPRINTK(fmt, args...) \
968     + printk(KERN_INFO "xen_tpm_fr: " fmt, ##args)
969     +#define WPRINTK(fmt, args...) \
970     + printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args)
971     +
972     +#define GRANT_INVALID_REF 0
973     +
974     +
975     +static inline int
976     +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
977     + int isuserbuffer)
978     +{
979     + int copied = len;
980     +
981     + if (len > txb->size)
982     + copied = txb->size;
983     + if (isuserbuffer) {
984     + if (copy_from_user(txb->data, src, copied))
985     + return -EFAULT;
986     + } else {
987     + memcpy(txb->data, src, copied);
988     + }
989     + txb->len = len;
990     + return copied;
991     +}
992     +
993     +static inline struct tx_buffer *tx_buffer_alloc(void)
994     +{
995     + struct tx_buffer *txb;
996     +
997     + txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
998     + if (!txb)
999     + return NULL;
1000     +
1001     + txb->len = 0;
1002     + txb->size = PAGE_SIZE;
1003     + txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
1004     + if (txb->data == NULL) {
1005     + kfree(txb);
1006     + txb = NULL;
1007     + }
1008     +
1009     + return txb;
1010     +}
1011     +
1012     +
1013     +static inline void tx_buffer_free(struct tx_buffer *txb)
1014     +{
1015     + if (txb) {
1016     + free_page((long)txb->data);
1017     + kfree(txb);
1018     + }
1019     +}
1020     +
1021     +/**************************************************************
1022     + Utility function for the tpm_private structure
1023     +**************************************************************/
1024     +static void tpm_private_init(struct tpm_private *tp)
1025     +{
1026     + spin_lock_init(&tp->tx_lock);
1027     + init_waitqueue_head(&tp->wait_q);
1028     + atomic_set(&tp->refcnt, 1);
1029     +}
1030     +
1031     +static void tpm_private_put(void)
1032     +{
1033     + if (!atomic_dec_and_test(&my_priv->refcnt))
1034     + return;
1035     +
1036     + tpmif_free_tx_buffers(my_priv);
1037     + kfree(my_priv);
1038     + my_priv = NULL;
1039     +}
1040     +
1041     +static struct tpm_private *tpm_private_get(void)
1042     +{
1043     + int err;
1044     +
1045     + if (my_priv) {
1046     + atomic_inc(&my_priv->refcnt);
1047     + return my_priv;
1048     + }
1049     +
1050     + my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
1051     + if (!my_priv)
1052     + return NULL;
1053     +
1054     + tpm_private_init(my_priv);
1055     + err = tpmif_allocate_tx_buffers(my_priv);
1056     + if (err < 0)
1057     + tpm_private_put();
1058     +
1059     + return my_priv;
1060     +}
1061     +
1062     +/**************************************************************
1063     +
1064     + The interface to let the tpm plugin register its callback
1065     + function and send data to another partition using this module
1066     +
1067     +**************************************************************/
1068     +
1069     +static DEFINE_MUTEX(suspend_lock);
1070     +/*
1071     + * Send data via this module by calling this function
1072     + */
1073     +int vtpm_vd_send(struct tpm_private *tp,
1074     + const u8 * buf, size_t count, void *ptr)
1075     +{
1076     + int sent;
1077     +
1078     + mutex_lock(&suspend_lock);
1079     + sent = tpm_xmit(tp, buf, count, 0, ptr);
1080     + mutex_unlock(&suspend_lock);
1081     +
1082     + return sent;
1083     +}
1084     +
1085     +/**************************************************************
1086     + XENBUS support code
1087     +**************************************************************/
1088     +
1089     +static int setup_tpmring(struct xenbus_device *dev,
1090     + struct tpm_private *tp)
1091     +{
1092     + tpmif_tx_interface_t *sring;
1093     + int err;
1094     +
1095     + tp->ring_ref = GRANT_INVALID_REF;
1096     +
1097     + sring = (void *)__get_free_page(GFP_KERNEL);
1098     + if (!sring) {
1099     + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
1100     + return -ENOMEM;
1101     + }
1102     + tp->tx = sring;
1103     +
1104     + err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx));
1105     + if (err < 0) {
1106     + free_page((unsigned long)sring);
1107     + tp->tx = NULL;
1108     + xenbus_dev_fatal(dev, err, "allocating grant reference");
1109     + goto fail;
1110     + }
1111     + tp->ring_ref = err;
1112     +
1113     + err = tpmif_connect(dev, tp, dev->otherend_id);
1114     + if (err)
1115     + goto fail;
1116     +
1117     + return 0;
1118     +fail:
1119     + destroy_tpmring(tp);
1120     + return err;
1121     +}
1122     +
1123     +
1124     +static void destroy_tpmring(struct tpm_private *tp)
1125     +{
1126     + tpmif_set_connected_state(tp, 0);
1127     +
1128     + if (tp->ring_ref != GRANT_INVALID_REF) {
1129     + gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx);
1130     + tp->ring_ref = GRANT_INVALID_REF;
1131     + tp->tx = NULL;
1132     + }
1133     +
1134     + if (tp->irq)
1135     + unbind_from_irqhandler(tp->irq, tp);
1136     +
1137     + tp->irq = 0;
1138     +}
1139     +
1140     +
1141     +static int talk_to_backend(struct xenbus_device *dev,
1142     + struct tpm_private *tp)
1143     +{
1144     + const char *message = NULL;
1145     + int err;
1146     + struct xenbus_transaction xbt;
1147     +
1148     + err = setup_tpmring(dev, tp);
1149     + if (err) {
1150     + xenbus_dev_fatal(dev, err, "setting up ring");
1151     + goto out;
1152     + }
1153     +
1154     +again:
1155     + err = xenbus_transaction_start(&xbt);
1156     + if (err) {
1157     + xenbus_dev_fatal(dev, err, "starting transaction");
1158     + goto destroy_tpmring;
1159     + }
1160     +
1161     + err = xenbus_printf(xbt, dev->nodename,
1162     + "ring-ref","%u", tp->ring_ref);
1163     + if (err) {
1164     + message = "writing ring-ref";
1165     + goto abort_transaction;
1166     + }
1167     +
1168     + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
1169     + irq_to_evtchn_port(tp->irq));
1170     + if (err) {
1171     + message = "writing event-channel";
1172     + goto abort_transaction;
1173     + }
1174     +
1175     + err = xenbus_transaction_end(xbt, 0);
1176     + if (err == -EAGAIN)
1177     + goto again;
1178     + if (err) {
1179     + xenbus_dev_fatal(dev, err, "completing transaction");
1180     + goto destroy_tpmring;
1181     + }
1182     +
1183     + xenbus_switch_state(dev, XenbusStateConnected);
1184     +
1185     + return 0;
1186     +
1187     +abort_transaction:
1188     + xenbus_transaction_end(xbt, 1);
1189     + if (message)
1190     + xenbus_dev_error(dev, err, "%s", message);
1191     +destroy_tpmring:
1192     + destroy_tpmring(tp);
1193     +out:
1194     + return err;
1195     +}
1196     +
1197     +/**
1198     + * Callback received when the backend's state changes.
1199     + */
1200     +static void backend_changed(struct xenbus_device *dev,
1201     + enum xenbus_state backend_state)
1202     +{
1203     + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1204     + DPRINTK("\n");
1205     +
1206     + switch (backend_state) {
1207     + case XenbusStateInitialising:
1208     + case XenbusStateInitWait:
1209     + case XenbusStateInitialised:
1210     + case XenbusStateReconfiguring:
1211     + case XenbusStateReconfigured:
1212     + case XenbusStateUnknown:
1213     + break;
1214     +
1215     + case XenbusStateConnected:
1216     + tpmif_set_connected_state(tp, 1);
1217     + break;
1218     +
1219     + case XenbusStateClosing:
1220     + tpmif_set_connected_state(tp, 0);
1221     + xenbus_frontend_closed(dev);
1222     + break;
1223     +
1224     + case XenbusStateClosed:
1225     + tpmif_set_connected_state(tp, 0);
1226     + if (tp->is_suspended == 0)
1227     + device_unregister(&dev->dev);
1228     + xenbus_frontend_closed(dev);
1229     + break;
1230     + }
1231     +}
1232     +
1233     +static int tpmfront_probe(struct xenbus_device *dev,
1234     + const struct xenbus_device_id *id)
1235     +{
1236     + int err;
1237     + int handle;
1238     + struct tpm_private *tp = tpm_private_get();
1239     +
1240     + if (!tp)
1241     + return -ENOMEM;
1242     +
1243     + tp->chip = init_vtpm(&dev->dev, tp);
1244     + if (IS_ERR(tp->chip))
1245     + return PTR_ERR(tp->chip);
1246     +
1247     + err = xenbus_scanf(XBT_NIL, dev->nodename,
1248     + "handle", "%i", &handle);
1249     + if (XENBUS_EXIST_ERR(err))
1250     + return err;
1251     +
1252     + if (err < 0) {
1253     + xenbus_dev_fatal(dev,err,"reading virtual-device");
1254     + return err;
1255     + }
1256     +
1257     + tp->dev = dev;
1258     +
1259     + err = talk_to_backend(dev, tp);
1260     + if (err) {
1261     + tpm_private_put();
1262     + return err;
1263     + }
1264     +
1265     + return 0;
1266     +}
1267     +
1268     +
1269     +static int tpmfront_remove(struct xenbus_device *dev)
1270     +{
1271     + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1272     + destroy_tpmring(tp);
1273     + cleanup_vtpm(&dev->dev);
1274     + return 0;
1275     +}
1276     +
1277     +static int tpmfront_suspend(struct xenbus_device *dev)
1278     +{
1279     + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1280     + u32 ctr;
1281     +
1282     + /* Take the lock, preventing any application from sending. */
1283     + mutex_lock(&suspend_lock);
1284     + tp->is_suspended = 1;
1285     +
1286     + for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
1287     + if ((ctr % 10) == 0)
1288     + printk("TPM-FE [INFO]: Waiting for outstanding "
1289     + "request.\n");
1290     + /* Wait for a request to be responded to. */
1291     + interruptible_sleep_on_timeout(&tp->wait_q, 100);
1292     + }
1293     +
1294     + return 0;
1295     +}
1296     +
1297     +static int tpmfront_suspend_finish(struct tpm_private *tp)
1298     +{
1299     + tp->is_suspended = 0;
1300     + /* Allow applications to send again. */
1301     + mutex_unlock(&suspend_lock);
1302     + return 0;
1303     +}
1304     +
1305     +static int tpmfront_suspend_cancel(struct xenbus_device *dev)
1306     +{
1307     + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1308     + return tpmfront_suspend_finish(tp);
1309     +}
1310     +
1311     +static int tpmfront_resume(struct xenbus_device *dev)
1312     +{
1313     + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1314     + destroy_tpmring(tp);
1315     + return talk_to_backend(dev, tp);
1316     +}
1317     +
1318     +static int tpmif_connect(struct xenbus_device *dev,
1319     + struct tpm_private *tp,
1320     + domid_t domid)
1321     +{
1322     + int err;
1323     +
1324     + tp->backend_id = domid;
1325     +
1326     + err = bind_listening_port_to_irqhandler(
1327     + domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
1328     + if (err <= 0) {
1329     + WPRINTK("bind_listening_port_to_irqhandler failed "
1330     + "(err=%d)\n", err);
1331     + return err;
1332     + }
1333     + tp->irq = err;
1334     +
1335     + return 0;
1336     +}
1337     +
1338     +static struct xenbus_device_id tpmfront_ids[] = {
1339     + { "vtpm" },
1340     + { "" }
1341     +};
1342     +
1343     +static struct xenbus_driver tpmfront = {
1344     + .name = "vtpm",
1345     + .owner = THIS_MODULE,
1346     + .ids = tpmfront_ids,
1347     + .probe = tpmfront_probe,
1348     + .remove = tpmfront_remove,
1349     + .resume = tpmfront_resume,
1350     + .otherend_changed = backend_changed,
1351     + .suspend = tpmfront_suspend,
1352     + .suspend_cancel = tpmfront_suspend_cancel,
1353     +};
1354     +
1355     +static void __init init_tpm_xenbus(void)
1356     +{
1357     + xenbus_register_frontend(&tpmfront);
1358     +}
1359     +
1360     +static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
1361     +{
1362     + unsigned int i;
1363     +
1364     + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
1365     + tp->tx_buffers[i] = tx_buffer_alloc();
1366     + if (!tp->tx_buffers[i]) {
1367     + tpmif_free_tx_buffers(tp);
1368     + return -ENOMEM;
1369     + }
1370     + }
1371     + return 0;
1372     +}
1373     +
1374     +static void tpmif_free_tx_buffers(struct tpm_private *tp)
1375     +{
1376     + unsigned int i;
1377     +
1378     + for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
1379     + tx_buffer_free(tp->tx_buffers[i]);
1380     +}
1381     +
1382     +static void tpmif_rx_action(unsigned long priv)
1383     +{
1384     + struct tpm_private *tp = (struct tpm_private *)priv;
1385     + int i = 0;
1386     + unsigned int received;
1387     + unsigned int offset = 0;
1388     + u8 *buffer;
1389     + tpmif_tx_request_t *tx = &tp->tx->ring[i].req;
1390     +
1391     + atomic_set(&tp->tx_busy, 0);
1392     + wake_up_interruptible(&tp->wait_q);
1393     +
1394     + received = tx->size;
1395     +
1396     + buffer = kmalloc(received, GFP_ATOMIC);
1397     + if (!buffer)
1398     + return;
1399     +
1400     + for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
1401     + struct tx_buffer *txb = tp->tx_buffers[i];
1402     + tpmif_tx_request_t *tx;
1403     + unsigned int tocopy;
1404     +
1405     + tx = &tp->tx->ring[i].req;
1406     + tocopy = tx->size;
1407     + if (tocopy > PAGE_SIZE)
1408     + tocopy = PAGE_SIZE;
1409     +
1410     + memcpy(&buffer[offset], txb->data, tocopy);
1411     +
1412     + gnttab_release_grant_reference(&gref_head, tx->ref);
1413     +
1414     + offset += tocopy;
1415     + }
1416     +
1417     + vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember);
1418     + kfree(buffer);
1419     +}
1420     +
1421     +
1422     +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
1423     +{
1424     + struct tpm_private *tp = tpm_priv;
1425     + unsigned long flags;
1426     +
1427     + spin_lock_irqsave(&tp->tx_lock, flags);
1428     + tpmif_rx_tasklet.data = (unsigned long)tp;
1429     + tasklet_schedule(&tpmif_rx_tasklet);
1430     + spin_unlock_irqrestore(&tp->tx_lock, flags);
1431     +
1432     + return IRQ_HANDLED;
1433     +}
1434     +
1435     +
1436     +static int tpm_xmit(struct tpm_private *tp,
1437     + const u8 * buf, size_t count, int isuserbuffer,
1438     + void *remember)
1439     +{
1440     + tpmif_tx_request_t *tx;
1441     + TPMIF_RING_IDX i;
1442     + unsigned int offset = 0;
1443     +
1444     + spin_lock_irq(&tp->tx_lock);
1445     +
1446     + if (unlikely(atomic_read(&tp->tx_busy))) {
1447     + printk("tpm_xmit: There's an outstanding request/response "
1448     + "on the way!\n");
1449     + spin_unlock_irq(&tp->tx_lock);
1450     + return -EBUSY;
1451     + }
1452     +
1453     + if (tp->is_connected != 1) {
1454     + spin_unlock_irq(&tp->tx_lock);
1455     + return -EIO;
1456     + }
1457     +
1458     + for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) {
1459     + struct tx_buffer *txb = tp->tx_buffers[i];
1460     + int copied;
1461     +
1462     + if (!txb) {
1463     + DPRINTK("txb (i=%d) is NULL. buffers initilized?\n"
1464     + "Not transmitting anything!\n", i);
1465     + spin_unlock_irq(&tp->tx_lock);
1466     + return -EFAULT;
1467     + }
1468     +
1469     + copied = tx_buffer_copy(txb, &buf[offset], count,
1470     + isuserbuffer);
1471     + if (copied < 0) {
1472     + /* An error occurred */
1473     + spin_unlock_irq(&tp->tx_lock);
1474     + return copied;
1475     + }
1476     + count -= copied;
1477     + offset += copied;
1478     +
1479     + tx = &tp->tx->ring[i].req;
1480     + tx->addr = virt_to_machine(txb->data);
1481     + tx->size = txb->len;
1482     + tx->unused = 0;
1483     +
1484     + DPRINTK("First 4 characters sent by TPM-FE are "
1485     + "0x%02x 0x%02x 0x%02x 0x%02x\n",
1486     + txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
1487     +
1488     + /* Get the granttable reference for this page. */
1489     + tx->ref = gnttab_claim_grant_reference(&gref_head);
1490     + if (tx->ref == -ENOSPC) {
1491     + spin_unlock_irq(&tp->tx_lock);
1492     + DPRINTK("Grant table claim reference failed in "
1493     + "func:%s line:%d file:%s\n",
1494     + __FUNCTION__, __LINE__, __FILE__);
1495     + return -ENOSPC;
1496     + }
1497     + gnttab_grant_foreign_access_ref(tx->ref,
1498     + tp->backend_id,
1499     + virt_to_mfn(txb->data),
1500     + 0 /*RW*/);
1501     + wmb();
1502     + }
1503     +
1504     + atomic_set(&tp->tx_busy, 1);
1505     + tp->tx_remember = remember;
1506     +
1507     + mb();
1508     +
1509     + notify_remote_via_irq(tp->irq);
1510     +
1511     + spin_unlock_irq(&tp->tx_lock);
1512     + return offset;
1513     +}
1514     +
1515     +
1516     +static void tpmif_notify_upperlayer(struct tpm_private *tp)
1517     +{
1518     + /* Notify upper layer about the state of the connection to the BE. */
1519     + vtpm_vd_status(tp->chip, (tp->is_connected
1520     + ? TPM_VD_STATUS_CONNECTED
1521     + : TPM_VD_STATUS_DISCONNECTED));
1522     +}
1523     +
1524     +
1525     +static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected)
1526     +{
1527     + /*
1528     + * Don't notify upper layer if we are in suspend mode and
1529     + * should disconnect - the assumption is that we will resume.
1530     + * The mutex keeps apps from sending.
1531     + */
1532     + if (is_connected == 0 && tp->is_suspended == 1)
1533     + return;
1534     +
1535     + /*
1536     + * Unlock the mutex if we are connected again
1537     + * after being suspended - now resuming.
1538     + * This also removes the suspend state.
1539     + */
1540     + if (is_connected == 1 && tp->is_suspended == 1)
1541     + tpmfront_suspend_finish(tp);
1542     +
1543     + if (is_connected != tp->is_connected) {
1544     + tp->is_connected = is_connected;
1545     + tpmif_notify_upperlayer(tp);
1546     + }
1547     +}
1548     +
1549     +
1550     +
1551     +/* =================================================================
1552     + * Initialization function.
1553     + * =================================================================
1554     + */
1555     +
1556     +
1557     +static int __init tpmif_init(void)
1558     +{
1559     + struct tpm_private *tp;
1560     +
1561     + if (is_initial_xendomain())
1562     + return -EPERM;
1563     +
1564     + tp = tpm_private_get();
1565     + if (!tp)
1566     + return -ENOMEM;
1567     +
1568     + IPRINTK("Initialising the vTPM driver.\n");
1569     + if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
1570     + &gref_head) < 0) {
1571     + tpm_private_put();
1572     + return -EFAULT;
1573     + }
1574     +
1575     + init_tpm_xenbus();
1576     + return 0;
1577     +}
1578     +
1579     +
1580     +module_init(tpmif_init);
1581     +
1582     +MODULE_LICENSE("Dual BSD/GPL");
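
In tpm_xmit() above the frontend fills the shared ring, issues wmb() so the backend can never observe the doorbell before complete descriptors, and only then marks tx_busy and signals the event channel. The same publish-then-signal ordering expressed with portable C11 atomics (a user-space analogy, not the kernel barrier API):

    #include <stdatomic.h>
    #include <stdio.h>

    static int payload[4];           /* stands in for the shared ring */
    static atomic_int ready;         /* stands in for the event-channel kick */

    static void producer(void)
    {
        for (int i = 0; i < 4; i++)
            payload[i] = i * i;      /* fill the descriptors */
        /* Release ordering plays the role of wmb(): all payload writes
         * are visible before the flag flips. */
        atomic_store_explicit(&ready, 1, memory_order_release);
    }

    static void consumer(void)
    {
        /* Acquire pairs with the release above, like the backend reading
         * the ring only after the notification arrives. */
        while (!atomic_load_explicit(&ready, memory_order_acquire))
            ;
        for (int i = 0; i < 4; i++)
            printf("%d\n", payload[i]);
    }

    int main(void)
    {
        producer();                  /* single-threaded demo of the ordering */
        consumer();
        return 0;
    }
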
1583     --- a/drivers/ide/ide-lib.c
1584     +++ b/drivers/ide/ide-lib.c
1585     @@ -336,12 +336,12 @@
1586     {
1587     u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
1588    
1589     - if (!PCI_DMA_BUS_IS_PHYS) {
1590     - addr = BLK_BOUNCE_ANY;
1591     - } else if (on && drive->media == ide_disk) {
1592     + if (on && drive->media == ide_disk) {
1593     struct device *dev = drive->hwif->dev;
1594    
1595     - if (dev && dev->dma_mask)
1596     + if (!PCI_DMA_BUS_IS_PHYS)
1597     + addr = BLK_BOUNCE_ANY;
1598     + else if (dev && dev->dma_mask)
1599     addr = *dev->dma_mask;
1600     }
1601    
1602     --- a/drivers/oprofile/buffer_sync.c
1603     +++ b/drivers/oprofile/buffer_sync.c
1604     @@ -6,6 +6,10 @@
1605     *
1606     * @author John Levon <levon@movementarian.org>
1607     *
1608     + * Modified by Aravind Menon for Xen
1609     + * These modifications are:
1610     + * Copyright (C) 2005 Hewlett-Packard Co.
1611     + *
1612     * This is the core of the buffer management. Each
1613     * CPU buffer is processed and entered into the
1614     * global event buffer. Such processing is necessary
1615     @@ -40,6 +44,7 @@
1616     static DEFINE_SPINLOCK(task_mortuary);
1617     static void process_task_mortuary(void);
1618    
1619     +static int cpu_current_domain[NR_CPUS];
1620    
1621     /* Take ownership of the task struct and place it on the
1622     * list for processing. Only after two full buffer syncs
1623     @@ -148,6 +153,11 @@
1624     int sync_start(void)
1625     {
1626     int err;
1627     + int i;
1628     +
1629     + for (i = 0; i < NR_CPUS; i++) {
1630     + cpu_current_domain[i] = COORDINATOR_DOMAIN;
1631     + }
1632    
1633     start_cpu_work();
1634    
1635     @@ -274,15 +284,31 @@
1636     last_cookie = INVALID_COOKIE;
1637     }
1638    
1639     -static void add_kernel_ctx_switch(unsigned int in_kernel)
1640     +static void add_cpu_mode_switch(unsigned int cpu_mode)
1641     {
1642     add_event_entry(ESCAPE_CODE);
1643     - if (in_kernel)
1644     - add_event_entry(KERNEL_ENTER_SWITCH_CODE);
1645     - else
1646     - add_event_entry(KERNEL_EXIT_SWITCH_CODE);
1647     + switch (cpu_mode) {
1648     + case CPU_MODE_USER:
1649     + add_event_entry(USER_ENTER_SWITCH_CODE);
1650     + break;
1651     + case CPU_MODE_KERNEL:
1652     + add_event_entry(KERNEL_ENTER_SWITCH_CODE);
1653     + break;
1654     + case CPU_MODE_XEN:
1655     + add_event_entry(XEN_ENTER_SWITCH_CODE);
1656     + break;
1657     + default:
1658     + break;
1659     + }
1660     }
1661     -
1662     +
1663     +static void add_domain_switch(unsigned long domain_id)
1664     +{
1665     + add_event_entry(ESCAPE_CODE);
1666     + add_event_entry(DOMAIN_SWITCH_CODE);
1667     + add_event_entry(domain_id);
1668     +}
1669     +
1670     static void
1671     add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
1672     {
1673     @@ -347,9 +373,9 @@
1674     * for later lookup from userspace.
1675     */
1676     static int
1677     -add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
1678     +add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
1679     {
1680     - if (in_kernel) {
1681     + if (cpu_mode >= CPU_MODE_KERNEL) {
1682     add_sample_entry(s->eip, s->event);
1683     return 1;
1684     } else if (mm) {
1685     @@ -495,15 +521,21 @@
1686     struct mm_struct *mm = NULL;
1687     struct task_struct * new;
1688     unsigned long cookie = 0;
1689     - int in_kernel = 1;
1690     + int cpu_mode = 1;
1691     unsigned int i;
1692     sync_buffer_state state = sb_buffer_start;
1693     unsigned long available;
1694     + int domain_switch = 0;
1695    
1696     mutex_lock(&buffer_mutex);
1697    
1698     add_cpu_switch(cpu);
1699    
1700     + /* We need to assign the first samples in this CPU buffer to the
1701     + same domain that we were processing at the last sync_buffer */
1702     + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
1703     + add_domain_switch(cpu_current_domain[cpu]);
1704     + }
1705     /* Remember, only we can modify tail_pos */
1706    
1707     available = get_slots(cpu_buf);
1708     @@ -511,16 +543,18 @@
1709     for (i = 0; i < available; ++i) {
1710     struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
1711    
1712     - if (is_code(s->eip)) {
1713     - if (s->event <= CPU_IS_KERNEL) {
1714     - /* kernel/userspace switch */
1715     - in_kernel = s->event;
1716     + if (is_code(s->eip) && !domain_switch) {
1717     + if (s->event <= CPU_MODE_XEN) {
1718     + /* xen/kernel/userspace switch */
1719     + cpu_mode = s->event;
1720     if (state == sb_buffer_start)
1721     state = sb_sample_start;
1722     - add_kernel_ctx_switch(s->event);
1723     + add_cpu_mode_switch(s->event);
1724     } else if (s->event == CPU_TRACE_BEGIN) {
1725     state = sb_bt_start;
1726     add_trace_begin();
1727     + } else if (s->event == CPU_DOMAIN_SWITCH) {
1728     + domain_switch = 1;
1729     } else {
1730     struct mm_struct * oldmm = mm;
1731    
1732     @@ -534,11 +568,21 @@
1733     add_user_ctx_switch(new, cookie);
1734     }
1735     } else {
1736     - if (state >= sb_bt_start &&
1737     - !add_sample(mm, s, in_kernel)) {
1738     - if (state == sb_bt_start) {
1739     - state = sb_bt_ignore;
1740     - atomic_inc(&oprofile_stats.bt_lost_no_mapping);
1741     + if (domain_switch) {
1742     + cpu_current_domain[cpu] = s->eip;
1743     + add_domain_switch(s->eip);
1744     + domain_switch = 0;
1745     + } else {
1746     + if (cpu_current_domain[cpu] !=
1747     + COORDINATOR_DOMAIN) {
1748     + add_sample_entry(s->eip, s->event);
1749     + }
1750     + else if (state >= sb_bt_start &&
1751     + !add_sample(mm, s, cpu_mode)) {
1752     + if (state == sb_bt_start) {
1753     + state = sb_bt_ignore;
1754     + atomic_inc(&oprofile_stats.bt_lost_no_mapping);
1755     + }
1756     }
1757     }
1758     }
1759     @@ -547,6 +591,11 @@
1760     }
1761     release_mm(mm);
1762    
1763     + /* We reset domain to COORDINATOR at each CPU switch */
1764     + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
1765     + add_domain_switch(COORDINATOR_DOMAIN);
1766     + }
1767     +
1768     mark_done(cpu);
1769    
1770     mutex_unlock(&buffer_mutex);
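
Note on the record format: add_cpu_mode_switch() and add_domain_switch()
frame their records with ESCAPE_CODE, so a consumer of the event buffer
can tell markers from ordinary pc/event pairs. The switch codes below
come from the include/linux/oprofile.h hunk further down in this patch;
the ESCAPE_CODE value of ~0UL is assumed from mainline oprofile, and
decode() is an illustrative userspace sketch, not kernel code (bounds
checks and the other escape records are elided).

    #include <stdio.h>
    #include <stddef.h>

    #define ESCAPE_CODE              (~0UL) /* assumed, as in mainline */
    #define KERNEL_ENTER_SWITCH_CODE 4
    #define USER_ENTER_SWITCH_CODE   5
    #define XEN_ENTER_SWITCH_CODE    10
    #define DOMAIN_SWITCH_CODE       13

    static void decode(const unsigned long *buf, size_t n)
    {
        size_t i = 0;

        while (i < n) {
            if (buf[i] != ESCAPE_CODE) {
                /* ordinary pc/event sample pair */
                printf("sample pc=%#lx event=%lu\n", buf[i], buf[i + 1]);
                i += 2;
                continue;
            }
            switch (buf[i + 1]) {
            case USER_ENTER_SWITCH_CODE:   puts("-> user mode");   i += 2; break;
            case KERNEL_ENTER_SWITCH_CODE: puts("-> kernel mode"); i += 2; break;
            case XEN_ENTER_SWITCH_CODE:    puts("-> xen mode");    i += 2; break;
            case DOMAIN_SWITCH_CODE:       /* one extra payload word */
                printf("-> domain %ld\n", (long)buf[i + 2]);
                i += 3;
                break;
            default:                       i += 2; break;
            }
        }
    }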
1771     --- a/drivers/oprofile/cpu_buffer.c
1772     +++ b/drivers/oprofile/cpu_buffer.c
1773     @@ -6,6 +6,10 @@
1774     *
1775     * @author John Levon <levon@movementarian.org>
1776     *
1777     + * Modified by Aravind Menon for Xen
1778     + * These modifications are:
1779     + * Copyright (C) 2005 Hewlett-Packard Co.
1780     + *
1781     * Each CPU has a local buffer that stores PC value/event
1782     * pairs. We also log context switches when we notice them.
1783     * Eventually each CPU's buffer is processed into the global
1784     @@ -34,6 +38,8 @@
1785     #define DEFAULT_TIMER_EXPIRE (HZ / 10)
1786     static int work_enabled;
1787    
1788     +static int32_t current_domain = COORDINATOR_DOMAIN;
1789     +
1790     void free_cpu_buffers(void)
1791     {
1792     int i;
1793     @@ -57,7 +63,7 @@
1794     goto fail;
1795    
1796     b->last_task = NULL;
1797     - b->last_is_kernel = -1;
1798     + b->last_cpu_mode = -1;
1799     b->tracing = 0;
1800     b->buffer_size = buffer_size;
1801     b->tail_pos = 0;
1802     @@ -115,7 +121,7 @@
1803     * collected will populate the buffer with proper
1804     * values to initialize the buffer
1805     */
1806     - cpu_buf->last_is_kernel = -1;
1807     + cpu_buf->last_cpu_mode = -1;
1808     cpu_buf->last_task = NULL;
1809     }
1810    
1811     @@ -165,13 +171,13 @@
1812     * because of the head/tail separation of the writer and reader
1813     * of the CPU buffer.
1814     *
1815     - * is_kernel is needed because on some architectures you cannot
1816     + * cpu_mode is needed because on some architectures you cannot
1817     * tell if you are in kernel or user space simply by looking at
1818     - * pc. We tag this in the buffer by generating kernel enter/exit
1819     - * events whenever is_kernel changes
1820     + * pc. We tag this in the buffer by generating kernel/user (and xen)
1821     + * enter events whenever cpu_mode changes
1822     */
1823     static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
1824     - int is_kernel, unsigned long event)
1825     + int cpu_mode, unsigned long event)
1826     {
1827     struct task_struct * task;
1828    
1829     @@ -187,18 +193,18 @@
1830     return 0;
1831     }
1832    
1833     - is_kernel = !!is_kernel;
1834     -
1835     task = current;
1836    
1837     /* notice a switch from user->kernel or vice versa */
1838     - if (cpu_buf->last_is_kernel != is_kernel) {
1839     - cpu_buf->last_is_kernel = is_kernel;
1840     - add_code(cpu_buf, is_kernel);
1841     + if (cpu_buf->last_cpu_mode != cpu_mode) {
1842     + cpu_buf->last_cpu_mode = cpu_mode;
1843     + add_code(cpu_buf, cpu_mode);
1844     }
1845     -
1846     +
1847     /* notice a task switch */
1848     - if (cpu_buf->last_task != task) {
1849     + /* if not processing other domain samples */
1850     + if ((cpu_buf->last_task != task) &&
1851     + (current_domain == COORDINATOR_DOMAIN)) {
1852     cpu_buf->last_task = task;
1853     add_code(cpu_buf, (unsigned long)task);
1854     }
1855     @@ -282,6 +288,25 @@
1856     add_sample(cpu_buf, pc, 0);
1857     }
1858    
1859     +int oprofile_add_domain_switch(int32_t domain_id)
1860     +{
1861     + struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
1862     +
1863     + /* should have space for switching into and out of domain
1864     + (2 slots each) plus one sample and one cpu mode switch */
1865     + if (((nr_available_slots(cpu_buf) < 6) &&
1866     + (domain_id != COORDINATOR_DOMAIN)) ||
1867     + (nr_available_slots(cpu_buf) < 2))
1868     + return 0;
1869     +
1870     + add_code(cpu_buf, CPU_DOMAIN_SWITCH);
1871     + add_sample(cpu_buf, domain_id, 0);
1872     +
1873     + current_domain = domain_id;
1874     +
1875     + return 1;
1876     +}
1877     +
1878     /*
1879     * This serves to avoid cpu buffer overflow, and makes sure
1880     * the task mortuary progresses
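
The "< 6" test in oprofile_add_domain_switch() above matches its
comment: two slots to switch into a domain (the CPU_DOMAIN_SWITCH
escape plus the domain-id sample), two to switch back out, one sample,
and one cpu-mode switch, at one op_sample per slot. A self-contained
model of the head/tail arithmetic behind nr_available_slots(), assuming
one slot is kept free so that head == tail means "empty":

    struct ring {
        unsigned long head;     /* next write position (producer) */
        unsigned long tail;     /* next read position (consumer)  */
        unsigned long size;     /* number of slots in the buffer  */
    };

    /* Mirrors the slot accounting consulted by the check above. */
    static unsigned long nr_available_slots_model(const struct ring *r)
    {
        if (r->tail > r->head)
            return r->tail - r->head - 1;
        return r->tail + (r->size - r->head) - 1;
    }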
1881     --- a/drivers/oprofile/cpu_buffer.h
1882     +++ b/drivers/oprofile/cpu_buffer.h
1883     @@ -36,7 +36,7 @@
1884     volatile unsigned long tail_pos;
1885     unsigned long buffer_size;
1886     struct task_struct * last_task;
1887     - int last_is_kernel;
1888     + int last_cpu_mode;
1889     int tracing;
1890     struct op_sample * buffer;
1891     unsigned long sample_received;
1892     @@ -52,7 +52,10 @@
1893     void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
1894    
1895     /* transient events for the CPU buffer -> event buffer */
1896     -#define CPU_IS_KERNEL 1
1897     -#define CPU_TRACE_BEGIN 2
1898     +#define CPU_MODE_USER 0
1899     +#define CPU_MODE_KERNEL 1
1900     +#define CPU_MODE_XEN 2
1901     +#define CPU_TRACE_BEGIN 3
1902     +#define CPU_DOMAIN_SWITCH 4
1903    
1904     #endif /* OPROFILE_CPU_BUFFER_H */
1905     --- a/drivers/oprofile/event_buffer.h
1906     +++ b/drivers/oprofile/event_buffer.h
1907     @@ -23,6 +23,9 @@
1908     #define INVALID_COOKIE ~0UL
1909     #define NO_COOKIE 0UL
1910    
1911     +/* Constant used to refer to coordinator domain (Xen) */
1912     +#define COORDINATOR_DOMAIN -1
1913     +
1914     extern const struct file_operations event_buffer_fops;
1915    
1916     /* mutex between sync_cpu_buffers() and the
1917     --- a/drivers/oprofile/oprof.c
1918     +++ b/drivers/oprofile/oprof.c
1919     @@ -5,6 +5,10 @@
1920     * @remark Read the file COPYING
1921     *
1922     * @author John Levon <levon@movementarian.org>
1923     + *
1924     + * Modified by Aravind Menon for Xen
1925     + * These modifications are:
1926     + * Copyright (C) 2005 Hewlett-Packard Co.
1927     */
1928    
1929     #include <linux/kernel.h>
1930     @@ -33,6 +37,32 @@
1931     */
1932     static int timer = 0;
1933    
1934     +int oprofile_set_active(int active_domains[], unsigned int adomains)
1935     +{
1936     + int err;
1937     +
1938     + if (!oprofile_ops.set_active)
1939     + return -EINVAL;
1940     +
1941     + mutex_lock(&start_mutex);
1942     + err = oprofile_ops.set_active(active_domains, adomains);
1943     + mutex_unlock(&start_mutex);
1944     + return err;
1945     +}
1946     +
1947     +int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
1948     +{
1949     + int err;
1950     +
1951     + if (!oprofile_ops.set_passive)
1952     + return -EINVAL;
1953     +
1954     + mutex_lock(&start_mutex);
1955     + err = oprofile_ops.set_passive(passive_domains, pdomains);
1956     + mutex_unlock(&start_mutex);
1957     + return err;
1958     +}
1959     +
1960     int oprofile_setup(void)
1961     {
1962     int err;
1963     --- a/drivers/oprofile/oprof.h
1964     +++ b/drivers/oprofile/oprof.h
1965     @@ -35,5 +35,8 @@
1966     void oprofile_timer_init(struct oprofile_operations * ops);
1967    
1968     int oprofile_set_backtrace(unsigned long depth);
1969     +
1970     +int oprofile_set_active(int active_domains[], unsigned int adomains);
1971     +int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
1972    
1973     #endif /* OPROF_H */
1974     --- a/drivers/oprofile/oprofile_files.c
1975     +++ b/drivers/oprofile/oprofile_files.c
1976     @@ -5,15 +5,21 @@
1977     * @remark Read the file COPYING
1978     *
1979     * @author John Levon <levon@movementarian.org>
1980     + *
1981     + * Modified by Aravind Menon for Xen
1982     + * These modifications are:
1983     + * Copyright (C) 2005 Hewlett-Packard Co.
1984     */
1985    
1986     #include <linux/fs.h>
1987     #include <linux/oprofile.h>
1988     +#include <asm/uaccess.h>
1989     +#include <linux/ctype.h>
1990    
1991     #include "event_buffer.h"
1992     #include "oprofile_stats.h"
1993     #include "oprof.h"
1994     -
1995     +
1996     unsigned long fs_buffer_size = 131072;
1997     unsigned long fs_cpu_buffer_size = 8192;
1998     unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
1999     @@ -117,11 +123,202 @@
2000     static const struct file_operations dump_fops = {
2001     .write = dump_write,
2002     };
2003     -
2004     +
2005     +#define TMPBUFSIZE 512
2006     +
2007     +static unsigned int adomains = 0;
2008     +static int active_domains[MAX_OPROF_DOMAINS + 1];
2009     +static DEFINE_MUTEX(adom_mutex);
2010     +
2011     +static ssize_t adomain_write(struct file * file, char const __user * buf,
2012     + size_t count, loff_t * offset)
2013     +{
2014     + char *tmpbuf;
2015     + char *startp, *endp;
2016     + int i;
2017     + unsigned long val;
2018     + ssize_t retval = count;
2019     +
2020     + if (*offset)
2021     + return -EINVAL;
2022     + if (count > TMPBUFSIZE - 1)
2023     + return -EINVAL;
2024     +
2025     + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2026     + return -ENOMEM;
2027     +
2028     + if (copy_from_user(tmpbuf, buf, count)) {
2029     + kfree(tmpbuf);
2030     + return -EFAULT;
2031     + }
2032     + tmpbuf[count] = 0;
2033     +
2034     + mutex_lock(&adom_mutex);
2035     +
2036     + startp = tmpbuf;
2037     + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2038     + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2039     + val = simple_strtoul(startp, &endp, 0);
2040     + if (endp == startp)
2041     + break;
2042     + while (ispunct(*endp) || isspace(*endp))
2043     + endp++;
2044     + active_domains[i] = val;
2045     + if (active_domains[i] != val)
2046     + /* Overflow, force error below */
2047     + i = MAX_OPROF_DOMAINS + 1;
2048     + startp = endp;
2049     + }
2050     + /* Force error on trailing junk */
2051     + adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2052     +
2053     + kfree(tmpbuf);
2054     +
2055     + if (adomains > MAX_OPROF_DOMAINS
2056     + || oprofile_set_active(active_domains, adomains)) {
2057     + adomains = 0;
2058     + retval = -EINVAL;
2059     + }
2060     +
2061     + mutex_unlock(&adom_mutex);
2062     + return retval;
2063     +}
2064     +
2065     +static ssize_t adomain_read(struct file * file, char __user * buf,
2066     + size_t count, loff_t * offset)
2067     +{
2068     + char * tmpbuf;
2069     + size_t len;
2070     + int i;
2071     + ssize_t retval;
2072     +
2073     + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2074     + return -ENOMEM;
2075     +
2076     + mutex_lock(&adom_mutex);
2077     +
2078     + len = 0;
2079     + for (i = 0; i < adomains; i++)
2080     + len += snprintf(tmpbuf + len,
2081     + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2082     + "%u ", active_domains[i]);
2083     + WARN_ON(len > TMPBUFSIZE);
2084     + if (len != 0 && len <= TMPBUFSIZE)
2085     + tmpbuf[len-1] = '\n';
2086     +
2087     + mutex_unlock(&adom_mutex);
2088     +
2089     + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2090     +
2091     + kfree(tmpbuf);
2092     + return retval;
2093     +}
2094     +
2095     +
2096     +static struct file_operations active_domain_ops = {
2097     + .read = adomain_read,
2098     + .write = adomain_write,
2099     +};
2100     +
2101     +static unsigned int pdomains = 0;
2102     +static int passive_domains[MAX_OPROF_DOMAINS];
2103     +static DEFINE_MUTEX(pdom_mutex);
2104     +
2105     +static ssize_t pdomain_write(struct file * file, char const __user * buf,
2106     + size_t count, loff_t * offset)
2107     +{
2108     + char *tmpbuf;
2109     + char *startp, *endp;
2110     + int i;
2111     + unsigned long val;
2112     + ssize_t retval = count;
2113     +
2114     + if (*offset)
2115     + return -EINVAL;
2116     + if (count > TMPBUFSIZE - 1)
2117     + return -EINVAL;
2118     +
2119     + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2120     + return -ENOMEM;
2121     +
2122     + if (copy_from_user(tmpbuf, buf, count)) {
2123     + kfree(tmpbuf);
2124     + return -EFAULT;
2125     + }
2126     + tmpbuf[count] = 0;
2127     +
2128     + mutex_lock(&pdom_mutex);
2129     +
2130     + startp = tmpbuf;
2131     + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2132     + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2133     + val = simple_strtoul(startp, &endp, 0);
2134     + if (endp == startp)
2135     + break;
2136     + while (ispunct(*endp) || isspace(*endp))
2137     + endp++;
2138     + passive_domains[i] = val;
2139     + if (passive_domains[i] != val)
2140     + /* Overflow, force error below */
2141     + i = MAX_OPROF_DOMAINS + 1;
2142     + startp = endp;
2143     + }
2144     + /* Force error on trailing junk */
2145     + pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2146     +
2147     + kfree(tmpbuf);
2148     +
2149     + if (pdomains > MAX_OPROF_DOMAINS
2150     + || oprofile_set_passive(passive_domains, pdomains)) {
2151     + pdomains = 0;
2152     + retval = -EINVAL;
2153     + }
2154     +
2155     + mutex_unlock(&pdom_mutex);
2156     + return retval;
2157     +}
2158     +
2159     +static ssize_t pdomain_read(struct file * file, char __user * buf,
2160     + size_t count, loff_t * offset)
2161     +{
2162     + char * tmpbuf;
2163     + size_t len;
2164     + int i;
2165     + ssize_t retval;
2166     +
2167     + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2168     + return -ENOMEM;
2169     +
2170     + mutex_lock(&pdom_mutex);
2171     +
2172     + len = 0;
2173     + for (i = 0; i < pdomains; i++)
2174     + len += snprintf(tmpbuf + len,
2175     + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2176     + "%u ", passive_domains[i]);
2177     + WARN_ON(len > TMPBUFSIZE);
2178     + if (len != 0 && len <= TMPBUFSIZE)
2179     + tmpbuf[len-1] = '\n';
2180     +
2181     + mutex_unlock(&pdom_mutex);
2182     +
2183     + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2184     +
2185     + kfree(tmpbuf);
2186     + return retval;
2187     +}
2188     +
2189     +static struct file_operations passive_domain_ops = {
2190     + .read = pdomain_read,
2191     + .write = pdomain_write,
2192     +};
2193     +
2194     void oprofile_create_files(struct super_block * sb, struct dentry * root)
2195     {
2196     oprofilefs_create_file(sb, root, "enable", &enable_fops);
2197     oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
2198     + oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
2199     + oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
2200     oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
2201     oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
2202     oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
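
The writers above accept domain IDs separated by any mix of punctuation
and whitespace, reject trailing junk, and cap the list at
MAX_OPROF_DOMAINS. From userspace the new files are driven with plain
write()s; a hedged sketch (the /dev/oprofile path assumes the usual
oprofilefs mount point, which this patch does not mandate):

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    /* Program the set of actively profiled domains, e.g. "0 1 2".
     * Returns 0 on success; the kernel answers -EINVAL for trailing
     * junk, overflow, or more than MAX_OPROF_DOMAINS entries. */
    static int set_active_domains(const char *list)
    {
        int fd = open("/dev/oprofile/active_domains", O_WRONLY);
        ssize_t n;

        if (fd < 0)
            return -1;
        n = write(fd, list, strlen(list));
        close(fd);
        return n < 0 ? -1 : 0;
    }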
2203     --- a/drivers/pci/bus.c
2204     +++ b/drivers/pci/bus.c
2205     @@ -17,6 +17,8 @@
2206    
2207     #include "pci.h"
2208    
2209     +extern int pci_mem_align;
2210     +
2211     /**
2212     * pci_bus_alloc_resource - allocate a resource from a parent bus
2213     * @bus: PCI bus
2214     @@ -44,6 +46,11 @@
2215    
2216     type_mask |= IORESOURCE_IO | IORESOURCE_MEM;
2217    
2218     + /* If the boot parameter 'pci-mem-align' was specified then we need to
2219     + align the memory addresses to page boundaries. */
2220     + if (pci_mem_align && (align < (PAGE_SIZE-1)))
2221     + align = PAGE_SIZE - 1;
2222     +
2223     for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
2224     struct resource *r = bus->resource[i];
2225     if (!r)
2226     --- a/drivers/pci/quirks.c
2227     +++ b/drivers/pci/quirks.c
2228     @@ -24,6 +24,40 @@
2229     #include <linux/kallsyms.h>
2230     #include "pci.h"
2231    
2232     +/* A global flag which signals if we should page-align PCI mem windows. */
2233     +int pci_mem_align = 0;
2234     +
2235     +static int __init set_pci_mem_align(char *str)
2236     +{
2237     + pci_mem_align = 1;
2238     + return 1;
2239     +}
2240     +__setup("pci-mem-align", set_pci_mem_align);
2241     +
2242     +/* This quirk function forces all memory resources which are
2243     + * assigned to PCI devices to be page-aligned.
2244     + */
2245     +static void __devinit quirk_align_mem_resources(struct pci_dev *dev)
2246     +{
2247     + int i;
2248     + struct resource *r;
2249     + resource_size_t old_start;
2250     +
2251     + if (!pci_mem_align)
2252     + return;
2253     +
2254     + for (i=0; i < DEVICE_COUNT_RESOURCE; i++) {
2255     + r = &dev->resource[i];
2256     + if ((r == NULL) || !(r->flags & IORESOURCE_MEM))
2257     + continue;
2258     +
2259     + old_start = r->start;
2260     + r->start = (r->start + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
2261     + r->end = r->end - (old_start - r->start);
2262     + }
2263     +}
2264     +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_align_mem_resources);
2265     +
2266     /* The Mellanox Tavor device gives false positive parity errors
2267     * Mark this device with a broken_parity_status, to allow
2268     * PCI scanning code to "skip" this now blacklisted device.
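
The rounding in quirk_align_mem_resources() moves the resource start up
to the next page boundary and shifts the end by the same distance, so
the length is preserved; the end adjustment relies on well-defined
unsigned wraparound, since old_start - r->start is negative after the
round-up. A standalone worked example, assuming 4 KiB pages:

    #include <assert.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
        unsigned long start = 0xf0001200UL, end = 0xf00012ffUL; /* small BAR */
        unsigned long old_start = start;

        start = (start + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); /* 0xf0002000 */
        end = end - (old_start - start);    /* unsigned wrap: 0xf00020ff */

        assert(end - start == 0xffUL);      /* length unchanged */
        return 0;
    }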
2269     --- a/fs/aio.c
2270     +++ b/fs/aio.c
2271     @@ -36,6 +36,11 @@
2272     #include <asm/uaccess.h>
2273     #include <asm/mmu_context.h>
2274    
2275     +#ifdef CONFIG_EPOLL
2276     +#include <linux/poll.h>
2277     +#include <linux/eventpoll.h>
2278     +#endif
2279     +
2280     #if DEBUG > 1
2281     #define dprintk printk
2282     #else
2283     @@ -1008,6 +1013,11 @@
2284     if (waitqueue_active(&ctx->wait))
2285     wake_up(&ctx->wait);
2286    
2287     +#ifdef CONFIG_EPOLL
2288     + if (ctx->file && waitqueue_active(&ctx->poll_wait))
2289     + wake_up(&ctx->poll_wait);
2290     +#endif
2291     +
2292     spin_unlock_irqrestore(&ctx->ctx_lock, flags);
2293     return ret;
2294     }
2295     @@ -1015,6 +1025,8 @@
2296     /* aio_read_evt
2297     * Pull an event off of the ioctx's event ring. Returns the number of
2298     * events fetched (0 or 1 ;-)
2299     + * If the ent parameter is NULL, just returns whether an event is
2300     + * available, without consuming it.
2301     * FIXME: make this use cmpxchg.
2302     * TODO: make the ringbuffer user mmap()able (requires FIXME).
2303     */
2304     @@ -1037,13 +1049,18 @@
2305    
2306     head = ring->head % info->nr;
2307     if (head != ring->tail) {
2308     - struct io_event *evp = aio_ring_event(info, head, KM_USER1);
2309     - *ent = *evp;
2310     - head = (head + 1) % info->nr;
2311     - smp_mb(); /* finish reading the event before updatng the head */
2312     - ring->head = head;
2313     - ret = 1;
2314     - put_aio_ring_event(evp, KM_USER1);
2315     + if (ent) { /* event requested */
2316     + struct io_event *evp =
2317     + aio_ring_event(info, head, KM_USER1);
2318     + *ent = *evp;
2319     + head = (head + 1) % info->nr;
2320     + /* finish reading the event before updating the head */
2321     + smp_mb();
2322     + ring->head = head;
2323     + ret = 1;
2324     + put_aio_ring_event(evp, KM_USER1);
2325     + } else /* only need to know availability */
2326     + ret = 1;
2327     }
2328     spin_unlock(&info->ring_lock);
2329    
2330     @@ -1234,6 +1251,13 @@
2331    
2332     aio_cancel_all(ioctx);
2333     wait_for_all_aios(ioctx);
2334     +#ifdef CONFIG_EPOLL
2335     + /* forget the poll file, but it's up to the user to close it */
2336     + if (ioctx->file) {
2337     + ioctx->file->private_data = 0;
2338     + ioctx->file = 0;
2339     + }
2340     +#endif
2341    
2342     /*
2343     * Wake up any waiters. The setting of ctx->dead must be seen
2344     @@ -1244,6 +1268,68 @@
2345     put_ioctx(ioctx); /* once for the lookup */
2346     }
2347    
2348     +#ifdef CONFIG_EPOLL
2349     +
2350     +static int aio_queue_fd_close(struct inode *inode, struct file *file)
2351     +{
2352     + struct kioctx *ioctx = file->private_data;
2353     + if (ioctx) {
2354     + file->private_data = 0;
2355     + spin_lock_irq(&ioctx->ctx_lock);
2356     + ioctx->file = 0;
2357     + spin_unlock_irq(&ioctx->ctx_lock);
2358     + }
2359     + return 0;
2360     +}
2361     +
2362     +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
2363     +{ unsigned int pollflags = 0;
2364     + struct kioctx *ioctx = file->private_data;
2365     +
2366     + if (ioctx) {
2367     +
2368     + spin_lock_irq(&ioctx->ctx_lock);
2369     + /* Insert inside our poll wait queue */
2370     + poll_wait(file, &ioctx->poll_wait, wait);
2371     +
2372     + /* Check our condition */
2373     + if (aio_read_evt(ioctx, 0))
2374     + pollflags = POLLIN | POLLRDNORM;
2375     + spin_unlock_irq(&ioctx->ctx_lock);
2376     + }
2377     +
2378     + return pollflags;
2379     +}
2380     +
2381     +static const struct file_operations aioq_fops = {
2382     + .release = aio_queue_fd_close,
2383     + .poll = aio_queue_fd_poll
2384     +};
2385     +
2386     +/* make_aio_fd:
2387     + * Create a file descriptor that can be used to poll the event queue.
2388     + * Based and piggybacked on the excellent epoll code.
2389     + */
2390     +
2391     +static int make_aio_fd(struct kioctx *ioctx)
2392     +{
2393     + int error, fd;
2394     + struct inode *inode;
2395     + struct file *file;
2396     +
2397     + error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
2398     + if (error)
2399     + return error;
2400     +
2401     + /* associate the file with the IO context */
2402     + file->private_data = ioctx;
2403     + ioctx->file = file;
2404     + init_waitqueue_head(&ioctx->poll_wait);
2405     + return fd;
2406     +}
2407     +#endif
2408     +
2409     +
2410     /* sys_io_setup:
2411     * Create an aio_context capable of receiving at least nr_events.
2412     * ctxp must not point to an aio_context that already exists, and
2413     @@ -1256,18 +1342,30 @@
2414     * resources are available. May fail with -EFAULT if an invalid
2415     * pointer is passed for ctxp. Will fail with -ENOSYS if not
2416     * implemented.
2417     + *
2418     + * To request a selectable fd, the user context has to be initialized
2419     + * to 1, instead of 0, and the return value is the fd.
2420     + * This keeps the system call compatible, since a non-zero value
2421     + * was not allowed so far.
2422     */
2423     asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
2424     {
2425     struct kioctx *ioctx = NULL;
2426     unsigned long ctx;
2427     long ret;
2428     + int make_fd = 0;
2429    
2430     ret = get_user(ctx, ctxp);
2431     if (unlikely(ret))
2432     goto out;
2433    
2434     ret = -EINVAL;
2435     +#ifdef CONFIG_EPOLL
2436     + if (ctx == 1) {
2437     + make_fd = 1;
2438     + ctx = 0;
2439     + }
2440     +#endif
2441     if (unlikely(ctx || nr_events == 0)) {
2442     pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
2443     ctx, nr_events);
2444     @@ -1278,8 +1376,12 @@
2445     ret = PTR_ERR(ioctx);
2446     if (!IS_ERR(ioctx)) {
2447     ret = put_user(ioctx->user_id, ctxp);
2448     - if (!ret)
2449     - return 0;
2450     +#ifdef CONFIG_EPOLL
2451     + if (make_fd && ret >= 0)
2452     + ret = make_aio_fd(ioctx);
2453     +#endif
2454     + if (ret >= 0)
2455     + return ret;
2456    
2457     get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
2458     io_destroy(ioctx);
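
With the change above, an initial context value of 1 turns io_setup()
into a request for a pollable fd (CONFIG_EPOLL kernels only); the real
context id is still written back through ctxp and the fd is the
syscall's return value. A hedged userspace sketch using the raw
syscall; the aio_context_t typedef is assumed to match the kernel ABI's
unsigned long:

    #define _GNU_SOURCE
    #include <poll.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    typedef unsigned long aio_context_t;    /* assumed ABI-compatible */

    int main(void)
    {
        aio_context_t ctx = 1;          /* 1 = "give me a pollable fd" */
        long fd = syscall(SYS_io_setup, 128, &ctx);

        if (fd < 0) {
            perror("io_setup");
            return 1;
        }
        /* ctx now holds the real context id; the fd reports POLLIN
         * whenever completed events are waiting in the ring. */
        struct pollfd p = { .fd = (int)fd, .events = POLLIN };
        poll(&p, 1, 0);
        printf("events pending: %d\n", !!(p.revents & POLLIN));
        return 0;
    }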
2459     --- a/fs/compat_ioctl.c
2460     +++ b/fs/compat_ioctl.c
2461     @@ -114,6 +114,13 @@
2462     #include <asm/fbio.h>
2463     #endif
2464    
2465     +#ifdef CONFIG_XEN
2466     +#include <xen/interface/xen.h>
2467     +#include <xen/public/evtchn.h>
2468     +#include <xen/public/privcmd.h>
2469     +#include <xen/compat_ioctl.h>
2470     +#endif
2471     +
2472     static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
2473     unsigned long arg, struct file *f)
2474     {
2475     @@ -2834,6 +2841,18 @@
2476     IGNORE_IOCTL(FBIOSCURSOR32)
2477     IGNORE_IOCTL(FBIOGCURSOR32)
2478     #endif
2479     +
2480     +#ifdef CONFIG_XEN
2481     +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32)
2482     +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32)
2483     +COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL)
2484     +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ)
2485     +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN)
2486     +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_UNBOUND_PORT)
2487     +COMPATIBLE_IOCTL(IOCTL_EVTCHN_UNBIND)
2488     +COMPATIBLE_IOCTL(IOCTL_EVTCHN_NOTIFY)
2489     +COMPATIBLE_IOCTL(IOCTL_EVTCHN_RESET)
2490     +#endif
2491     };
2492    
2493     #define IOCTL_HASHSIZE 256
2494     --- a/fs/splice.c
2495     +++ b/fs/splice.c
2496     @@ -1218,6 +1218,9 @@
2497     if (!access_ok(VERIFY_READ, base, len))
2498     break;
2499    
2500     + if (unlikely(!access_ok(VERIFY_READ, base, len)))
2501     + break;
2502     +
2503     /*
2504     * Get this base offset and number of pages, then map
2505     * in the user pages.
2506     --- a/include/asm-generic/pci.h
2507     +++ b/include/asm-generic/pci.h
2508     @@ -43,7 +43,9 @@
2509     return root;
2510     }
2511    
2512     +#ifndef pcibios_scan_all_fns
2513     #define pcibios_scan_all_fns(a, b) 0
2514     +#endif
2515    
2516     #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
2517     static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
2518     --- a/include/asm-generic/pgtable.h
2519     +++ b/include/asm-generic/pgtable.h
2520     @@ -99,6 +99,10 @@
2521     }
2522     #endif
2523    
2524     +#ifndef arch_change_pte_range
2525     +#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
2526     +#endif
2527     +
2528     #ifndef __HAVE_ARCH_PTE_SAME
2529     #define pte_same(A,B) (pte_val(A) == pte_val(B))
2530     #endif
2531     --- a/include/linux/aio.h
2532     +++ b/include/linux/aio.h
2533     @@ -200,6 +200,11 @@
2534     struct aio_ring_info ring_info;
2535    
2536     struct delayed_work wq;
2537     +#ifdef CONFIG_EPOLL
2538     + /* poll integration */
2539     + wait_queue_head_t poll_wait;
2540     + struct file *file;
2541     +#endif
2542     };
2543    
2544     /* prototypes */
2545     --- a/include/linux/interrupt.h
2546     +++ b/include/linux/interrupt.h
2547     @@ -194,6 +194,12 @@
2548     }
2549     #endif /* CONFIG_GENERIC_HARDIRQS */
2550    
2551     +#ifdef CONFIG_HAVE_IRQ_IGNORE_UNHANDLED
2552     +int irq_ignore_unhandled(unsigned int irq);
2553     +#else
2554     +#define irq_ignore_unhandled(irq) 0
2555     +#endif
2556     +
2557     #ifndef __ARCH_SET_SOFTIRQ_PENDING
2558     #define set_softirq_pending(x) (local_softirq_pending() = (x))
2559     #define or_softirq_pending(x) (local_softirq_pending() |= (x))
2560     --- a/include/linux/kexec.h
2561     +++ b/include/linux/kexec.h
2562     @@ -46,6 +46,13 @@
2563     KEXEC_CORE_NOTE_NAME_BYTES + \
2564     KEXEC_CORE_NOTE_DESC_BYTES )
2565    
2566     +#ifndef KEXEC_ARCH_HAS_PAGE_MACROS
2567     +#define kexec_page_to_pfn(page) page_to_pfn(page)
2568     +#define kexec_pfn_to_page(pfn) pfn_to_page(pfn)
2569     +#define kexec_virt_to_phys(addr) virt_to_phys(addr)
2570     +#define kexec_phys_to_virt(addr) phys_to_virt(addr)
2571     +#endif
2572     +
2573     /*
2574     * This structure is used to hold the arguments that are used when loading
2575     * kernel binaries.
2576     @@ -106,6 +113,12 @@
2577     extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
2578     extern int machine_kexec_prepare(struct kimage *image);
2579     extern void machine_kexec_cleanup(struct kimage *image);
2580     +#ifdef CONFIG_XEN
2581     +extern int xen_machine_kexec_load(struct kimage *image);
2582     +extern void xen_machine_kexec_unload(struct kimage *image);
2583     +extern void xen_machine_kexec_setup_resources(void);
2584     +extern void xen_machine_kexec_register_resources(struct resource *res);
2585     +#endif
2586     extern asmlinkage long sys_kexec_load(unsigned long entry,
2587     unsigned long nr_segments,
2588     struct kexec_segment __user *segments,
2589     @@ -154,6 +167,10 @@
2590    
2591     #ifndef kexec_flush_icache_page
2592     #define kexec_flush_icache_page(page)
2593     +#endif
2594     +
2595     +#ifndef kexec_flush_icache_page
2596     +#define kexec_flush_icache_page(page)
2597     #endif
2598    
2599     #define KEXEC_ON_CRASH 0x00000001
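
The kexec_* wrappers introduced above exist so that an architecture can
take them over by defining KEXEC_ARCH_HAS_PAGE_MACROS; under Xen the
image must be described in machine frames rather than pseudo-physical
ones. A sketch of the override pattern, using the Xen patchset's
conversion-helper names for illustration (the real definitions live in
the arch headers, not in this hunk):

    #define KEXEC_ARCH_HAS_PAGE_MACROS
    #define kexec_page_to_pfn(page)  pfn_to_mfn(page_to_pfn(page))
    #define kexec_pfn_to_page(pfn)   pfn_to_page(mfn_to_pfn(pfn))
    #define kexec_virt_to_phys(addr) virt_to_machine(addr)
    #define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr))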
2600     --- a/include/linux/mm.h
2601     +++ b/include/linux/mm.h
2602     @@ -100,6 +100,9 @@
2603     #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
2604     #define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */
2605     #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
2606     +#ifdef CONFIG_XEN
2607     +#define VM_FOREIGN 0x00200000 /* Has pages belonging to another VM */
2608     +#endif
2609     #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
2610     #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
2611     #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
2612     @@ -172,6 +175,10 @@
2613     /* notification that a previously read-only page is about to become
2614     * writable, if an error is returned it will cause a SIGBUS */
2615     int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
2616     + /* Area-specific function for clearing the PTE at @ptep. Returns the
2617     + * original value of @ptep. */
2618     + pte_t (*zap_pte)(struct vm_area_struct *vma,
2619     + unsigned long addr, pte_t *ptep, int is_fullmm);
2620     #ifdef CONFIG_NUMA
2621     int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
2622     struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
2623     --- a/include/linux/oprofile.h
2624     +++ b/include/linux/oprofile.h
2625     @@ -16,6 +16,8 @@
2626     #include <linux/types.h>
2627     #include <linux/spinlock.h>
2628     #include <asm/atomic.h>
2629     +
2630     +#include <xen/interface/xenoprof.h>
2631    
2632     /* Each escaped entry is prefixed by ESCAPE_CODE
2633     * then one of the following codes, then the
2634     @@ -28,7 +30,7 @@
2635     #define CPU_SWITCH_CODE 2
2636     #define COOKIE_SWITCH_CODE 3
2637     #define KERNEL_ENTER_SWITCH_CODE 4
2638     -#define KERNEL_EXIT_SWITCH_CODE 5
2639     +#define USER_ENTER_SWITCH_CODE 5
2640     #define MODULE_LOADED_CODE 6
2641     #define CTX_TGID_CODE 7
2642     #define TRACE_BEGIN_CODE 8
2643     @@ -36,6 +38,7 @@
2644     #define XEN_ENTER_SWITCH_CODE 10
2645     #define SPU_PROFILING_CODE 11
2646     #define SPU_CTX_SWITCH_CODE 12
2647     +#define DOMAIN_SWITCH_CODE 13
2648    
2649     struct super_block;
2650     struct dentry;
2651     @@ -47,6 +50,11 @@
2652     /* create any necessary configuration files in the oprofile fs.
2653     * Optional. */
2654     int (*create_files)(struct super_block * sb, struct dentry * root);
2655     + /* setup active domains with Xen */
2656     + int (*set_active)(int *active_domains, unsigned int adomains);
2657     + /* setup passive domains with Xen */
2658     + int (*set_passive)(int *passive_domains, unsigned int pdomains);
2659     +
2660     /* Do any necessary interrupt setup. Optional. */
2661     int (*setup)(void);
2662     /* Do any necessary interrupt shutdown. Optional. */
2663     @@ -113,6 +121,8 @@
2664     /* add a backtrace entry, to be called from the ->backtrace callback */
2665     void oprofile_add_trace(unsigned long eip);
2666    
2667     +/* add a domain switch entry */
2668     +int oprofile_add_domain_switch(int32_t domain_id);
2669    
2670     /**
2671     * Create a file of the given name as a child of the given root, with
2672     --- a/include/linux/page-flags.h
2673     +++ b/include/linux/page-flags.h
2674     @@ -97,6 +97,8 @@
2675     #define PG_checked PG_owner_priv_1 /* Used by some filesystems */
2676     #define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */
2677    
2678     +#define PG_foreign 20 /* Page is owned by foreign allocator. */
2679     +
2680     #if (BITS_PER_LONG > 32)
2681     /*
2682     * 64-bit-only flags build down from bit 31
2683     @@ -296,6 +298,19 @@
2684     #define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags)
2685     #define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags)
2686    
2687     +#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
2688     +#define SetPageForeign(_page, dtor) do { \
2689     + set_bit(PG_foreign, &(_page)->flags); \
2690     + BUG_ON((dtor) == (void (*)(struct page *))0); \
2691     + (_page)->index = (long)(dtor); \
2692     +} while (0)
2693     +#define ClearPageForeign(page) do { \
2694     + clear_bit(PG_foreign, &(page)->flags); \
2695     + (page)->index = 0; \
2696     +} while (0)
2697     +#define PageForeignDestructor(_page) \
2698     + ((void (*)(struct page *))(_page)->index)(_page)
2699     +
2700     struct page; /* forward declaration */
2701    
2702     extern void cancel_dirty_page(struct page *page, unsigned int account_size);
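
The PG_foreign scheme repurposes page->index as a destructor slot: a
backend tags the pages it hands to common code, and the allocator's
free paths (see the mm/page_alloc.c hunks below) call the stored
destructor instead of returning the page to the buddy allocator. A
self-contained userspace model, with a hypothetical backend_release()
standing in for a real Xen backend driver:

    #include <assert.h>
    #include <stdio.h>

    struct page {
        unsigned long flags;
        long index;                     /* doubles as the destructor slot */
    };
    #define PG_foreign (1UL << 20)

    typedef void (*page_dtor_t)(struct page *);

    static void backend_release(struct page *p)     /* hypothetical */
    {
        puts("page handed back to its foreign allocator");
    }

    static void set_page_foreign(struct page *p, page_dtor_t dtor)
    {
        assert(dtor != NULL);
        p->flags |= PG_foreign;
        p->index = (long)dtor;          /* fits: both are pointer-sized */
    }

    static void free_page_model(struct page *p)
    {
        if (p->flags & PG_foreign) {    /* mirrors the free-path hook */
            ((page_dtor_t)p->index)(p);
            return;
        }
        /* ... otherwise: normal buddy free ... */
    }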
2703     --- a/include/linux/sched.h
2704     +++ b/include/linux/sched.h
2705     @@ -290,6 +290,7 @@
2706     extern void sched_show_task(struct task_struct *p);
2707    
2708     #ifdef CONFIG_DETECT_SOFTLOCKUP
2709     +extern unsigned long softlockup_get_next_event(void);
2710     extern void softlockup_tick(void);
2711     extern void spawn_softlockup_task(void);
2712     extern void touch_softlockup_watchdog(void);
2713     @@ -299,6 +300,10 @@
2714     extern unsigned long sysctl_hung_task_timeout_secs;
2715     extern unsigned long sysctl_hung_task_warnings;
2716     #else
2717     +static inline unsigned long softlockup_get_next_event(void)
2718     +{
2719     + return MAX_JIFFY_OFFSET;
2720     +}
2721     static inline void softlockup_tick(void)
2722     {
2723     }
2724     --- a/include/linux/skbuff.h
2725     +++ b/include/linux/skbuff.h
2726     @@ -217,6 +217,8 @@
2727     * @local_df: allow local fragmentation
2728     * @cloned: Head may be cloned (check refcnt to be sure)
2729     * @nohdr: Payload reference only, must not modify header
2730     + * @proto_data_valid: Protocol data validated since arriving at localhost
2731     + * @proto_csum_blank: Protocol csum must be added before leaving localhost
2732     * @pkt_type: Packet class
2733     * @fclone: skbuff clone status
2734     * @ip_summed: Driver fed us an IP checksum
2735     @@ -310,7 +312,13 @@
2736     __u16 tc_verd; /* traffic control verdict */
2737     #endif
2738     #endif
2739     +#ifndef CONFIG_XEN
2740     /* 2 byte hole */
2741     +#else
2742     + __u8 proto_data_valid:1,
2743     + proto_csum_blank:1;
2744     + /* 1 byte hole */
2745     +#endif
2746    
2747     #ifdef CONFIG_NET_DMA
2748     dma_cookie_t dma_cookie;
2749     --- a/include/linux/vermagic.h
2750     +++ b/include/linux/vermagic.h
2751     @@ -17,6 +17,11 @@
2752     #else
2753     #define MODULE_VERMAGIC_MODULE_UNLOAD ""
2754     #endif
2755     +#ifdef CONFIG_XEN
2756     +#define MODULE_VERMAGIC_XEN "Xen "
2757     +#else
2758     +#define MODULE_VERMAGIC_XEN
2759     +#endif
2760     #ifndef MODULE_ARCH_VERMAGIC
2761     #define MODULE_ARCH_VERMAGIC ""
2762     #endif
2763     @@ -24,5 +29,6 @@
2764     #define VERMAGIC_STRING \
2765     UTS_RELEASE " " \
2766     MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \
2767     - MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC
2768     + MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_XEN \
2769     + MODULE_ARCH_VERMAGIC
2770    
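
Effect of the new component: the module version magic on a Xen kernel
gains a "Xen " tag, so native modules of the same release refuse to
load on a Xen kernel and vice versa. For a hypothetical 2.6.25 SMP
build with module unloading, the string would expand along these lines
(the trailing arch suffix comes from MODULE_ARCH_VERMAGIC and varies by
configuration):

    /* Illustrative expansion only; UTS_RELEASE and the arch suffix
     * are assumptions for the example. */
    const char example_vermagic[] = "2.6.25 SMP mod_unload Xen 586 ";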
2771     --- a/kernel/irq/spurious.c
2772     +++ b/kernel/irq/spurious.c
2773     @@ -182,7 +182,7 @@
2774     */
2775     if (time_after(jiffies, desc->last_unhandled + HZ/10))
2776     desc->irqs_unhandled = 1;
2777     - else
2778     + else if (!irq_ignore_unhandled(irq))
2779     desc->irqs_unhandled++;
2780     desc->last_unhandled = jiffies;
2781     if (unlikely(action_ret != IRQ_NONE))
2782     --- a/kernel/kexec.c
2783     +++ b/kernel/kexec.c
2784     @@ -340,13 +340,26 @@
2785     return 0;
2786     }
2787    
2788     -static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
2789     +static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order, unsigned long limit)
2790     {
2791     struct page *pages;
2792    
2793     pages = alloc_pages(gfp_mask, order);
2794     if (pages) {
2795     unsigned int count, i;
2796     +#ifdef CONFIG_XEN
2797     + int address_bits;
2798     +
2799     + if (limit == ~0UL)
2800     + address_bits = BITS_PER_LONG;
2801     + else
2802     + address_bits = long_log2(limit);
2803     +
2804     + if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) {
2805     + __free_pages(pages, order);
2806     + return NULL;
2807     + }
2808     +#endif
2809     pages->mapping = NULL;
2810     set_page_private(pages, order);
2811     count = 1 << order;
2812     @@ -365,6 +378,9 @@
2813     count = 1 << order;
2814     for (i = 0; i < count; i++)
2815     ClearPageReserved(page + i);
2816     +#ifdef CONFIG_XEN
2817     + xen_destroy_contiguous_region((unsigned long)page_address(page), order);
2818     +#endif
2819     __free_pages(page, order);
2820     }
2821    
2822     @@ -410,10 +426,10 @@
2823     do {
2824     unsigned long pfn, epfn, addr, eaddr;
2825    
2826     - pages = kimage_alloc_pages(GFP_KERNEL, order);
2827     + pages = kimage_alloc_pages(GFP_KERNEL, order, KEXEC_CONTROL_MEMORY_LIMIT);
2828     if (!pages)
2829     break;
2830     - pfn = page_to_pfn(pages);
2831     + pfn = kexec_page_to_pfn(pages);
2832     epfn = pfn + count;
2833     addr = pfn << PAGE_SHIFT;
2834     eaddr = epfn << PAGE_SHIFT;
2835     @@ -447,6 +463,7 @@
2836     return pages;
2837     }
2838    
2839     +#ifndef CONFIG_XEN
2840     static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
2841     unsigned int order)
2842     {
2843     @@ -500,7 +517,7 @@
2844     }
2845     /* If I don't overlap any segments I have found my hole! */
2846     if (i == image->nr_segments) {
2847     - pages = pfn_to_page(hole_start >> PAGE_SHIFT);
2848     + pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT);
2849     break;
2850     }
2851     }
2852     @@ -527,6 +544,13 @@
2853    
2854     return pages;
2855     }
2856     +#else /* !CONFIG_XEN */
2857     +struct page *kimage_alloc_control_pages(struct kimage *image,
2858     + unsigned int order)
2859     +{
2860     + return kimage_alloc_normal_control_pages(image, order);
2861     +}
2862     +#endif
2863    
2864     static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
2865     {
2866     @@ -542,7 +566,7 @@
2867     return -ENOMEM;
2868    
2869     ind_page = page_address(page);
2870     - *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
2871     + *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION;
2872     image->entry = ind_page;
2873     image->last_entry = ind_page +
2874     ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
2875     @@ -603,13 +627,13 @@
2876     #define for_each_kimage_entry(image, ptr, entry) \
2877     for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
2878     ptr = (entry & IND_INDIRECTION)? \
2879     - phys_to_virt((entry & PAGE_MASK)): ptr +1)
2880     + kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1)
2881    
2882     static void kimage_free_entry(kimage_entry_t entry)
2883     {
2884     struct page *page;
2885    
2886     - page = pfn_to_page(entry >> PAGE_SHIFT);
2887     + page = kexec_pfn_to_page(entry >> PAGE_SHIFT);
2888     kimage_free_pages(page);
2889     }
2890    
2891     @@ -621,6 +645,10 @@
2892     if (!image)
2893     return;
2894    
2895     +#ifdef CONFIG_XEN
2896     + xen_machine_kexec_unload(image);
2897     +#endif
2898     +
2899     kimage_free_extra_pages(image);
2900     for_each_kimage_entry(image, ptr, entry) {
2901     if (entry & IND_INDIRECTION) {
2902     @@ -696,7 +724,7 @@
2903     * have a match.
2904     */
2905     list_for_each_entry(page, &image->dest_pages, lru) {
2906     - addr = page_to_pfn(page) << PAGE_SHIFT;
2907     + addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
2908     if (addr == destination) {
2909     list_del(&page->lru);
2910     return page;
2911     @@ -707,16 +735,16 @@
2912     kimage_entry_t *old;
2913    
2914     /* Allocate a page, if we run out of memory give up */
2915     - page = kimage_alloc_pages(gfp_mask, 0);
2916     + page = kimage_alloc_pages(gfp_mask, 0, KEXEC_SOURCE_MEMORY_LIMIT);
2917     if (!page)
2918     return NULL;
2919     /* If the page cannot be used file it away */
2920     - if (page_to_pfn(page) >
2921     + if (kexec_page_to_pfn(page) >
2922     (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
2923     list_add(&page->lru, &image->unuseable_pages);
2924     continue;
2925     }
2926     - addr = page_to_pfn(page) << PAGE_SHIFT;
2927     + addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
2928    
2929     /* If it is the destination page we want use it */
2930     if (addr == destination)
2931     @@ -739,7 +767,7 @@
2932     struct page *old_page;
2933    
2934     old_addr = *old & PAGE_MASK;
2935     - old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
2936     + old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT);
2937     copy_highpage(page, old_page);
2938     *old = addr | (*old & ~PAGE_MASK);
2939    
2940     @@ -789,7 +817,7 @@
2941     result = -ENOMEM;
2942     goto out;
2943     }
2944     - result = kimage_add_page(image, page_to_pfn(page)
2945     + result = kimage_add_page(image, kexec_page_to_pfn(page)
2946     << PAGE_SHIFT);
2947     if (result < 0)
2948     goto out;
2949     @@ -821,6 +849,7 @@
2950     return result;
2951     }
2952    
2953     +#ifndef CONFIG_XEN
2954     static int kimage_load_crash_segment(struct kimage *image,
2955     struct kexec_segment *segment)
2956     {
2957     @@ -843,7 +872,7 @@
2958     char *ptr;
2959     size_t uchunk, mchunk;
2960    
2961     - page = pfn_to_page(maddr >> PAGE_SHIFT);
2962     + page = kexec_pfn_to_page(maddr >> PAGE_SHIFT);
2963     if (!page) {
2964     result = -ENOMEM;
2965     goto out;
2966     @@ -892,6 +921,13 @@
2967    
2968     return result;
2969     }
2970     +#else /* CONFIG_XEN */
2971     +static int kimage_load_segment(struct kimage *image,
2972     + struct kexec_segment *segment)
2973     +{
2974     + return kimage_load_normal_segment(image, segment);
2975     +}
2976     +#endif
2977    
2978     /*
2979     * Exec Kernel system call: for obvious reasons only root may call it.
2980     @@ -1002,6 +1038,13 @@
2981     if (result)
2982     goto out;
2983     }
2984     +#ifdef CONFIG_XEN
2985     + if (image) {
2986     + result = xen_machine_kexec_load(image);
2987     + if (result)
2988     + goto out;
2989     + }
2990     +#endif
2991     /* Install the new kernel, and Uninstall the old */
2992     image = xchg(dest_image, image);
2993    
2994     --- a/kernel/softlockup.c
2995     +++ b/kernel/softlockup.c
2996     @@ -39,6 +39,19 @@
2997     .notifier_call = softlock_panic,
2998     };
2999    
3000     +unsigned long softlockup_get_next_event(void)
3001     +{
3002     + int this_cpu = smp_processor_id();
3003     + unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
3004     +
3005     + if (per_cpu(print_timestamp, this_cpu) == touch_timestamp ||
3006     + did_panic ||
3007     + !per_cpu(watchdog_task, this_cpu))
3008     + return MAX_JIFFY_OFFSET;
3009     +
3010     + return max_t(long, 0, touch_timestamp + HZ - jiffies);
3011     +}
3012     +
3013     /*
3014     * Returns seconds, approximately. We don't need nanosecond
3015     * resolution, and we don't need to waste time with a big divide when
3016     --- a/kernel/sysctl.c
3017     +++ b/kernel/sysctl.c
3018     @@ -742,7 +742,7 @@
3019     .proc_handler = &proc_dointvec,
3020     },
3021     #endif
3022     -#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
3023     +#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) && !defined(CONFIG_ACPI_PV_SLEEP)
3024     {
3025     .procname = "acpi_video_flags",
3026     .data = &acpi_realmode_flags,
3027     --- a/kernel/timer.c
3028     +++ b/kernel/timer.c
3029     @@ -802,7 +802,7 @@
3030     unsigned long get_next_timer_interrupt(unsigned long now)
3031     {
3032     struct tvec_base *base = __get_cpu_var(tvec_bases);
3033     - unsigned long expires;
3034     + unsigned long expires, sl_next;
3035    
3036     spin_lock(&base->lock);
3037     expires = __next_timer_interrupt(base);
3038     @@ -811,7 +811,11 @@
3039     if (time_before_eq(expires, now))
3040     return now;
3041    
3042     - return cmp_next_hrtimer_event(now, expires);
3043     + expires = cmp_next_hrtimer_event(now, expires);
3044     + sl_next = softlockup_get_next_event();
3045     +
3046     + return expires <= now || expires - now < sl_next
3047     + ? expires : now + sl_next;
3048     }
3049    
3050     #ifdef CONFIG_NO_IDLE_HZ
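
Spelled out, the expression added to get_next_timer_interrupt() returns
the earlier of the next timer/hrtimer event and the softlockup
watchdog's deadline, so a long tickless sleep cannot starve the
watchdog; sl_next is a jiffies delta, hence the addition to now. An
equivalent standalone form:

    /* Equivalent form of: expires <= now || expires - now < sl_next
     *                         ? expires : now + sl_next             */
    static unsigned long clamp_to_watchdog(unsigned long now,
                                           unsigned long expires,
                                           unsigned long sl_next)
    {
        if (expires <= now)             /* a timer is already due   */
            return expires;
        if (expires - now < sl_next)    /* timer fires before the   */
            return expires;             /* watchdog needs to run    */
        return now + sl_next;           /* wake up for the watchdog */
    }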
3051     --- a/mm/memory.c
3052     +++ b/mm/memory.c
3053     @@ -402,6 +402,12 @@
3054     return NULL;
3055     }
3056    
3057     +#if defined(CONFIG_XEN) && defined(CONFIG_X86)
3058     + /* XEN: Covers user-space grant mappings (even of local pages). */
3059     + if (unlikely(vma->vm_flags & VM_FOREIGN))
3060     + return NULL;
3061     +#endif
3062     +
3063     #ifdef CONFIG_DEBUG_VM
3064     /*
3065     * Add some anal sanity checks for now. Eventually,
3066     @@ -410,7 +416,8 @@
3067     * and that the resulting page looks ok.
3068     */
3069     if (unlikely(!pfn_valid(pfn))) {
3070     - print_bad_pte(vma, pte, addr);
3071     + if (!(vma->vm_flags & VM_RESERVED))
3072     + print_bad_pte(vma, pte, addr);
3073     return NULL;
3074     }
3075     #endif
3076     @@ -668,8 +675,12 @@
3077     page->index > details->last_index))
3078     continue;
3079     }
3080     - ptent = ptep_get_and_clear_full(mm, addr, pte,
3081     - tlb->fullmm);
3082     + if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte))
3083     + ptent = vma->vm_ops->zap_pte(vma, addr, pte,
3084     + tlb->fullmm);
3085     + else
3086     + ptent = ptep_get_and_clear_full(mm, addr, pte,
3087     + tlb->fullmm);
3088     tlb_remove_tlb_entry(tlb, pte, addr);
3089     if (unlikely(!page))
3090     continue;
3091     @@ -902,6 +913,7 @@
3092     tlb_finish_mmu(tlb, address, end);
3093     return end;
3094     }
3095     +EXPORT_SYMBOL(zap_page_range);
3096    
3097     /*
3098     * Do a quick page-table lookup for a single page.
3099     @@ -1043,6 +1055,26 @@
3100     continue;
3101     }
3102    
3103     +#ifdef CONFIG_XEN
3104     + if (vma && (vma->vm_flags & VM_FOREIGN)) {
3105     + struct page **map = vma->vm_private_data;
3106     + int offset = (start - vma->vm_start) >> PAGE_SHIFT;
3107     + if (map[offset] != NULL) {
3108     + if (pages) {
3109     + struct page *page = map[offset];
3110     +
3111     + pages[i] = page;
3112     + get_page(page);
3113     + }
3114     + if (vmas)
3115     + vmas[i] = vma;
3116     + i++;
3117     + start += PAGE_SIZE;
3118     + len--;
3119     + continue;
3120     + }
3121     + }
3122     +#endif
3123     if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
3124     || !(vm_flags & vma->vm_flags))
3125     return i ? : -EFAULT;
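
The VM_FOREIGN branch above relies on a convention established by the
Xen grant-mapping drivers rather than by this hunk: vma->vm_private_data
points at a struct page * array with one slot per page of the VMA, and
a NULL slot means "fall through to the normal page-table walk". Reduced
to its core:

    /* Model of the lookup only; types trimmed to the essentials. */
    struct page;

    static struct page *foreign_lookup(struct page **map,
                                       unsigned long vm_start,
                                       unsigned long addr,
                                       unsigned int page_shift)
    {
        unsigned long offset = (addr - vm_start) >> page_shift;

        return map[offset];     /* NULL => use the regular path */
    }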
3126     --- a/mm/mprotect.c
3127     +++ b/mm/mprotect.c
3128     @@ -86,6 +86,8 @@
3129     next = pmd_addr_end(addr, end);
3130     if (pmd_none_or_clear_bad(pmd))
3131     continue;
3132     + if (arch_change_pte_range(mm, pmd, addr, next, newprot))
3133     + continue;
3134     change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
3135     } while (pmd++, addr = next, addr != end);
3136     }
3137     --- a/mm/page_alloc.c
3138     +++ b/mm/page_alloc.c
3139     @@ -245,7 +245,11 @@
3140     1 << PG_slab |
3141     1 << PG_swapcache |
3142     1 << PG_writeback |
3143     - 1 << PG_buddy );
3144     + 1 << PG_buddy |
3145     +#ifdef CONFIG_X86_XEN
3146     + 1 << PG_pinned |
3147     +#endif
3148     + 1 << PG_foreign );
3149     set_page_count(page, 0);
3150     reset_page_mapcount(page);
3151     page->mapping = NULL;
3152     @@ -471,7 +475,11 @@
3153     1 << PG_swapcache |
3154     1 << PG_writeback |
3155     1 << PG_reserved |
3156     - 1 << PG_buddy ))))
3157     + 1 << PG_buddy |
3158     +#ifdef CONFIG_X86_XEN
3159     + 1 << PG_pinned |
3160     +#endif
3161     + 1 << PG_foreign ))))
3162     bad_page(page);
3163     if (PageDirty(page))
3164     __ClearPageDirty(page);
3165     @@ -527,6 +535,12 @@
3166     int i;
3167     int reserved = 0;
3168    
3169     +#ifdef CONFIG_XEN
3170     + if (PageForeign(page)) {
3171     + PageForeignDestructor(page);
3172     + return;
3173     + }
3174     +#endif
3175     for (i = 0 ; i < (1 << order) ; ++i)
3176     reserved += free_pages_check(page + i);
3177     if (reserved)
3178     @@ -622,7 +636,11 @@
3179     1 << PG_swapcache |
3180     1 << PG_writeback |
3181     1 << PG_reserved |
3182     - 1 << PG_buddy ))))
3183     + 1 << PG_buddy |
3184     +#ifdef CONFIG_X86_XEN
3185     + 1 << PG_pinned |
3186     +#endif
3187     + 1 << PG_foreign ))))
3188     bad_page(page);
3189    
3190     /*
3191     @@ -990,6 +1008,12 @@
3192     struct per_cpu_pages *pcp;
3193     unsigned long flags;
3194    
3195     +#ifdef CONFIG_XEN
3196     + if (PageForeign(page)) {
3197     + PageForeignDestructor(page);
3198     + return;
3199     + }
3200     +#endif
3201     if (PageAnon(page))
3202     page->mapping = NULL;
3203     if (free_pages_check(page))
3204     --- a/net/core/dev.c
3205     +++ b/net/core/dev.c
3206     @@ -122,6 +122,12 @@
3207    
3208     #include "net-sysfs.h"
3209    
3210     +#ifdef CONFIG_XEN
3211     +#include <net/ip.h>
3212     +#include <linux/tcp.h>
3213     +#include <linux/udp.h>
3214     +#endif
3215     +
3216     /*
3217     * The list of packet types we will receive (as opposed to discard)
3218     * and the routines to invoke.
3219     @@ -1580,6 +1586,42 @@
3220     return 0;
3221     }
3222    
3223     +#ifdef CONFIG_XEN
3224     +inline int skb_checksum_setup(struct sk_buff *skb)
3225     +{
3226     + if (skb->proto_csum_blank) {
3227     + if (skb->protocol != htons(ETH_P_IP))
3228     + goto out;
3229     + skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
3230     + if (skb->h.raw >= skb->tail)
3231     + goto out;
3232     + switch (skb->nh.iph->protocol) {
3233     + case IPPROTO_TCP:
3234     + skb->csum = offsetof(struct tcphdr, check);
3235     + break;
3236     + case IPPROTO_UDP:
3237     + skb->csum = offsetof(struct udphdr, check);
3238     + break;
3239     + default:
3240     + if (net_ratelimit())
3241     + printk(KERN_ERR "Attempting to checksum a non-"
3242     + "TCP/UDP packet, dropping a protocol"
3243     + " %d packet\n", skb->nh.iph->protocol);
3244     + goto out;
3245     + }
3246     + if ((skb->h.raw + skb->csum + 2) > skb->tail)
3247     + goto out;
3248     + skb->ip_summed = CHECKSUM_HW;
3249     + skb->proto_csum_blank = 0;
3250     + }
3251     + return 0;
3252     +out:
3253     + return -EPROTO;
3254     +}
3255     +#else
3256     +inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
3257     +#endif
3258     +
3259     /**
3260     * dev_queue_xmit - transmit a buffer
3261     * @skb: buffer to transmit
3262     @@ -1612,6 +1654,12 @@
3263     struct Qdisc *q;
3264     int rc = -ENOMEM;
3265    
3266     + /* If a checksum-deferred packet is forwarded to a device that needs a
3267     + * checksum, correct the pointers and force checksumming.
3268     + */
3269     + if (skb_checksum_setup(skb))
3270     + goto out_kfree_skb;
3271     +
3272     /* GSO will handle the following emulations directly. */
3273     if (netif_needs_gso(dev, skb))
3274     goto gso;
3275     @@ -2062,6 +2110,19 @@
3276     }
3277     #endif
3278    
3279     +#ifdef CONFIG_XEN
3280     + switch (skb->ip_summed) {
3281     + case CHECKSUM_UNNECESSARY:
3282     + skb->proto_data_valid = 1;
3283     + break;
3284     + case CHECKSUM_HW:
3285     + /* XXX Implement me. */
3286     + default:
3287     + skb->proto_data_valid = 0;
3288     + break;
3289     + }
3290     +#endif
3291     +
3292     list_for_each_entry_rcu(ptype, &ptype_all, list) {
3293     if (!ptype->dev || ptype->dev == skb->dev) {
3294     if (pt_prev)
3295     @@ -4587,6 +4648,7 @@
3296     EXPORT_SYMBOL(net_enable_timestamp);
3297     EXPORT_SYMBOL(net_disable_timestamp);
3298     EXPORT_SYMBOL(dev_get_flags);
3299     +EXPORT_SYMBOL(skb_checksum_setup);
3300    
3301     #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3302     EXPORT_SYMBOL(br_handle_frame_hook);
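
All skb_checksum_setup() really records is where the pending checksum
lives: h.raw is derived from the IPv4 header length (ihl, counted in
32-bit words) and skb->csum becomes the offset of the check field
inside the transport header, which is what the CHECKSUM_HW path needs
to finish the sum on the final hop. A self-contained model of those two
computations:

    #include <stddef.h>
    #include <stdint.h>

    /* Field layout matching the on-wire TCP header. */
    struct tcp_hdr {
        uint16_t source, dest;
        uint32_t seq, ack_seq;
        uint16_t flags, window;
        uint16_t check, urg_ptr;        /* check sits at offset 16 */
    };

    static size_t transport_offset(const uint8_t *iph)
    {
        return (size_t)(iph[0] & 0x0f) * 4;     /* ihl in 32-bit words */
    }

    static size_t tcp_csum_offset(void)
    {
        return offsetof(struct tcp_hdr, check); /* becomes skb->csum */
    }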
3303     --- a/net/core/skbuff.c
3304     +++ b/net/core/skbuff.c
3305     @@ -454,6 +454,10 @@
3306     n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
3307     n->cloned = 1;
3308     n->nohdr = 0;
3309     +#ifdef CONFIG_XEN
3310     + C(proto_data_valid);
3311     + C(proto_csum_blank);
3312     +#endif
3313     n->destructor = NULL;
3314     C(iif);
3315     C(tail);
3316     --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
3317     +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
3318     @@ -132,6 +132,9 @@
3319     if (hdrsize < sizeof(*hdr))
3320     return 1;
3321    
3322     + if (skb_checksum_setup(skb))
3323     + return 0;
3324     +
3325     inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
3326     inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
3327     return 1;
3328     --- a/net/ipv4/netfilter/nf_nat_proto_udp.c
3329     +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
3330     @@ -116,6 +116,10 @@
3331     newport = tuple->dst.u.udp.port;
3332     portptr = &hdr->dest;
3333     }
3334     +
3335     + if (skb_checksum_setup(skb))
3336     + return 0;
3337     +
3338     if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
3339     inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
3340     inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
3341     --- a/net/ipv4/xfrm4_output.c
3342     +++ b/net/ipv4/xfrm4_output.c
3343     @@ -81,7 +81,7 @@
3344     #endif
3345    
3346     skb->protocol = htons(ETH_P_IP);
3347     - return xfrm_output(skb);
3348     + return skb_checksum_setup(skb) ?: xfrm_output(skb);
3349     }
3350    
3351     int xfrm4_output(struct sk_buff *skb)
3352     --- a/scripts/Makefile.build
3353     +++ b/scripts/Makefile.build
3354     @@ -73,6 +73,20 @@
3355     $(warning kbuild: Makefile.build is included improperly)
3356     endif
3357    
3358     +ifeq ($(CONFIG_XEN),y)
3359     +$(objtree)/scripts/Makefile.xen: $(srctree)/scripts/Makefile.xen.awk $(srctree)/scripts/Makefile.build
3360     + @echo ' Updating $@'
3361     + $(if $(shell echo a | $(AWK) '{ print gensub(/a/, "AA", "g"); }'),\
3362     + ,$(error 'Your awk program does not define gensub. Use gawk or another awk with gensub'))
3363     + @$(AWK) -f $< $(filter-out $<,$^) >$@
3364     +
3365     +xen-src-single-used-m := $(patsubst $(srctree)/%,%,$(wildcard $(addprefix $(srctree)/,$(single-used-m:.o=-xen.c))))
3366     +xen-single-used-m := $(xen-src-single-used-m:-xen.c=.o)
3367     +single-used-m := $(filter-out $(xen-single-used-m),$(single-used-m))
3368     +
3369     +-include $(objtree)/scripts/Makefile.xen
3370     +endif
3371     +
3372     # ===========================================================================
3373    
3374     ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),)
3375     --- a/scripts/Makefile.lib
3376     +++ b/scripts/Makefile.lib
3377     @@ -17,6 +17,12 @@
3378    
3379     lib-y := $(filter-out $(obj-y), $(sort $(lib-y) $(lib-m)))
3380    
3381     +# Remove objects forcibly disabled
3382     +
3383     +obj-y := $(filter-out $(disabled-obj-y),$(obj-y))
3384     +obj-m := $(filter-out $(disabled-obj-y),$(obj-m))
3385     +lib-y := $(filter-out $(disabled-obj-y),$(lib-y))
3386     +
3387    
3388     # Handle objects in subdirs
3389     # ---------------------------------------------------------------------------