Annotation of /trunk/kernel26-xen/patches-2.6.25-r1/1011-2.6.25-xen-auto-common.patch
Revision 609 - Fri May 23 17:35:37 2008 UTC by niro
File size: 88655 byte(s)
-using opensuse xen patchset, updated kernel configs
1 | niro | 609 | Subject: xen3 common |
2 | From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 517:d71965a78c20) | ||
3 | Patch-mainline: obsolete | ||
4 | Acked-by: jbeulich@novell.com | ||
5 | |||
6 | List of files that don't require modification anymore (and hence | ||
7 | removed from this patch), for reference and in case upstream wants to | ||
8 | take the forward-porting patches: | ||
9 | 2.6.25/mm/highmem.c | ||
10 | |||
11 | --- | ||
12 | drivers/Makefile | 1 | ||
13 | drivers/acpi/hardware/hwsleep.c | 15 | ||
14 | drivers/acpi/sleep/main.c | 11 | ||
15 | drivers/char/agp/intel-agp.c | 10 | ||
16 | drivers/char/mem.c | 6 | ||
17 | drivers/char/tpm/Makefile | 2 | ||
18 | drivers/char/tpm/tpm.h | 15 | ||
19 | drivers/char/tpm/tpm_vtpm.c | 542 +++++++++++++++++++++++++ | ||
20 | drivers/char/tpm/tpm_vtpm.h | 55 ++ | ||
21 | drivers/char/tpm/tpm_xen.c | 722 ++++++++++++++++++++++++++++++++++ | ||
22 | drivers/ide/ide-lib.c | 8 | ||
23 | drivers/oprofile/buffer_sync.c | 87 +++- | ||
24 | drivers/oprofile/cpu_buffer.c | 51 +- | ||
25 | drivers/oprofile/cpu_buffer.h | 9 | ||
26 | drivers/oprofile/event_buffer.h | 3 | ||
27 | drivers/oprofile/oprof.c | 30 + | ||
28 | drivers/oprofile/oprof.h | 3 | ||
29 | drivers/oprofile/oprofile_files.c | 201 +++++++++ | ||
30 | drivers/pci/bus.c | 7 | ||
31 | drivers/pci/quirks.c | 34 + | ||
32 | fs/aio.c | 120 +++++ | ||
33 | fs/compat_ioctl.c | 19 | ||
34 | fs/splice.c | 3 | ||
35 | include/asm-generic/pci.h | 2 | ||
36 | include/asm-generic/pgtable.h | 4 | ||
37 | include/linux/aio.h | 5 | ||
38 | include/linux/interrupt.h | 6 | ||
39 | include/linux/kexec.h | 17 | ||
40 | include/linux/mm.h | 7 | ||
41 | include/linux/oprofile.h | 12 | ||
42 | include/linux/page-flags.h | 15 | ||
43 | include/linux/sched.h | 5 | ||
44 | include/linux/skbuff.h | 8 | ||
45 | include/linux/vermagic.h | 8 | ||
46 | kernel/irq/spurious.c | 2 | ||
47 | kernel/kexec.c | 71 ++- | ||
48 | kernel/softlockup.c | 13 | ||
49 | kernel/sysctl.c | 2 | ||
50 | kernel/timer.c | 8 | ||
51 | mm/memory.c | 38 + | ||
52 | mm/mprotect.c | 2 | ||
53 | mm/page_alloc.c | 30 + | ||
54 | net/core/dev.c | 62 ++ | ||
55 | net/core/skbuff.c | 4 | ||
56 | net/ipv4/netfilter/nf_nat_proto_tcp.c | 3 | ||
57 | net/ipv4/netfilter/nf_nat_proto_udp.c | 4 | ||
58 | net/ipv4/xfrm4_output.c | 2 | ||
59 | scripts/Makefile.build | 14 | ||
60 | scripts/Makefile.lib | 6 | ||
61 | 49 files changed, 2226 insertions(+), 78 deletions(-) | ||
62 | |||
63 | --- a/drivers/Makefile | ||
64 | +++ b/drivers/Makefile | ||
65 | @@ -34,6 +34,7 @@ | ||
66 | obj-$(CONFIG_NUBUS) += nubus/ | ||
67 | obj-$(CONFIG_ATM) += atm/ | ||
68 | obj-y += macintosh/ | ||
69 | +obj-$(CONFIG_XEN) += xen/ | ||
70 | obj-$(CONFIG_IDE) += ide/ | ||
71 | obj-$(CONFIG_SCSI) += scsi/ | ||
72 | obj-$(CONFIG_ATA) += ata/ | ||
73 | --- a/drivers/acpi/hardware/hwsleep.c | ||
74 | +++ b/drivers/acpi/hardware/hwsleep.c | ||
75 | @@ -252,7 +252,11 @@ | ||
76 | u32 PM1Bcontrol; | ||
77 | struct acpi_bit_register_info *sleep_type_reg_info; | ||
78 | struct acpi_bit_register_info *sleep_enable_reg_info; | ||
79 | +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86)) | ||
80 | u32 in_value; | ||
81 | +#else | ||
82 | + int err; | ||
83 | +#endif | ||
84 | struct acpi_object_list arg_list; | ||
85 | union acpi_object arg; | ||
86 | acpi_status status; | ||
87 | @@ -362,6 +366,7 @@ | ||
88 | |||
89 | ACPI_FLUSH_CPU_CACHE(); | ||
90 | |||
91 | +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86)) | ||
92 | status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL, | ||
93 | PM1Acontrol); | ||
94 | if (ACPI_FAILURE(status)) { | ||
95 | @@ -408,6 +413,16 @@ | ||
96 | /* Spin until we wake */ | ||
97 | |||
98 | } while (!in_value); | ||
99 | +#else | ||
100 | + /* PV ACPI just needs to check the hypercall return value */ | ||
101 | + err = acpi_notify_hypervisor_state(sleep_state, | ||
102 | + PM1Acontrol, PM1Bcontrol); | ||
103 | + if (err) { | ||
104 | + ACPI_DEBUG_PRINT((ACPI_DB_ERROR, | ||
105 | + "Hypervisor failure [%d]\n", err)); | ||
106 | + return_ACPI_STATUS(AE_ERROR); | ||
107 | + } | ||
108 | +#endif | ||
109 | |||
110 | return_ACPI_STATUS(AE_OK); | ||
111 | } | ||
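
The hunk above replaces the direct PM1A/PM1B control-register writes with a call to acpi_notify_hypervisor_state(), since under Xen only the hypervisor may put the platform into an S-state. That helper is not carried in this patch; a minimal sketch of the definition used elsewhere in the Xen patchset, assuming the XENPF_enter_acpi_sleep platform op of the linux-2.6.18-xen tree (field names vary across interface versions):

    /* Sketch only: forward the ACPI S-state request to the hypervisor,
     * which performs the register writes and CPU context handling. */
    static inline int acpi_notify_hypervisor_state(u8 sleep_state,
                                                   u32 pm1a_cnt, u32 pm1b_cnt)
    {
            struct xen_platform_op op = {
                    .cmd = XENPF_enter_acpi_sleep,
                    .interface_version = XENPF_INTERFACE_VERSION,
                    .u.enter_acpi_sleep = {
                            .pm1a_cnt_val = (u16)pm1a_cnt,
                            .pm1b_cnt_val = (u16)pm1b_cnt,
                            .sleep_state  = sleep_state,
                    },
            };

            return HYPERVISOR_platform_op(&op);
    }
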
112 | --- a/drivers/acpi/sleep/main.c | ||
113 | +++ b/drivers/acpi/sleep/main.c | ||
114 | @@ -31,6 +31,7 @@ | ||
115 | static int acpi_sleep_prepare(u32 acpi_state) | ||
116 | { | ||
117 | #ifdef CONFIG_ACPI_SLEEP | ||
118 | +#ifndef CONFIG_ACPI_PV_SLEEP | ||
119 | /* do we have a wakeup address for S2 and S3? */ | ||
120 | if (acpi_state == ACPI_STATE_S3) { | ||
121 | if (!acpi_wakeup_address) { | ||
122 | @@ -41,6 +42,7 @@ | ||
123 | acpi_wakeup_address)); | ||
124 | |||
125 | } | ||
126 | +#endif | ||
127 | ACPI_FLUSH_CPU_CACHE(); | ||
128 | acpi_enable_wakeup_device_prep(acpi_state); | ||
129 | #endif | ||
130 | @@ -137,7 +139,14 @@ | ||
131 | break; | ||
132 | |||
133 | case ACPI_STATE_S3: | ||
134 | +#ifdef CONFIG_ACPI_PV_SLEEP | ||
135 | + /* Hypervisor will save and restore CPU context | ||
136 | + * and then we can skip low-level housekeeping here. | ||
137 | + */ | ||
138 | + acpi_enter_sleep_state(acpi_state); | ||
139 | +#else | ||
140 | do_suspend_lowlevel(); | ||
141 | +#endif | ||
142 | break; | ||
143 | } | ||
144 | |||
145 | @@ -187,7 +196,7 @@ | ||
146 | |||
147 | acpi_target_sleep_state = ACPI_STATE_S0; | ||
148 | |||
149 | -#ifdef CONFIG_X86 | ||
150 | +#if defined(CONFIG_X86) && !defined(CONFIG_ACPI_PV_SLEEP) | ||
151 | if (init_8259A_after_S1) { | ||
152 | printk("Broken toshiba laptop -> kicking interrupts\n"); | ||
153 | init_8259A(0); | ||
154 | --- a/drivers/char/agp/intel-agp.c | ||
155 | +++ b/drivers/char/agp/intel-agp.c | ||
156 | @@ -230,6 +230,13 @@ | ||
157 | if (page == NULL) | ||
158 | return NULL; | ||
159 | |||
160 | +#ifdef CONFIG_XEN | ||
161 | + if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) { | ||
162 | + __free_pages(page, 2); | ||
163 | + return NULL; | ||
164 | + } | ||
165 | +#endif | ||
166 | + | ||
167 | if (set_pages_uc(page, 4) < 0) { | ||
168 | set_pages_wb(page, 4); | ||
169 | __free_pages(page, 2); | ||
170 | @@ -249,6 +256,9 @@ | ||
171 | |||
172 | page = virt_to_page(addr); | ||
173 | set_pages_wb(page, 4); | ||
174 | +#ifdef CONFIG_XEN | ||
175 | + xen_destroy_contiguous_region((unsigned long)page_address(page), 2); | ||
176 | +#endif | ||
177 | put_page(page); | ||
178 | __free_pages(page, 2); | ||
179 | atomic_dec(&agp_bridge->current_memory_agp); | ||
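
A guest's pseudo-physical pages are generally not machine-contiguous under Xen, while the chipset reads the GATT with machine addresses, so the order-2 allocation above must be exchanged for a machine-contiguous run below 2^32 before the hardware may use it, and exchanged back before the pages are freed. A stand-alone sketch of the pairing, using xen_create/destroy_contiguous_region() as declared in the linux-2.6.18-xen tree (hypothetical helper names; error handling trimmed):

    /* Sketch: 4 pages, contiguous in machine address space, below 4 GiB. */
    static void *gatt_alloc_sketch(void)
    {
            unsigned long va = __get_free_pages(GFP_KERNEL, 2);

            if (!va)
                    return NULL;
    #ifdef CONFIG_XEN
            /* Exchange the backing frames for a machine-contiguous run. */
            if (xen_create_contiguous_region(va, 2 /* order */, 32 /* bits */)) {
                    free_pages(va, 2);
                    return NULL;
            }
    #endif
            return (void *)va;
    }

    static void gatt_free_sketch(void *table)
    {
    #ifdef CONFIG_XEN
            /* Undo the exchange before returning pages to the allocator. */
            xen_destroy_contiguous_region((unsigned long)table, 2);
    #endif
            free_pages((unsigned long)table, 2);
    }
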
180 | --- a/drivers/char/mem.c | ||
181 | +++ b/drivers/char/mem.c | ||
182 | @@ -108,6 +108,7 @@ | ||
183 | } | ||
184 | #endif | ||
185 | |||
186 | +#ifndef ARCH_HAS_DEV_MEM | ||
187 | /* | ||
188 | * This funcion reads the *physical* memory. The f_pos points directly to the | ||
189 | * memory location. | ||
190 | @@ -230,6 +231,7 @@ | ||
191 | *ppos += written; | ||
192 | return written; | ||
193 | } | ||
194 | +#endif | ||
195 | |||
196 | #ifndef __HAVE_PHYS_MEM_ACCESS_PROT | ||
197 | static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | ||
198 | @@ -725,6 +727,7 @@ | ||
199 | #define open_kmem open_mem | ||
200 | #define open_oldmem open_mem | ||
201 | |||
202 | +#ifndef ARCH_HAS_DEV_MEM | ||
203 | static const struct file_operations mem_fops = { | ||
204 | .llseek = memory_lseek, | ||
205 | .read = read_mem, | ||
206 | @@ -733,6 +736,9 @@ | ||
207 | .open = open_mem, | ||
208 | .get_unmapped_area = get_unmapped_area_mem, | ||
209 | }; | ||
210 | +#else | ||
211 | +extern const struct file_operations mem_fops; | ||
212 | +#endif | ||
213 | |||
214 | static const struct file_operations kmem_fops = { | ||
215 | .llseek = memory_lseek, | ||
216 | --- a/drivers/char/tpm/Makefile | ||
217 | +++ b/drivers/char/tpm/Makefile | ||
218 | @@ -9,3 +9,5 @@ | ||
219 | obj-$(CONFIG_TCG_NSC) += tpm_nsc.o | ||
220 | obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o | ||
221 | obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o | ||
222 | +obj-$(CONFIG_TCG_XEN) += tpm_xenu.o | ||
223 | +tpm_xenu-y = tpm_xen.o tpm_vtpm.o | ||
224 | --- a/drivers/char/tpm/tpm.h | ||
225 | +++ b/drivers/char/tpm/tpm.h | ||
226 | @@ -107,6 +107,9 @@ | ||
227 | struct dentry **bios_dir; | ||
228 | |||
229 | struct list_head list; | ||
230 | +#ifdef CONFIG_XEN | ||
231 | + void *priv; | ||
232 | +#endif | ||
233 | void (*release) (struct device *); | ||
234 | }; | ||
235 | |||
236 | @@ -124,6 +127,18 @@ | ||
237 | outb(value & 0xFF, base+1); | ||
238 | } | ||
239 | |||
240 | +#ifdef CONFIG_XEN | ||
241 | +static inline void *chip_get_private(const struct tpm_chip *chip) | ||
242 | +{ | ||
243 | + return chip->priv; | ||
244 | +} | ||
245 | + | ||
246 | +static inline void chip_set_private(struct tpm_chip *chip, void *priv) | ||
247 | +{ | ||
248 | + chip->priv = priv; | ||
249 | +} | ||
250 | +#endif | ||
251 | + | ||
252 | extern void tpm_get_timeouts(struct tpm_chip *); | ||
253 | extern void tpm_gen_interrupt(struct tpm_chip *); | ||
254 | extern void tpm_continue_selftest(struct tpm_chip *); | ||
255 | --- /dev/null | ||
256 | +++ b/drivers/char/tpm/tpm_vtpm.c | ||
257 | @@ -0,0 +1,542 @@ | ||
258 | +/* | ||
259 | + * Copyright (C) 2006 IBM Corporation | ||
260 | + * | ||
261 | + * Authors: | ||
262 | + * Stefan Berger <stefanb@us.ibm.com> | ||
263 | + * | ||
264 | + * Generic device driver part for device drivers in a virtualized | ||
265 | + * environment. | ||
266 | + * | ||
267 | + * This program is free software; you can redistribute it and/or | ||
268 | + * modify it under the terms of the GNU General Public License as | ||
269 | + * published by the Free Software Foundation, version 2 of the | ||
270 | + * License. | ||
271 | + * | ||
272 | + */ | ||
273 | + | ||
274 | +#include <asm/uaccess.h> | ||
275 | +#include <linux/list.h> | ||
276 | +#include <linux/device.h> | ||
277 | +#include <linux/interrupt.h> | ||
278 | +#include <linux/platform_device.h> | ||
279 | +#include "tpm.h" | ||
280 | +#include "tpm_vtpm.h" | ||
281 | + | ||
282 | +/* read status bits */ | ||
283 | +enum { | ||
284 | + STATUS_BUSY = 0x01, | ||
285 | + STATUS_DATA_AVAIL = 0x02, | ||
286 | + STATUS_READY = 0x04 | ||
287 | +}; | ||
288 | + | ||
289 | +struct transmission { | ||
290 | + struct list_head next; | ||
291 | + | ||
292 | + unsigned char *request; | ||
293 | + size_t request_len; | ||
294 | + size_t request_buflen; | ||
295 | + | ||
296 | + unsigned char *response; | ||
297 | + size_t response_len; | ||
298 | + size_t response_buflen; | ||
299 | + | ||
300 | + unsigned int flags; | ||
301 | +}; | ||
302 | + | ||
303 | +enum { | ||
304 | + TRANSMISSION_FLAG_WAS_QUEUED = 0x1 | ||
305 | +}; | ||
306 | + | ||
307 | + | ||
308 | +enum { | ||
309 | + DATAEX_FLAG_QUEUED_ONLY = 0x1 | ||
310 | +}; | ||
311 | + | ||
312 | + | ||
313 | +/* local variables */ | ||
314 | + | ||
315 | +/* local function prototypes */ | ||
316 | +static int _vtpm_send_queued(struct tpm_chip *chip); | ||
317 | + | ||
318 | + | ||
319 | +/* ============================================================= | ||
320 | + * Some utility functions | ||
321 | + * ============================================================= | ||
322 | + */ | ||
323 | +static void vtpm_state_init(struct vtpm_state *vtpms) | ||
324 | +{ | ||
325 | + vtpms->current_request = NULL; | ||
326 | + spin_lock_init(&vtpms->req_list_lock); | ||
327 | + init_waitqueue_head(&vtpms->req_wait_queue); | ||
328 | + INIT_LIST_HEAD(&vtpms->queued_requests); | ||
329 | + | ||
330 | + vtpms->current_response = NULL; | ||
331 | + spin_lock_init(&vtpms->resp_list_lock); | ||
332 | + init_waitqueue_head(&vtpms->resp_wait_queue); | ||
333 | + | ||
334 | + vtpms->disconnect_time = jiffies; | ||
335 | +} | ||
336 | + | ||
337 | + | ||
338 | +static inline struct transmission *transmission_alloc(void) | ||
339 | +{ | ||
340 | + return kzalloc(sizeof(struct transmission), GFP_ATOMIC); | ||
341 | +} | ||
342 | + | ||
343 | +static unsigned char * | ||
344 | +transmission_set_req_buffer(struct transmission *t, | ||
345 | + unsigned char *buffer, size_t len) | ||
346 | +{ | ||
347 | + if (t->request_buflen < len) { | ||
348 | + kfree(t->request); | ||
349 | + t->request = kmalloc(len, GFP_KERNEL); | ||
350 | + if (!t->request) { | ||
351 | + t->request_buflen = 0; | ||
352 | + return NULL; | ||
353 | + } | ||
354 | + t->request_buflen = len; | ||
355 | + } | ||
356 | + | ||
357 | + memcpy(t->request, buffer, len); | ||
358 | + t->request_len = len; | ||
359 | + | ||
360 | + return t->request; | ||
361 | +} | ||
362 | + | ||
363 | +static unsigned char * | ||
364 | +transmission_set_res_buffer(struct transmission *t, | ||
365 | + const unsigned char *buffer, size_t len) | ||
366 | +{ | ||
367 | + if (t->response_buflen < len) { | ||
368 | + kfree(t->response); | ||
369 | + t->response = kmalloc(len, GFP_ATOMIC); | ||
370 | + if (!t->response) { | ||
371 | + t->response_buflen = 0; | ||
372 | + return NULL; | ||
373 | + } | ||
374 | + t->response_buflen = len; | ||
375 | + } | ||
376 | + | ||
377 | + memcpy(t->response, buffer, len); | ||
378 | + t->response_len = len; | ||
379 | + | ||
380 | + return t->response; | ||
381 | +} | ||
382 | + | ||
383 | +static inline void transmission_free(struct transmission *t) | ||
384 | +{ | ||
385 | + kfree(t->request); | ||
386 | + kfree(t->response); | ||
387 | + kfree(t); | ||
388 | +} | ||
389 | + | ||
390 | +/* ============================================================= | ||
391 | + * Interface with the lower layer driver | ||
392 | + * ============================================================= | ||
393 | + */ | ||
394 | +/* | ||
395 | + * Lower layer uses this function to make a response available. | ||
396 | + */ | ||
397 | +int vtpm_vd_recv(const struct tpm_chip *chip, | ||
398 | + const unsigned char *buffer, size_t count, | ||
399 | + void *ptr) | ||
400 | +{ | ||
401 | + unsigned long flags; | ||
402 | + int ret_size = 0; | ||
403 | + struct transmission *t; | ||
404 | + struct vtpm_state *vtpms; | ||
405 | + | ||
406 | + vtpms = (struct vtpm_state *)chip_get_private(chip); | ||
407 | + | ||
408 | + /* | ||
409 | + * The list with requests must contain one request | ||
410 | + * only and the element there must be the one that | ||
411 | + * was passed to me from the front-end. | ||
412 | + */ | ||
413 | + spin_lock_irqsave(&vtpms->resp_list_lock, flags); | ||
414 | + if (vtpms->current_request != ptr) { | ||
415 | + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); | ||
416 | + return 0; | ||
417 | + } | ||
418 | + | ||
419 | + if ((t = vtpms->current_request)) { | ||
420 | + transmission_free(t); | ||
421 | + vtpms->current_request = NULL; | ||
422 | + } | ||
423 | + | ||
424 | + t = transmission_alloc(); | ||
425 | + if (t) { | ||
426 | + if (!transmission_set_res_buffer(t, buffer, count)) { | ||
427 | + transmission_free(t); | ||
428 | + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); | ||
429 | + return -ENOMEM; | ||
430 | + } | ||
431 | + ret_size = count; | ||
432 | + vtpms->current_response = t; | ||
433 | + wake_up_interruptible(&vtpms->resp_wait_queue); | ||
434 | + } | ||
435 | + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); | ||
436 | + | ||
437 | + return ret_size; | ||
438 | +} | ||
439 | + | ||
440 | + | ||
441 | +/* | ||
442 | + * Lower layer indicates its status (connected/disconnected) | ||
443 | + */ | ||
444 | +void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status) | ||
445 | +{ | ||
446 | + struct vtpm_state *vtpms; | ||
447 | + | ||
448 | + vtpms = (struct vtpm_state *)chip_get_private(chip); | ||
449 | + | ||
450 | + vtpms->vd_status = vd_status; | ||
451 | + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) { | ||
452 | + vtpms->disconnect_time = jiffies; | ||
453 | + } | ||
454 | +} | ||
455 | + | ||
456 | +/* ============================================================= | ||
457 | + * Interface with the generic TPM driver | ||
458 | + * ============================================================= | ||
459 | + */ | ||
460 | +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) | ||
461 | +{ | ||
462 | + int rc = 0; | ||
463 | + unsigned long flags; | ||
464 | + struct vtpm_state *vtpms; | ||
465 | + | ||
466 | + vtpms = (struct vtpm_state *)chip_get_private(chip); | ||
467 | + | ||
468 | + /* | ||
469 | + * Check if the previous operation only queued the command. | ||
470 | + * In this case there won't be a response, so I just | ||
471 | + * return from here and reset that flag. In any other | ||
472 | + * case I should receive a response from the back-end. | ||
473 | + */ | ||
474 | + spin_lock_irqsave(&vtpms->resp_list_lock, flags); | ||
475 | + if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) { | ||
476 | + vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY; | ||
477 | + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); | ||
478 | + /* | ||
479 | + * The first few commands (measurements) must be | ||
480 | + * queued since it might not be possible to talk to the | ||
481 | + * TPM, yet. | ||
482 | + * Return a response of up to 30 '0's. | ||
483 | + */ | ||
484 | + | ||
485 | + count = min_t(size_t, count, 30); | ||
486 | + memset(buf, 0x0, count); | ||
487 | + return count; | ||
488 | + } | ||
489 | + /* | ||
490 | + * Check whether something is in the responselist and if | ||
491 | + * there's nothing in the list wait for something to appear. | ||
492 | + */ | ||
493 | + | ||
494 | + if (!vtpms->current_response) { | ||
495 | + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); | ||
496 | + interruptible_sleep_on_timeout(&vtpms->resp_wait_queue, | ||
497 | + 1000); | ||
498 | + spin_lock_irqsave(&vtpms->resp_list_lock, flags); | ||
499 | + } | ||
500 | + | ||
501 | + if (vtpms->current_response) { | ||
502 | + struct transmission *t = vtpms->current_response; | ||
503 | + vtpms->current_response = NULL; | ||
504 | + rc = min(count, t->response_len); | ||
505 | + memcpy(buf, t->response, rc); | ||
506 | + transmission_free(t); | ||
507 | + } | ||
508 | + | ||
509 | + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); | ||
510 | + return rc; | ||
511 | +} | ||
512 | + | ||
513 | +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) | ||
514 | +{ | ||
515 | + int rc = 0; | ||
516 | + unsigned long flags; | ||
517 | + struct transmission *t = transmission_alloc(); | ||
518 | + struct vtpm_state *vtpms; | ||
519 | + | ||
520 | + vtpms = (struct vtpm_state *)chip_get_private(chip); | ||
521 | + | ||
522 | + if (!t) | ||
523 | + return -ENOMEM; | ||
524 | + /* | ||
525 | + * If there's a current request, it must be the | ||
526 | + * previous request that has timed out. | ||
527 | + */ | ||
528 | + spin_lock_irqsave(&vtpms->req_list_lock, flags); | ||
529 | + if (vtpms->current_request != NULL) { | ||
530 | + printk("WARNING: Sending although there is a request outstanding.\n" | ||
531 | + " Previous request must have timed out.\n"); | ||
532 | + transmission_free(vtpms->current_request); | ||
533 | + vtpms->current_request = NULL; | ||
534 | + } | ||
535 | + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); | ||
536 | + | ||
537 | + /* | ||
538 | + * Queue the packet if the driver below is not | ||
539 | + * ready, yet, or there is any packet already | ||
540 | + * in the queue. | ||
541 | + * If the driver below is ready, unqueue all | ||
542 | + * packets first before sending our current | ||
543 | + * packet. | ||
544 | + * For each unqueued packet, except for the | ||
545 | + * last (=current) packet, call the function | ||
546 | + * tpm_xen_recv to wait for the response to come | ||
547 | + * back. | ||
548 | + */ | ||
549 | + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) { | ||
550 | + if (time_after(jiffies, | ||
551 | + vtpms->disconnect_time + HZ * 10)) { | ||
552 | + rc = -ENOENT; | ||
553 | + } else { | ||
554 | + goto queue_it; | ||
555 | + } | ||
556 | + } else { | ||
557 | + /* | ||
558 | + * Send all queued packets. | ||
559 | + */ | ||
560 | + if (_vtpm_send_queued(chip) == 0) { | ||
561 | + | ||
562 | + vtpms->current_request = t; | ||
563 | + | ||
564 | + rc = vtpm_vd_send(vtpms->tpm_private, | ||
565 | + buf, | ||
566 | + count, | ||
567 | + t); | ||
568 | + /* | ||
569 | + * The generic TPM driver will call | ||
570 | + * the function to receive the response. | ||
571 | + */ | ||
572 | + if (rc < 0) { | ||
573 | + vtpms->current_request = NULL; | ||
574 | + goto queue_it; | ||
575 | + } | ||
576 | + } else { | ||
577 | +queue_it: | ||
578 | + if (!transmission_set_req_buffer(t, buf, count)) { | ||
579 | + transmission_free(t); | ||
580 | + rc = -ENOMEM; | ||
581 | + goto exit; | ||
582 | + } | ||
583 | + /* | ||
584 | + * An error occurred. Don't even try | ||
585 | + * to send the current request. Just | ||
586 | + * queue it. | ||
587 | + */ | ||
588 | + spin_lock_irqsave(&vtpms->req_list_lock, flags); | ||
589 | + vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY; | ||
590 | + list_add_tail(&t->next, &vtpms->queued_requests); | ||
591 | + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); | ||
592 | + } | ||
593 | + } | ||
594 | + | ||
595 | +exit: | ||
596 | + return rc; | ||
597 | +} | ||
598 | + | ||
599 | + | ||
600 | +/* | ||
601 | + * Send all queued requests. | ||
602 | + */ | ||
603 | +static int _vtpm_send_queued(struct tpm_chip *chip) | ||
604 | +{ | ||
605 | + int rc; | ||
606 | + int error = 0; | ||
607 | + unsigned long flags; | ||
608 | + unsigned char buffer[1]; | ||
609 | + struct vtpm_state *vtpms; | ||
610 | + vtpms = (struct vtpm_state *)chip_get_private(chip); | ||
611 | + | ||
612 | + spin_lock_irqsave(&vtpms->req_list_lock, flags); | ||
613 | + | ||
614 | + while (!list_empty(&vtpms->queued_requests)) { | ||
615 | + /* | ||
616 | + * Need to dequeue them. | ||
617 | + * Read the result into a dummy buffer. | ||
618 | + */ | ||
619 | + struct transmission *qt = (struct transmission *) | ||
620 | + vtpms->queued_requests.next; | ||
621 | + list_del(&qt->next); | ||
622 | + vtpms->current_request = qt; | ||
623 | + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); | ||
624 | + | ||
625 | + rc = vtpm_vd_send(vtpms->tpm_private, | ||
626 | + qt->request, | ||
627 | + qt->request_len, | ||
628 | + qt); | ||
629 | + | ||
630 | + if (rc < 0) { | ||
631 | + spin_lock_irqsave(&vtpms->req_list_lock, flags); | ||
632 | + if ((qt = vtpms->current_request) != NULL) { | ||
633 | + /* | ||
634 | + * requeue it at the beginning | ||
635 | + * of the list | ||
636 | + */ | ||
637 | + list_add(&qt->next, | ||
638 | + &vtpms->queued_requests); | ||
639 | + } | ||
640 | + vtpms->current_request = NULL; | ||
641 | + error = 1; | ||
642 | + break; | ||
643 | + } | ||
644 | + /* | ||
645 | + * After this point qt is not valid anymore! | ||
646 | + * It is freed when the front-end is delivering | ||
647 | + * the data by calling tpm_recv | ||
648 | + */ | ||
649 | + /* | ||
650 | + * Receive response into provided dummy buffer | ||
651 | + */ | ||
652 | + rc = vtpm_recv(chip, buffer, sizeof(buffer)); | ||
653 | + spin_lock_irqsave(&vtpms->req_list_lock, flags); | ||
654 | + } | ||
655 | + | ||
656 | + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); | ||
657 | + | ||
658 | + return error; | ||
659 | +} | ||
660 | + | ||
661 | +static void vtpm_cancel(struct tpm_chip *chip) | ||
662 | +{ | ||
663 | + unsigned long flags; | ||
664 | + struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip); | ||
665 | + | ||
666 | + spin_lock_irqsave(&vtpms->resp_list_lock, flags); | ||
667 | + | ||
668 | + if (!vtpms->current_response && vtpms->current_request) { | ||
669 | + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); | ||
670 | + interruptible_sleep_on(&vtpms->resp_wait_queue); | ||
671 | + spin_lock_irqsave(&vtpms->resp_list_lock, flags); | ||
672 | + } | ||
673 | + | ||
674 | + if (vtpms->current_response) { | ||
675 | + struct transmission *t = vtpms->current_response; | ||
676 | + vtpms->current_response = NULL; | ||
677 | + transmission_free(t); | ||
678 | + } | ||
679 | + | ||
680 | + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); | ||
681 | +} | ||
682 | + | ||
683 | +static u8 vtpm_status(struct tpm_chip *chip) | ||
684 | +{ | ||
685 | + u8 rc = 0; | ||
686 | + unsigned long flags; | ||
687 | + struct vtpm_state *vtpms; | ||
688 | + | ||
689 | + vtpms = (struct vtpm_state *)chip_get_private(chip); | ||
690 | + | ||
691 | + spin_lock_irqsave(&vtpms->resp_list_lock, flags); | ||
692 | + /* | ||
693 | + * Data are available if: | ||
694 | + * - there's a current response | ||
695 | + * - the last packet was queued only (this is fake, but necessary to | ||
696 | + * get the generic TPM layer to call the receive function.) | ||
697 | + */ | ||
698 | + if (vtpms->current_response || | ||
699 | + 0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) { | ||
700 | + rc = STATUS_DATA_AVAIL; | ||
701 | + } else if (!vtpms->current_response && !vtpms->current_request) { | ||
702 | + rc = STATUS_READY; | ||
703 | + } | ||
704 | + | ||
705 | + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); | ||
706 | + return rc; | ||
707 | +} | ||
708 | + | ||
709 | +static struct file_operations vtpm_ops = { | ||
710 | + .owner = THIS_MODULE, | ||
711 | + .llseek = no_llseek, | ||
712 | + .open = tpm_open, | ||
713 | + .read = tpm_read, | ||
714 | + .write = tpm_write, | ||
715 | + .release = tpm_release, | ||
716 | +}; | ||
717 | + | ||
718 | +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); | ||
719 | +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); | ||
720 | +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); | ||
721 | +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); | ||
722 | +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); | ||
723 | +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, | ||
724 | + NULL); | ||
725 | +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); | ||
726 | +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel); | ||
727 | + | ||
728 | +static struct attribute *vtpm_attrs[] = { | ||
729 | + &dev_attr_pubek.attr, | ||
730 | + &dev_attr_pcrs.attr, | ||
731 | + &dev_attr_enabled.attr, | ||
732 | + &dev_attr_active.attr, | ||
733 | + &dev_attr_owned.attr, | ||
734 | + &dev_attr_temp_deactivated.attr, | ||
735 | + &dev_attr_caps.attr, | ||
736 | + &dev_attr_cancel.attr, | ||
737 | + NULL, | ||
738 | +}; | ||
739 | + | ||
740 | +static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs }; | ||
741 | + | ||
742 | +#define TPM_LONG_TIMEOUT (10 * 60 * HZ) | ||
743 | + | ||
744 | +static struct tpm_vendor_specific tpm_vtpm = { | ||
745 | + .recv = vtpm_recv, | ||
746 | + .send = vtpm_send, | ||
747 | + .cancel = vtpm_cancel, | ||
748 | + .status = vtpm_status, | ||
749 | + .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL, | ||
750 | + .req_complete_val = STATUS_DATA_AVAIL, | ||
751 | + .req_canceled = STATUS_READY, | ||
752 | + .attr_group = &vtpm_attr_grp, | ||
753 | + .miscdev = { | ||
754 | + .fops = &vtpm_ops, | ||
755 | + }, | ||
756 | + .duration = { | ||
757 | + TPM_LONG_TIMEOUT, | ||
758 | + TPM_LONG_TIMEOUT, | ||
759 | + TPM_LONG_TIMEOUT, | ||
760 | + }, | ||
761 | +}; | ||
762 | + | ||
763 | +struct tpm_chip *init_vtpm(struct device *dev, | ||
764 | + struct tpm_private *tp) | ||
765 | +{ | ||
766 | + long rc; | ||
767 | + struct tpm_chip *chip; | ||
768 | + struct vtpm_state *vtpms; | ||
769 | + | ||
770 | + vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL); | ||
771 | + if (!vtpms) | ||
772 | + return ERR_PTR(-ENOMEM); | ||
773 | + | ||
774 | + vtpm_state_init(vtpms); | ||
775 | + vtpms->tpm_private = tp; | ||
776 | + | ||
777 | + chip = tpm_register_hardware(dev, &tpm_vtpm); | ||
778 | + if (!chip) { | ||
779 | + rc = -ENODEV; | ||
780 | + goto err_free_mem; | ||
781 | + } | ||
782 | + | ||
783 | + chip_set_private(chip, vtpms); | ||
784 | + | ||
785 | + return chip; | ||
786 | + | ||
787 | +err_free_mem: | ||
788 | + kfree(vtpms); | ||
789 | + | ||
790 | + return ERR_PTR(rc); | ||
791 | +} | ||
792 | + | ||
793 | +void cleanup_vtpm(struct device *dev) | ||
794 | +{ | ||
795 | + struct tpm_chip *chip = dev_get_drvdata(dev); | ||
796 | + struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip); | ||
797 | + tpm_remove_hardware(dev); | ||
798 | + kfree(vtpms); | ||
799 | +} | ||
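
The req_complete_mask/req_complete_val/req_canceled values registered above exist to drive the polling loop in the generic TPM core, which is also why vtpm_status() fakes STATUS_DATA_AVAIL for queued-only commands: without it the core would never call the recv hook. A condensed sketch of that consumer, modeled loosely on tpm_transmit() from the 2.6.25 drivers/char/tpm/tpm.c (simplified; the real code also enforces per-command timeouts):

    /* Sketch of how the generic layer consumes the hooks registered above. */
    static ssize_t transmit_sketch(struct tpm_chip *chip, u8 *buf, size_t len)
    {
            ssize_t rc = chip->vendor.send(chip, buf, len);   /* vtpm_send() */
            u8 status;

            if (rc < 0)
                    return rc;

            for (;;) {
                    status = chip->vendor.status(chip);       /* vtpm_status() */
                    if ((status & chip->vendor.req_complete_mask) ==
                        chip->vendor.req_complete_val)
                            break;                            /* data available */
                    if (status == chip->vendor.req_canceled)
                            return -ECANCELED;
                    msleep(TPM_TIMEOUT);
            }

            return chip->vendor.recv(chip, buf, len);         /* vtpm_recv() */
    }
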
800 | --- /dev/null | ||
801 | +++ b/drivers/char/tpm/tpm_vtpm.h | ||
802 | @@ -0,0 +1,55 @@ | ||
803 | +#ifndef TPM_VTPM_H | ||
804 | +#define TPM_VTPM_H | ||
805 | + | ||
806 | +struct tpm_chip; | ||
807 | +struct tpm_private; | ||
808 | + | ||
809 | +struct vtpm_state { | ||
810 | + struct transmission *current_request; | ||
811 | + spinlock_t req_list_lock; | ||
812 | + wait_queue_head_t req_wait_queue; | ||
813 | + | ||
814 | + struct list_head queued_requests; | ||
815 | + | ||
816 | + struct transmission *current_response; | ||
817 | + spinlock_t resp_list_lock; | ||
818 | + wait_queue_head_t resp_wait_queue; /* processes waiting for responses */ | ||
819 | + | ||
820 | + u8 vd_status; | ||
821 | + u8 flags; | ||
822 | + | ||
823 | + unsigned long disconnect_time; | ||
824 | + | ||
825 | + /* | ||
826 | + * The following is a private structure of the underlying | ||
827 | + * driver. It is passed as parameter in the send function. | ||
828 | + */ | ||
829 | + struct tpm_private *tpm_private; | ||
830 | +}; | ||
831 | + | ||
832 | + | ||
833 | +enum vdev_status { | ||
834 | + TPM_VD_STATUS_DISCONNECTED = 0x0, | ||
835 | + TPM_VD_STATUS_CONNECTED = 0x1 | ||
836 | +}; | ||
837 | + | ||
838 | +/* this function is called from tpm_vtpm.c */ | ||
839 | +int vtpm_vd_send(struct tpm_private * tp, | ||
840 | + const u8 * buf, size_t count, void *ptr); | ||
841 | + | ||
842 | +/* these functions are offered by tpm_vtpm.c */ | ||
843 | +struct tpm_chip *init_vtpm(struct device *, | ||
844 | + struct tpm_private *); | ||
845 | +void cleanup_vtpm(struct device *); | ||
846 | +int vtpm_vd_recv(const struct tpm_chip* chip, | ||
847 | + const unsigned char *buffer, size_t count, void *ptr); | ||
848 | +void vtpm_vd_status(const struct tpm_chip *, u8 status); | ||
849 | + | ||
850 | +static inline struct tpm_private *tpm_private_from_dev(struct device *dev) | ||
851 | +{ | ||
852 | + struct tpm_chip *chip = dev_get_drvdata(dev); | ||
853 | + struct vtpm_state *vtpms = chip_get_private(chip); | ||
854 | + return vtpms->tpm_private; | ||
855 | +} | ||
856 | + | ||
857 | +#endif | ||
858 | --- /dev/null | ||
859 | +++ b/drivers/char/tpm/tpm_xen.c | ||
860 | @@ -0,0 +1,722 @@ | ||
861 | +/* | ||
862 | + * Copyright (c) 2005, IBM Corporation | ||
863 | + * | ||
864 | + * Author: Stefan Berger, stefanb@us.ibm.com | ||
865 | + * Grant table support: Mahadevan Gomathisankaran | ||
866 | + * | ||
867 | + * This code has been derived from drivers/xen/netfront/netfront.c | ||
868 | + * | ||
869 | + * Copyright (c) 2002-2004, K A Fraser | ||
870 | + * | ||
871 | + * This program is free software; you can redistribute it and/or | ||
872 | + * modify it under the terms of the GNU General Public License version 2 | ||
873 | + * as published by the Free Software Foundation; or, when distributed | ||
874 | + * separately from the Linux kernel or incorporated into other | ||
875 | + * software packages, subject to the following license: | ||
876 | + * | ||
877 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
878 | + * of this source file (the "Software"), to deal in the Software without | ||
879 | + * restriction, including without limitation the rights to use, copy, modify, | ||
880 | + * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
881 | + * and to permit persons to whom the Software is furnished to do so, subject to | ||
882 | + * the following conditions: | ||
883 | + * | ||
884 | + * The above copyright notice and this permission notice shall be included in | ||
885 | + * all copies or substantial portions of the Software. | ||
886 | + * | ||
887 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
888 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
889 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
890 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
891 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
892 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
893 | + * IN THE SOFTWARE. | ||
894 | + */ | ||
895 | + | ||
896 | +#include <linux/errno.h> | ||
897 | +#include <linux/err.h> | ||
898 | +#include <linux/interrupt.h> | ||
899 | +#include <linux/mutex.h> | ||
900 | +#include <asm/uaccess.h> | ||
901 | +#include <xen/evtchn.h> | ||
902 | +#include <xen/interface/grant_table.h> | ||
903 | +#include <xen/interface/io/tpmif.h> | ||
904 | +#include <xen/gnttab.h> | ||
905 | +#include <xen/xenbus.h> | ||
906 | +#include "tpm.h" | ||
907 | +#include "tpm_vtpm.h" | ||
908 | + | ||
909 | +#undef DEBUG | ||
910 | + | ||
911 | +/* local structures */ | ||
912 | +struct tpm_private { | ||
913 | + struct tpm_chip *chip; | ||
914 | + | ||
915 | + tpmif_tx_interface_t *tx; | ||
916 | + atomic_t refcnt; | ||
917 | + unsigned int irq; | ||
918 | + u8 is_connected; | ||
919 | + u8 is_suspended; | ||
920 | + | ||
921 | + spinlock_t tx_lock; | ||
922 | + | ||
923 | + struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE]; | ||
924 | + | ||
925 | + atomic_t tx_busy; | ||
926 | + void *tx_remember; | ||
927 | + | ||
928 | + domid_t backend_id; | ||
929 | + wait_queue_head_t wait_q; | ||
930 | + | ||
931 | + struct xenbus_device *dev; | ||
932 | + int ring_ref; | ||
933 | +}; | ||
934 | + | ||
935 | +struct tx_buffer { | ||
936 | + unsigned int size; /* available space in data */ | ||
937 | + unsigned int len; /* used space in data */ | ||
938 | + unsigned char *data; /* pointer to a page */ | ||
939 | +}; | ||
940 | + | ||
941 | + | ||
942 | +/* locally visible variables */ | ||
943 | +static grant_ref_t gref_head; | ||
944 | +static struct tpm_private *my_priv; | ||
945 | + | ||
946 | +/* local function prototypes */ | ||
947 | +static irqreturn_t tpmif_int(int irq, | ||
948 | + void *tpm_priv, | ||
949 | + struct pt_regs *ptregs); | ||
950 | +static void tpmif_rx_action(unsigned long unused); | ||
951 | +static int tpmif_connect(struct xenbus_device *dev, | ||
952 | + struct tpm_private *tp, | ||
953 | + domid_t domid); | ||
954 | +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0); | ||
955 | +static int tpmif_allocate_tx_buffers(struct tpm_private *tp); | ||
956 | +static void tpmif_free_tx_buffers(struct tpm_private *tp); | ||
957 | +static void tpmif_set_connected_state(struct tpm_private *tp, | ||
958 | + u8 newstate); | ||
959 | +static int tpm_xmit(struct tpm_private *tp, | ||
960 | + const u8 * buf, size_t count, int userbuffer, | ||
961 | + void *remember); | ||
962 | +static void destroy_tpmring(struct tpm_private *tp); | ||
963 | +void __exit tpmif_exit(void); | ||
964 | + | ||
965 | +#define DPRINTK(fmt, args...) \ | ||
966 | + pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args) | ||
967 | +#define IPRINTK(fmt, args...) \ | ||
968 | + printk(KERN_INFO "xen_tpm_fr: " fmt, ##args) | ||
969 | +#define WPRINTK(fmt, args...) \ | ||
970 | + printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args) | ||
971 | + | ||
972 | +#define GRANT_INVALID_REF 0 | ||
973 | + | ||
974 | + | ||
975 | +static inline int | ||
976 | +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len, | ||
977 | + int isuserbuffer) | ||
978 | +{ | ||
979 | + int copied = len; | ||
980 | + | ||
981 | + if (len > txb->size) | ||
982 | + copied = txb->size; | ||
983 | + if (isuserbuffer) { | ||
984 | + if (copy_from_user(txb->data, src, copied)) | ||
985 | + return -EFAULT; | ||
986 | + } else { | ||
987 | + memcpy(txb->data, src, copied); | ||
988 | + } | ||
989 | + txb->len = len; | ||
990 | + return copied; | ||
991 | +} | ||
992 | + | ||
993 | +static inline struct tx_buffer *tx_buffer_alloc(void) | ||
994 | +{ | ||
995 | + struct tx_buffer *txb; | ||
996 | + | ||
997 | + txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL); | ||
998 | + if (!txb) | ||
999 | + return NULL; | ||
1000 | + | ||
1001 | + txb->len = 0; | ||
1002 | + txb->size = PAGE_SIZE; | ||
1003 | + txb->data = (unsigned char *)__get_free_page(GFP_KERNEL); | ||
1004 | + if (txb->data == NULL) { | ||
1005 | + kfree(txb); | ||
1006 | + txb = NULL; | ||
1007 | + } | ||
1008 | + | ||
1009 | + return txb; | ||
1010 | +} | ||
1011 | + | ||
1012 | + | ||
1013 | +static inline void tx_buffer_free(struct tx_buffer *txb) | ||
1014 | +{ | ||
1015 | + if (txb) { | ||
1016 | + free_page((long)txb->data); | ||
1017 | + kfree(txb); | ||
1018 | + } | ||
1019 | +} | ||
1020 | + | ||
1021 | +/************************************************************** | ||
1022 | + Utility function for the tpm_private structure | ||
1023 | +**************************************************************/ | ||
1024 | +static void tpm_private_init(struct tpm_private *tp) | ||
1025 | +{ | ||
1026 | + spin_lock_init(&tp->tx_lock); | ||
1027 | + init_waitqueue_head(&tp->wait_q); | ||
1028 | + atomic_set(&tp->refcnt, 1); | ||
1029 | +} | ||
1030 | + | ||
1031 | +static void tpm_private_put(void) | ||
1032 | +{ | ||
1033 | + if (!atomic_dec_and_test(&my_priv->refcnt)) | ||
1034 | + return; | ||
1035 | + | ||
1036 | + tpmif_free_tx_buffers(my_priv); | ||
1037 | + kfree(my_priv); | ||
1038 | + my_priv = NULL; | ||
1039 | +} | ||
1040 | + | ||
1041 | +static struct tpm_private *tpm_private_get(void) | ||
1042 | +{ | ||
1043 | + int err; | ||
1044 | + | ||
1045 | + if (my_priv) { | ||
1046 | + atomic_inc(&my_priv->refcnt); | ||
1047 | + return my_priv; | ||
1048 | + } | ||
1049 | + | ||
1050 | + my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL); | ||
1051 | + if (!my_priv) | ||
1052 | + return NULL; | ||
1053 | + | ||
1054 | + tpm_private_init(my_priv); | ||
1055 | + err = tpmif_allocate_tx_buffers(my_priv); | ||
1056 | + if (err < 0) | ||
1057 | + tpm_private_put(); | ||
1058 | + | ||
1059 | + return my_priv; | ||
1060 | +} | ||
1061 | + | ||
1062 | +/************************************************************** | ||
1063 | + | ||
1064 | + The interface to let the tpm plugin register its callback | ||
1065 | + function and send data to another partition using this module | ||
1066 | + | ||
1067 | +**************************************************************/ | ||
1068 | + | ||
1069 | +static DEFINE_MUTEX(suspend_lock); | ||
1070 | +/* | ||
1071 | + * Send data via this module by calling this function | ||
1072 | + */ | ||
1073 | +int vtpm_vd_send(struct tpm_private *tp, | ||
1074 | + const u8 * buf, size_t count, void *ptr) | ||
1075 | +{ | ||
1076 | + int sent; | ||
1077 | + | ||
1078 | + mutex_lock(&suspend_lock); | ||
1079 | + sent = tpm_xmit(tp, buf, count, 0, ptr); | ||
1080 | + mutex_unlock(&suspend_lock); | ||
1081 | + | ||
1082 | + return sent; | ||
1083 | +} | ||
1084 | + | ||
1085 | +/************************************************************** | ||
1086 | + XENBUS support code | ||
1087 | +**************************************************************/ | ||
1088 | + | ||
1089 | +static int setup_tpmring(struct xenbus_device *dev, | ||
1090 | + struct tpm_private *tp) | ||
1091 | +{ | ||
1092 | + tpmif_tx_interface_t *sring; | ||
1093 | + int err; | ||
1094 | + | ||
1095 | + tp->ring_ref = GRANT_INVALID_REF; | ||
1096 | + | ||
1097 | + sring = (void *)__get_free_page(GFP_KERNEL); | ||
1098 | + if (!sring) { | ||
1099 | + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); | ||
1100 | + return -ENOMEM; | ||
1101 | + } | ||
1102 | + tp->tx = sring; | ||
1103 | + | ||
1104 | + err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx)); | ||
1105 | + if (err < 0) { | ||
1106 | + free_page((unsigned long)sring); | ||
1107 | + tp->tx = NULL; | ||
1108 | + xenbus_dev_fatal(dev, err, "allocating grant reference"); | ||
1109 | + goto fail; | ||
1110 | + } | ||
1111 | + tp->ring_ref = err; | ||
1112 | + | ||
1113 | + err = tpmif_connect(dev, tp, dev->otherend_id); | ||
1114 | + if (err) | ||
1115 | + goto fail; | ||
1116 | + | ||
1117 | + return 0; | ||
1118 | +fail: | ||
1119 | + destroy_tpmring(tp); | ||
1120 | + return err; | ||
1121 | +} | ||
1122 | + | ||
1123 | + | ||
1124 | +static void destroy_tpmring(struct tpm_private *tp) | ||
1125 | +{ | ||
1126 | + tpmif_set_connected_state(tp, 0); | ||
1127 | + | ||
1128 | + if (tp->ring_ref != GRANT_INVALID_REF) { | ||
1129 | + gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx); | ||
1130 | + tp->ring_ref = GRANT_INVALID_REF; | ||
1131 | + tp->tx = NULL; | ||
1132 | + } | ||
1133 | + | ||
1134 | + if (tp->irq) | ||
1135 | + unbind_from_irqhandler(tp->irq, tp); | ||
1136 | + | ||
1137 | + tp->irq = 0; | ||
1138 | +} | ||
1139 | + | ||
1140 | + | ||
1141 | +static int talk_to_backend(struct xenbus_device *dev, | ||
1142 | + struct tpm_private *tp) | ||
1143 | +{ | ||
1144 | + const char *message = NULL; | ||
1145 | + int err; | ||
1146 | + struct xenbus_transaction xbt; | ||
1147 | + | ||
1148 | + err = setup_tpmring(dev, tp); | ||
1149 | + if (err) { | ||
1150 | + xenbus_dev_fatal(dev, err, "setting up ring"); | ||
1151 | + goto out; | ||
1152 | + } | ||
1153 | + | ||
1154 | +again: | ||
1155 | + err = xenbus_transaction_start(&xbt); | ||
1156 | + if (err) { | ||
1157 | + xenbus_dev_fatal(dev, err, "starting transaction"); | ||
1158 | + goto destroy_tpmring; | ||
1159 | + } | ||
1160 | + | ||
1161 | + err = xenbus_printf(xbt, dev->nodename, | ||
1162 | + "ring-ref","%u", tp->ring_ref); | ||
1163 | + if (err) { | ||
1164 | + message = "writing ring-ref"; | ||
1165 | + goto abort_transaction; | ||
1166 | + } | ||
1167 | + | ||
1168 | + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", | ||
1169 | + irq_to_evtchn_port(tp->irq)); | ||
1170 | + if (err) { | ||
1171 | + message = "writing event-channel"; | ||
1172 | + goto abort_transaction; | ||
1173 | + } | ||
1174 | + | ||
1175 | + err = xenbus_transaction_end(xbt, 0); | ||
1176 | + if (err == -EAGAIN) | ||
1177 | + goto again; | ||
1178 | + if (err) { | ||
1179 | + xenbus_dev_fatal(dev, err, "completing transaction"); | ||
1180 | + goto destroy_tpmring; | ||
1181 | + } | ||
1182 | + | ||
1183 | + xenbus_switch_state(dev, XenbusStateConnected); | ||
1184 | + | ||
1185 | + return 0; | ||
1186 | + | ||
1187 | +abort_transaction: | ||
1188 | + xenbus_transaction_end(xbt, 1); | ||
1189 | + if (message) | ||
1190 | + xenbus_dev_error(dev, err, "%s", message); | ||
1191 | +destroy_tpmring: | ||
1192 | + destroy_tpmring(tp); | ||
1193 | +out: | ||
1194 | + return err; | ||
1195 | +} | ||
1196 | + | ||
1197 | +/** | ||
1198 | + * Callback received when the backend's state changes. | ||
1199 | + */ | ||
1200 | +static void backend_changed(struct xenbus_device *dev, | ||
1201 | + enum xenbus_state backend_state) | ||
1202 | +{ | ||
1203 | + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); | ||
1204 | + DPRINTK("\n"); | ||
1205 | + | ||
1206 | + switch (backend_state) { | ||
1207 | + case XenbusStateInitialising: | ||
1208 | + case XenbusStateInitWait: | ||
1209 | + case XenbusStateInitialised: | ||
1210 | + case XenbusStateReconfiguring: | ||
1211 | + case XenbusStateReconfigured: | ||
1212 | + case XenbusStateUnknown: | ||
1213 | + break; | ||
1214 | + | ||
1215 | + case XenbusStateConnected: | ||
1216 | + tpmif_set_connected_state(tp, 1); | ||
1217 | + break; | ||
1218 | + | ||
1219 | + case XenbusStateClosing: | ||
1220 | + tpmif_set_connected_state(tp, 0); | ||
1221 | + xenbus_frontend_closed(dev); | ||
1222 | + break; | ||
1223 | + | ||
1224 | + case XenbusStateClosed: | ||
1225 | + tpmif_set_connected_state(tp, 0); | ||
1226 | + if (tp->is_suspended == 0) | ||
1227 | + device_unregister(&dev->dev); | ||
1228 | + xenbus_frontend_closed(dev); | ||
1229 | + break; | ||
1230 | + } | ||
1231 | +} | ||
1232 | + | ||
1233 | +static int tpmfront_probe(struct xenbus_device *dev, | ||
1234 | + const struct xenbus_device_id *id) | ||
1235 | +{ | ||
1236 | + int err; | ||
1237 | + int handle; | ||
1238 | + struct tpm_private *tp = tpm_private_get(); | ||
1239 | + | ||
1240 | + if (!tp) | ||
1241 | + return -ENOMEM; | ||
1242 | + | ||
1243 | + tp->chip = init_vtpm(&dev->dev, tp); | ||
1244 | + if (IS_ERR(tp->chip)) | ||
1245 | + return PTR_ERR(tp->chip); | ||
1246 | + | ||
1247 | + err = xenbus_scanf(XBT_NIL, dev->nodename, | ||
1248 | + "handle", "%i", &handle); | ||
1249 | + if (XENBUS_EXIST_ERR(err)) | ||
1250 | + return err; | ||
1251 | + | ||
1252 | + if (err < 0) { | ||
1253 | + xenbus_dev_fatal(dev, err, "reading handle"); | ||
1254 | + return err; | ||
1255 | + } | ||
1256 | + | ||
1257 | + tp->dev = dev; | ||
1258 | + | ||
1259 | + err = talk_to_backend(dev, tp); | ||
1260 | + if (err) { | ||
1261 | + tpm_private_put(); | ||
1262 | + return err; | ||
1263 | + } | ||
1264 | + | ||
1265 | + return 0; | ||
1266 | +} | ||
1267 | + | ||
1268 | + | ||
1269 | +static int tpmfront_remove(struct xenbus_device *dev) | ||
1270 | +{ | ||
1271 | + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); | ||
1272 | + destroy_tpmring(tp); | ||
1273 | + cleanup_vtpm(&dev->dev); | ||
1274 | + return 0; | ||
1275 | +} | ||
1276 | + | ||
1277 | +static int tpmfront_suspend(struct xenbus_device *dev) | ||
1278 | +{ | ||
1279 | + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); | ||
1280 | + u32 ctr; | ||
1281 | + | ||
1282 | + /* Take the lock, preventing any application from sending. */ | ||
1283 | + mutex_lock(&suspend_lock); | ||
1284 | + tp->is_suspended = 1; | ||
1285 | + | ||
1286 | + for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) { | ||
1287 | + if ((ctr % 10) == 0) | ||
1288 | + printk("TPM-FE [INFO]: Waiting for outstanding " | ||
1289 | + "request.\n"); | ||
1290 | + /* Wait for a request to be responded to. */ | ||
1291 | + interruptible_sleep_on_timeout(&tp->wait_q, 100); | ||
1292 | + } | ||
1293 | + | ||
1294 | + return 0; | ||
1295 | +} | ||
1296 | + | ||
1297 | +static int tpmfront_suspend_finish(struct tpm_private *tp) | ||
1298 | +{ | ||
1299 | + tp->is_suspended = 0; | ||
1300 | + /* Allow applications to send again. */ | ||
1301 | + mutex_unlock(&suspend_lock); | ||
1302 | + return 0; | ||
1303 | +} | ||
1304 | + | ||
1305 | +static int tpmfront_suspend_cancel(struct xenbus_device *dev) | ||
1306 | +{ | ||
1307 | + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); | ||
1308 | + return tpmfront_suspend_finish(tp); | ||
1309 | +} | ||
1310 | + | ||
1311 | +static int tpmfront_resume(struct xenbus_device *dev) | ||
1312 | +{ | ||
1313 | + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); | ||
1314 | + destroy_tpmring(tp); | ||
1315 | + return talk_to_backend(dev, tp); | ||
1316 | +} | ||
1317 | + | ||
1318 | +static int tpmif_connect(struct xenbus_device *dev, | ||
1319 | + struct tpm_private *tp, | ||
1320 | + domid_t domid) | ||
1321 | +{ | ||
1322 | + int err; | ||
1323 | + | ||
1324 | + tp->backend_id = domid; | ||
1325 | + | ||
1326 | + err = bind_listening_port_to_irqhandler( | ||
1327 | + domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); | ||
1328 | + if (err <= 0) { | ||
1329 | + WPRINTK("bind_listening_port_to_irqhandler failed " | ||
1330 | + "(err=%d)\n", err); | ||
1331 | + return err; | ||
1332 | + } | ||
1333 | + tp->irq = err; | ||
1334 | + | ||
1335 | + return 0; | ||
1336 | +} | ||
1337 | + | ||
1338 | +static struct xenbus_device_id tpmfront_ids[] = { | ||
1339 | + { "vtpm" }, | ||
1340 | + { "" } | ||
1341 | +}; | ||
1342 | + | ||
1343 | +static struct xenbus_driver tpmfront = { | ||
1344 | + .name = "vtpm", | ||
1345 | + .owner = THIS_MODULE, | ||
1346 | + .ids = tpmfront_ids, | ||
1347 | + .probe = tpmfront_probe, | ||
1348 | + .remove = tpmfront_remove, | ||
1349 | + .resume = tpmfront_resume, | ||
1350 | + .otherend_changed = backend_changed, | ||
1351 | + .suspend = tpmfront_suspend, | ||
1352 | + .suspend_cancel = tpmfront_suspend_cancel, | ||
1353 | +}; | ||
1354 | + | ||
1355 | +static void __init init_tpm_xenbus(void) | ||
1356 | +{ | ||
1357 | + xenbus_register_frontend(&tpmfront); | ||
1358 | +} | ||
1359 | + | ||
1360 | +static int tpmif_allocate_tx_buffers(struct tpm_private *tp) | ||
1361 | +{ | ||
1362 | + unsigned int i; | ||
1363 | + | ||
1364 | + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) { | ||
1365 | + tp->tx_buffers[i] = tx_buffer_alloc(); | ||
1366 | + if (!tp->tx_buffers[i]) { | ||
1367 | + tpmif_free_tx_buffers(tp); | ||
1368 | + return -ENOMEM; | ||
1369 | + } | ||
1370 | + } | ||
1371 | + return 0; | ||
1372 | +} | ||
1373 | + | ||
1374 | +static void tpmif_free_tx_buffers(struct tpm_private *tp) | ||
1375 | +{ | ||
1376 | + unsigned int i; | ||
1377 | + | ||
1378 | + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) | ||
1379 | + tx_buffer_free(tp->tx_buffers[i]); | ||
1380 | +} | ||
1381 | + | ||
1382 | +static void tpmif_rx_action(unsigned long priv) | ||
1383 | +{ | ||
1384 | + struct tpm_private *tp = (struct tpm_private *)priv; | ||
1385 | + int i = 0; | ||
1386 | + unsigned int received; | ||
1387 | + unsigned int offset = 0; | ||
1388 | + u8 *buffer; | ||
1389 | + tpmif_tx_request_t *tx = &tp->tx->ring[i].req; | ||
1390 | + | ||
1391 | + atomic_set(&tp->tx_busy, 0); | ||
1392 | + wake_up_interruptible(&tp->wait_q); | ||
1393 | + | ||
1394 | + received = tx->size; | ||
1395 | + | ||
1396 | + buffer = kmalloc(received, GFP_ATOMIC); | ||
1397 | + if (!buffer) | ||
1398 | + return; | ||
1399 | + | ||
1400 | + for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) { | ||
1401 | + struct tx_buffer *txb = tp->tx_buffers[i]; | ||
1402 | + tpmif_tx_request_t *tx; | ||
1403 | + unsigned int tocopy; | ||
1404 | + | ||
1405 | + tx = &tp->tx->ring[i].req; | ||
1406 | + tocopy = tx->size; | ||
1407 | + if (tocopy > PAGE_SIZE) | ||
1408 | + tocopy = PAGE_SIZE; | ||
1409 | + | ||
1410 | + memcpy(&buffer[offset], txb->data, tocopy); | ||
1411 | + | ||
1412 | + gnttab_release_grant_reference(&gref_head, tx->ref); | ||
1413 | + | ||
1414 | + offset += tocopy; | ||
1415 | + } | ||
1416 | + | ||
1417 | + vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember); | ||
1418 | + kfree(buffer); | ||
1419 | +} | ||
1420 | + | ||
1421 | + | ||
1422 | +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs) | ||
1423 | +{ | ||
1424 | + struct tpm_private *tp = tpm_priv; | ||
1425 | + unsigned long flags; | ||
1426 | + | ||
1427 | + spin_lock_irqsave(&tp->tx_lock, flags); | ||
1428 | + tpmif_rx_tasklet.data = (unsigned long)tp; | ||
1429 | + tasklet_schedule(&tpmif_rx_tasklet); | ||
1430 | + spin_unlock_irqrestore(&tp->tx_lock, flags); | ||
1431 | + | ||
1432 | + return IRQ_HANDLED; | ||
1433 | +} | ||
1434 | + | ||
1435 | + | ||
1436 | +static int tpm_xmit(struct tpm_private *tp, | ||
1437 | + const u8 * buf, size_t count, int isuserbuffer, | ||
1438 | + void *remember) | ||
1439 | +{ | ||
1440 | + tpmif_tx_request_t *tx; | ||
1441 | + TPMIF_RING_IDX i; | ||
1442 | + unsigned int offset = 0; | ||
1443 | + | ||
1444 | + spin_lock_irq(&tp->tx_lock); | ||
1445 | + | ||
1446 | + if (unlikely(atomic_read(&tp->tx_busy))) { | ||
1447 | + printk("tpm_xmit: There's an outstanding request/response " | ||
1448 | + "on the way!\n"); | ||
1449 | + spin_unlock_irq(&tp->tx_lock); | ||
1450 | + return -EBUSY; | ||
1451 | + } | ||
1452 | + | ||
1453 | + if (tp->is_connected != 1) { | ||
1454 | + spin_unlock_irq(&tp->tx_lock); | ||
1455 | + return -EIO; | ||
1456 | + } | ||
1457 | + | ||
1458 | + for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) { | ||
1459 | + struct tx_buffer *txb = tp->tx_buffers[i]; | ||
1460 | + int copied; | ||
1461 | + | ||
1462 | + if (!txb) { | ||
1463 | + DPRINTK("txb (i=%d) is NULL. buffers initilized?\n" | ||
1464 | + "Not transmitting anything!\n", i); | ||
1465 | + spin_unlock_irq(&tp->tx_lock); | ||
1466 | + return -EFAULT; | ||
1467 | + } | ||
1468 | + | ||
1469 | + copied = tx_buffer_copy(txb, &buf[offset], count, | ||
1470 | + isuserbuffer); | ||
1471 | + if (copied < 0) { | ||
1472 | + /* An error occurred */ | ||
1473 | + spin_unlock_irq(&tp->tx_lock); | ||
1474 | + return copied; | ||
1475 | + } | ||
1476 | + count -= copied; | ||
1477 | + offset += copied; | ||
1478 | + | ||
1479 | + tx = &tp->tx->ring[i].req; | ||
1480 | + tx->addr = virt_to_machine(txb->data); | ||
1481 | + tx->size = txb->len; | ||
1482 | + tx->unused = 0; | ||
1483 | + | ||
1484 | + DPRINTK("First 4 characters sent by TPM-FE are " | ||
1485 | + "0x%02x 0x%02x 0x%02x 0x%02x\n", | ||
1486 | + txb->data[0],txb->data[1],txb->data[2],txb->data[3]); | ||
1487 | + | ||
1488 | + /* Get the granttable reference for this page. */ | ||
1489 | + tx->ref = gnttab_claim_grant_reference(&gref_head); | ||
1490 | + if (tx->ref == -ENOSPC) { | ||
1491 | + spin_unlock_irq(&tp->tx_lock); | ||
1492 | + DPRINTK("Grant table claim reference failed in " | ||
1493 | + "func:%s line:%d file:%s\n", | ||
1494 | + __FUNCTION__, __LINE__, __FILE__); | ||
1495 | + return -ENOSPC; | ||
1496 | + } | ||
1497 | + gnttab_grant_foreign_access_ref(tx->ref, | ||
1498 | + tp->backend_id, | ||
1499 | + virt_to_mfn(txb->data), | ||
1500 | + 0 /*RW*/); | ||
1501 | + wmb(); | ||
1502 | + } | ||
1503 | + | ||
1504 | + atomic_set(&tp->tx_busy, 1); | ||
1505 | + tp->tx_remember = remember; | ||
1506 | + | ||
1507 | + mb(); | ||
1508 | + | ||
1509 | + notify_remote_via_irq(tp->irq); | ||
1510 | + | ||
1511 | + spin_unlock_irq(&tp->tx_lock); | ||
1512 | + return offset; | ||
1513 | +} | ||
1514 | + | ||
1515 | + | ||
1516 | +static void tpmif_notify_upperlayer(struct tpm_private *tp) | ||
1517 | +{ | ||
1518 | + /* Notify upper layer about the state of the connection to the BE. */ | ||
1519 | + vtpm_vd_status(tp->chip, (tp->is_connected | ||
1520 | + ? TPM_VD_STATUS_CONNECTED | ||
1521 | + : TPM_VD_STATUS_DISCONNECTED)); | ||
1522 | +} | ||
1523 | + | ||
1524 | + | ||
1525 | +static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected) | ||
1526 | +{ | ||
1527 | + /* | ||
1528 | + * Don't notify upper layer if we are in suspend mode and | ||
1529 | + * should disconnect - the assumption is that we will resume. | ||
1530 | + * The mutex keeps apps from sending. | ||
1531 | + */ | ||
1532 | + if (is_connected == 0 && tp->is_suspended == 1) | ||
1533 | + return; | ||
1534 | + | ||
1535 | + /* | ||
1536 | + * Unlock the mutex if we are connected again | ||
1537 | + * after being suspended - now resuming. | ||
1538 | + * This also removes the suspend state. | ||
1539 | + */ | ||
1540 | + if (is_connected == 1 && tp->is_suspended == 1) | ||
1541 | + tpmfront_suspend_finish(tp); | ||
1542 | + | ||
1543 | + if (is_connected != tp->is_connected) { | ||
1544 | + tp->is_connected = is_connected; | ||
1545 | + tpmif_notify_upperlayer(tp); | ||
1546 | + } | ||
1547 | +} | ||
1548 | + | ||
1549 | + | ||
1550 | + | ||
1551 | +/* ================================================================= | ||
1552 | + * Initialization function. | ||
1553 | + * ================================================================= | ||
1554 | + */ | ||
1555 | + | ||
1556 | + | ||
1557 | +static int __init tpmif_init(void) | ||
1558 | +{ | ||
1559 | + struct tpm_private *tp; | ||
1560 | + | ||
1561 | + if (is_initial_xendomain()) | ||
1562 | + return -EPERM; | ||
1563 | + | ||
1564 | + tp = tpm_private_get(); | ||
1565 | + if (!tp) | ||
1566 | + return -ENOMEM; | ||
1567 | + | ||
1568 | + IPRINTK("Initialising the vTPM driver.\n"); | ||
1569 | + if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE, | ||
1570 | + &gref_head) < 0) { | ||
1571 | + tpm_private_put(); | ||
1572 | + return -EFAULT; | ||
1573 | + } | ||
1574 | + | ||
1575 | + init_tpm_xenbus(); | ||
1576 | + return 0; | ||
1577 | +} | ||
1578 | + | ||
1579 | + | ||
1580 | +module_init(tpmif_init); | ||
1581 | + | ||
1582 | +MODULE_LICENSE("Dual BSD/GPL"); | ||
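
tpm_xen.c relies on the ring definitions from <xen/interface/io/tpmif.h>, which this patch does not carry. A sketch of the layout it assumes, following the linux-2.6.18-xen headers (the ring size and integer types here are assumptions and are version-dependent):

    #define TPMIF_TX_RING_SIZE 1    /* assumed; the interface header sets it */

    typedef uint32_t TPMIF_RING_IDX;

    typedef struct tpmif_tx_request {
            unsigned long addr;     /* machine address of the granted data page */
            grant_ref_t ref;        /* grant reference covering that page */
            uint16_t unused;
            uint16_t size;          /* request length; reused for the response */
    } tpmif_tx_request_t;

    /* The front-end fills ring[i].req, grants the data pages, and notifies
     * the back-end over the event channel; the back-end overwrites the same
     * buffers with the response and updates size before signalling back. */
    typedef struct tpmif_tx_interface {
            struct {
                    tpmif_tx_request_t req;
            } ring[TPMIF_TX_RING_SIZE];
    } tpmif_tx_interface_t;
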
1583 | --- a/drivers/ide/ide-lib.c | ||
1584 | +++ b/drivers/ide/ide-lib.c | ||
1585 | @@ -336,12 +336,12 @@ | ||
1586 | { | ||
1587 | u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ | ||
1588 | |||
1589 | - if (!PCI_DMA_BUS_IS_PHYS) { | ||
1590 | - addr = BLK_BOUNCE_ANY; | ||
1591 | - } else if (on && drive->media == ide_disk) { | ||
1592 | + if (on && drive->media == ide_disk) { | ||
1593 | struct device *dev = drive->hwif->dev; | ||
1594 | |||
1595 | - if (dev && dev->dma_mask) | ||
1596 | + if (!PCI_DMA_BUS_IS_PHYS) | ||
1597 | + addr = BLK_BOUNCE_ANY; | ||
1598 | + else if (dev && dev->dma_mask) | ||
1599 | addr = *dev->dma_mask; | ||
1600 | } | ||
1601 | |||
1602 | --- a/drivers/oprofile/buffer_sync.c | ||
1603 | +++ b/drivers/oprofile/buffer_sync.c | ||
1604 | @@ -6,6 +6,10 @@ | ||
1605 | * | ||
1606 | * @author John Levon <levon@movementarian.org> | ||
1607 | * | ||
1608 | + * Modified by Aravind Menon for Xen | ||
1609 | + * These modifications are: | ||
1610 | + * Copyright (C) 2005 Hewlett-Packard Co. | ||
1611 | + * | ||
1612 | * This is the core of the buffer management. Each | ||
1613 | * CPU buffer is processed and entered into the | ||
1614 | * global event buffer. Such processing is necessary | ||
1615 | @@ -40,6 +44,7 @@ | ||
1616 | static DEFINE_SPINLOCK(task_mortuary); | ||
1617 | static void process_task_mortuary(void); | ||
1618 | |||
1619 | +static int cpu_current_domain[NR_CPUS]; | ||
1620 | |||
1621 | /* Take ownership of the task struct and place it on the | ||
1622 | * list for processing. Only after two full buffer syncs | ||
1623 | @@ -148,6 +153,11 @@ | ||
1624 | int sync_start(void) | ||
1625 | { | ||
1626 | int err; | ||
1627 | + int i; | ||
1628 | + | ||
1629 | + for (i = 0; i < NR_CPUS; i++) { | ||
1630 | + cpu_current_domain[i] = COORDINATOR_DOMAIN; | ||
1631 | + } | ||
1632 | |||
1633 | start_cpu_work(); | ||
1634 | |||
1635 | @@ -274,15 +284,31 @@ | ||
1636 | last_cookie = INVALID_COOKIE; | ||
1637 | } | ||
1638 | |||
1639 | -static void add_kernel_ctx_switch(unsigned int in_kernel) | ||
1640 | +static void add_cpu_mode_switch(unsigned int cpu_mode) | ||
1641 | { | ||
1642 | add_event_entry(ESCAPE_CODE); | ||
1643 | - if (in_kernel) | ||
1644 | - add_event_entry(KERNEL_ENTER_SWITCH_CODE); | ||
1645 | - else | ||
1646 | - add_event_entry(KERNEL_EXIT_SWITCH_CODE); | ||
1647 | + switch (cpu_mode) { | ||
1648 | + case CPU_MODE_USER: | ||
1649 | + add_event_entry(USER_ENTER_SWITCH_CODE); | ||
1650 | + break; | ||
1651 | + case CPU_MODE_KERNEL: | ||
1652 | + add_event_entry(KERNEL_ENTER_SWITCH_CODE); | ||
1653 | + break; | ||
1654 | + case CPU_MODE_XEN: | ||
1655 | + add_event_entry(XEN_ENTER_SWITCH_CODE); | ||
1656 | + break; | ||
1657 | + default: | ||
1658 | + break; | ||
1659 | + } | ||
1660 | } | ||
1661 | - | ||
1662 | + | ||
1663 | +static void add_domain_switch(unsigned long domain_id) | ||
1664 | +{ | ||
1665 | + add_event_entry(ESCAPE_CODE); | ||
1666 | + add_event_entry(DOMAIN_SWITCH_CODE); | ||
1667 | + add_event_entry(domain_id); | ||
1668 | +} | ||
1669 | + | ||
1670 | static void | ||
1671 | add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) | ||
1672 | { | ||
1673 | @@ -347,9 +373,9 @@ | ||
1674 | * for later lookup from userspace. | ||
1675 | */ | ||
1676 | static int | ||
1677 | -add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) | ||
1678 | +add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode) | ||
1679 | { | ||
1680 | - if (in_kernel) { | ||
1681 | + if (cpu_mode >= CPU_MODE_KERNEL) { | ||
1682 | add_sample_entry(s->eip, s->event); | ||
1683 | return 1; | ||
1684 | } else if (mm) { | ||
1685 | @@ -495,15 +521,21 @@ | ||
1686 | struct mm_struct *mm = NULL; | ||
1687 | struct task_struct * new; | ||
1688 | unsigned long cookie = 0; | ||
1689 | - int in_kernel = 1; | ||
1690 | + int cpu_mode = CPU_MODE_KERNEL; | ||
1691 | unsigned int i; | ||
1692 | sync_buffer_state state = sb_buffer_start; | ||
1693 | unsigned long available; | ||
1694 | + int domain_switch = 0; | ||
1695 | |||
1696 | mutex_lock(&buffer_mutex); | ||
1697 | |||
1698 | add_cpu_switch(cpu); | ||
1699 | |||
1700 | + /* We need to assign the first samples in this CPU buffer to the | ||
1701 | + same domain that we were processing at the last sync_buffer() run. */ | ||
1702 | + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { | ||
1703 | + add_domain_switch(cpu_current_domain[cpu]); | ||
1704 | + } | ||
1705 | /* Remember, only we can modify tail_pos */ | ||
1706 | |||
1707 | available = get_slots(cpu_buf); | ||
1708 | @@ -511,16 +543,18 @@ | ||
1709 | for (i = 0; i < available; ++i) { | ||
1710 | struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; | ||
1711 | |||
1712 | - if (is_code(s->eip)) { | ||
1713 | - if (s->event <= CPU_IS_KERNEL) { | ||
1714 | - /* kernel/userspace switch */ | ||
1715 | - in_kernel = s->event; | ||
1716 | + if (is_code(s->eip) && !domain_switch) { | ||
1717 | + if (s->event <= CPU_MODE_XEN) { | ||
1718 | + /* xen/kernel/userspace switch */ | ||
1719 | + cpu_mode = s->event; | ||
1720 | if (state == sb_buffer_start) | ||
1721 | state = sb_sample_start; | ||
1722 | - add_kernel_ctx_switch(s->event); | ||
1723 | + add_cpu_mode_switch(s->event); | ||
1724 | } else if (s->event == CPU_TRACE_BEGIN) { | ||
1725 | state = sb_bt_start; | ||
1726 | add_trace_begin(); | ||
1727 | + } else if (s->event == CPU_DOMAIN_SWITCH) { | ||
1728 | + domain_switch = 1; | ||
1729 | } else { | ||
1730 | struct mm_struct * oldmm = mm; | ||
1731 | |||
1732 | @@ -534,11 +568,21 @@ | ||
1733 | add_user_ctx_switch(new, cookie); | ||
1734 | } | ||
1735 | } else { | ||
1736 | - if (state >= sb_bt_start && | ||
1737 | - !add_sample(mm, s, in_kernel)) { | ||
1738 | - if (state == sb_bt_start) { | ||
1739 | - state = sb_bt_ignore; | ||
1740 | - atomic_inc(&oprofile_stats.bt_lost_no_mapping); | ||
1741 | + if (domain_switch) { | ||
1742 | + cpu_current_domain[cpu] = s->eip; | ||
1743 | + add_domain_switch(s->eip); | ||
1744 | + domain_switch = 0; | ||
1745 | + } else { | ||
1746 | + if (cpu_current_domain[cpu] != | ||
1747 | + COORDINATOR_DOMAIN) { | ||
1748 | + add_sample_entry(s->eip, s->event); | ||
1749 | + } | ||
1750 | + else if (state >= sb_bt_start && | ||
1751 | + !add_sample(mm, s, cpu_mode)) { | ||
1752 | + if (state == sb_bt_start) { | ||
1753 | + state = sb_bt_ignore; | ||
1754 | + atomic_inc(&oprofile_stats.bt_lost_no_mapping); | ||
1755 | + } | ||
1756 | } | ||
1757 | } | ||
1758 | } | ||
1759 | @@ -547,6 +591,11 @@ | ||
1760 | } | ||
1761 | release_mm(mm); | ||
1762 | |||
1763 | + /* We reset domain to COORDINATOR at each CPU switch */ | ||
1764 | + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { | ||
1765 | + add_domain_switch(COORDINATOR_DOMAIN); | ||
1766 | + } | ||
1767 | + | ||
1768 | mark_done(cpu); | ||
1769 | |||
1770 | mutex_unlock(&buffer_mutex); | ||
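The sync loop above brackets passive-domain samples between DOMAIN_SWITCH escape records and falls back to COORDINATOR_DOMAIN at the end of each CPU buffer. For reference, a hedged sketch of how an event-buffer consumer could track the owning domain; the (ESCAPE_CODE, DOMAIN_SWITCH_CODE, id) triple matches add_domain_switch() above, while handle_sample() and the reduction of everything else to eip/event pairs are illustrative simplifications:

extern void handle_sample(long domain, unsigned long eip, unsigned long event);

/* Simplified scan: real buffers also carry CPU/task/cookie escapes. */
static void scan_event_buffer(const unsigned long *buf, size_t n)
{
        long domain = 0;        /* samples start out owned locally */
        size_t i = 0;

        while (i + 1 < n) {
                if (buf[i] == ESCAPE_CODE && i + 2 < n &&
                    buf[i + 1] == DOMAIN_SWITCH_CODE) {
                        domain = (long)buf[i + 2];      /* new owner */
                        i += 3;
                        continue;
                }
                handle_sample(domain, buf[i], buf[i + 1]); /* eip, event */
                i += 2;
        }
}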
1771 | --- a/drivers/oprofile/cpu_buffer.c | ||
1772 | +++ b/drivers/oprofile/cpu_buffer.c | ||
1773 | @@ -6,6 +6,10 @@ | ||
1774 | * | ||
1775 | * @author John Levon <levon@movementarian.org> | ||
1776 | * | ||
1777 | + * Modified by Aravind Menon for Xen | ||
1778 | + * These modifications are: | ||
1779 | + * Copyright (C) 2005 Hewlett-Packard Co. | ||
1780 | + * | ||
1781 | * Each CPU has a local buffer that stores PC value/event | ||
1782 | * pairs. We also log context switches when we notice them. | ||
1783 | * Eventually each CPU's buffer is processed into the global | ||
1784 | @@ -34,6 +38,8 @@ | ||
1785 | #define DEFAULT_TIMER_EXPIRE (HZ / 10) | ||
1786 | static int work_enabled; | ||
1787 | |||
1788 | +static int32_t current_domain = COORDINATOR_DOMAIN; | ||
1789 | + | ||
1790 | void free_cpu_buffers(void) | ||
1791 | { | ||
1792 | int i; | ||
1793 | @@ -57,7 +63,7 @@ | ||
1794 | goto fail; | ||
1795 | |||
1796 | b->last_task = NULL; | ||
1797 | - b->last_is_kernel = -1; | ||
1798 | + b->last_cpu_mode = -1; | ||
1799 | b->tracing = 0; | ||
1800 | b->buffer_size = buffer_size; | ||
1801 | b->tail_pos = 0; | ||
1802 | @@ -115,7 +121,7 @@ | ||
1803 | * collected will populate the buffer with proper | ||
1804 | * values to initialize the buffer | ||
1805 | */ | ||
1806 | - cpu_buf->last_is_kernel = -1; | ||
1807 | + cpu_buf->last_cpu_mode = -1; | ||
1808 | cpu_buf->last_task = NULL; | ||
1809 | } | ||
1810 | |||
1811 | @@ -165,13 +171,13 @@ | ||
1812 | * because of the head/tail separation of the writer and reader | ||
1813 | * of the CPU buffer. | ||
1814 | * | ||
1815 | - * is_kernel is needed because on some architectures you cannot | ||
1816 | + * cpu_mode is needed because on some architectures you cannot | ||
1817 | * tell if you are in kernel or user space simply by looking at | ||
1818 | - * pc. We tag this in the buffer by generating kernel enter/exit | ||
1819 | - * events whenever is_kernel changes | ||
1820 | + * pc. We tag this in the buffer by generating kernel/user (and xen) | ||
1821 | + * enter events whenever cpu_mode changes | ||
1822 | */ | ||
1823 | static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, | ||
1824 | - int is_kernel, unsigned long event) | ||
1825 | + int cpu_mode, unsigned long event) | ||
1826 | { | ||
1827 | struct task_struct * task; | ||
1828 | |||
1829 | @@ -187,18 +193,18 @@ | ||
1830 | return 0; | ||
1831 | } | ||
1832 | |||
1833 | - is_kernel = !!is_kernel; | ||
1834 | - | ||
1835 | task = current; | ||
1836 | |||
1837 | /* notice a switch from user->kernel or vice versa */ | ||
1838 | - if (cpu_buf->last_is_kernel != is_kernel) { | ||
1839 | - cpu_buf->last_is_kernel = is_kernel; | ||
1840 | - add_code(cpu_buf, is_kernel); | ||
1841 | + if (cpu_buf->last_cpu_mode != cpu_mode) { | ||
1842 | + cpu_buf->last_cpu_mode = cpu_mode; | ||
1843 | + add_code(cpu_buf, cpu_mode); | ||
1844 | } | ||
1845 | - | ||
1846 | + | ||
1847 | /* notice a task switch */ | ||
1848 | - if (cpu_buf->last_task != task) { | ||
1849 | + /* if not processing other domain samples */ | ||
1850 | + if ((cpu_buf->last_task != task) && | ||
1851 | + (current_domain == COORDINATOR_DOMAIN)) { | ||
1852 | cpu_buf->last_task = task; | ||
1853 | add_code(cpu_buf, (unsigned long)task); | ||
1854 | } | ||
1855 | @@ -282,6 +288,25 @@ | ||
1856 | add_sample(cpu_buf, pc, 0); | ||
1857 | } | ||
1858 | |||
1859 | +int oprofile_add_domain_switch(int32_t domain_id) | ||
1860 | +{ | ||
1861 | + struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()]; | ||
1862 | + | ||
1863 | + /* we need space for switching into and out of the domain | ||
1864 | + (2 slots each) plus one sample and one cpu-mode switch */ | ||
1865 | + if (((nr_available_slots(cpu_buf) < 6) && | ||
1866 | + (domain_id != COORDINATOR_DOMAIN)) || | ||
1867 | + (nr_available_slots(cpu_buf) < 2)) | ||
1868 | + return 0; | ||
1869 | + | ||
1870 | + add_code(cpu_buf, CPU_DOMAIN_SWITCH); | ||
1871 | + add_sample(cpu_buf, domain_id, 0); | ||
1872 | + | ||
1873 | + current_domain = domain_id; | ||
1874 | + | ||
1875 | + return 1; | ||
1876 | +} | ||
1877 | + | ||
1878 | /* | ||
1879 | * This serves to avoid cpu buffer overflow, and makes sure | ||
1880 | * the task mortuary progresses | ||
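The 6-slot headroom check above budgets two slots to switch into the domain, two to switch back out, and two more for one sample plus a possible cpu-mode switch. A hedged sketch of the expected caller - the xenoprof interrupt path, which is not part of this patch - built on the existing oprofile_add_pc() entry point:

/* Illustrative only: log a batch of samples on behalf of another
 * domain, bracketed by domain switches; the batch is dropped when
 * the CPU buffer cannot take the switch records. */
static void log_passive_batch(int32_t domain, const unsigned long *eips,
                              const unsigned long *events, int n)
{
        int i;

        if (!oprofile_add_domain_switch(domain))
                return;                         /* buffer too full */

        for (i = 0; i < n; i++)
                oprofile_add_pc(eips[i], CPU_MODE_XEN, events[i]);

        oprofile_add_domain_switch(COORDINATOR_DOMAIN); /* back to local */
}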
1881 | --- a/drivers/oprofile/cpu_buffer.h | ||
1882 | +++ b/drivers/oprofile/cpu_buffer.h | ||
1883 | @@ -36,7 +36,7 @@ | ||
1884 | volatile unsigned long tail_pos; | ||
1885 | unsigned long buffer_size; | ||
1886 | struct task_struct * last_task; | ||
1887 | - int last_is_kernel; | ||
1888 | + int last_cpu_mode; | ||
1889 | int tracing; | ||
1890 | struct op_sample * buffer; | ||
1891 | unsigned long sample_received; | ||
1892 | @@ -52,7 +52,10 @@ | ||
1893 | void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); | ||
1894 | |||
1895 | /* transient events for the CPU buffer -> event buffer */ | ||
1896 | -#define CPU_IS_KERNEL 1 | ||
1897 | -#define CPU_TRACE_BEGIN 2 | ||
1898 | +#define CPU_MODE_USER 0 | ||
1899 | +#define CPU_MODE_KERNEL 1 | ||
1900 | +#define CPU_MODE_XEN 2 | ||
1901 | +#define CPU_TRACE_BEGIN 3 | ||
1902 | +#define CPU_DOMAIN_SWITCH 4 | ||
1903 | |||
1904 | #endif /* OPROFILE_CPU_BUFFER_H */ | ||
1905 | --- a/drivers/oprofile/event_buffer.h | ||
1906 | +++ b/drivers/oprofile/event_buffer.h | ||
1907 | @@ -23,6 +23,9 @@ | ||
1908 | #define INVALID_COOKIE ~0UL | ||
1909 | #define NO_COOKIE 0UL | ||
1910 | |||
1911 | +/* Constant used to refer to coordinator domain (Xen) */ | ||
1912 | +#define COORDINATOR_DOMAIN -1 | ||
1913 | + | ||
1914 | extern const struct file_operations event_buffer_fops; | ||
1915 | |||
1916 | /* mutex between sync_cpu_buffers() and the | ||
1917 | --- a/drivers/oprofile/oprof.c | ||
1918 | +++ b/drivers/oprofile/oprof.c | ||
1919 | @@ -5,6 +5,10 @@ | ||
1920 | * @remark Read the file COPYING | ||
1921 | * | ||
1922 | * @author John Levon <levon@movementarian.org> | ||
1923 | + * | ||
1924 | + * Modified by Aravind Menon for Xen | ||
1925 | + * These modifications are: | ||
1926 | + * Copyright (C) 2005 Hewlett-Packard Co. | ||
1927 | */ | ||
1928 | |||
1929 | #include <linux/kernel.h> | ||
1930 | @@ -33,6 +37,32 @@ | ||
1931 | */ | ||
1932 | static int timer = 0; | ||
1933 | |||
1934 | +int oprofile_set_active(int active_domains[], unsigned int adomains) | ||
1935 | +{ | ||
1936 | + int err; | ||
1937 | + | ||
1938 | + if (!oprofile_ops.set_active) | ||
1939 | + return -EINVAL; | ||
1940 | + | ||
1941 | + mutex_lock(&start_mutex); | ||
1942 | + err = oprofile_ops.set_active(active_domains, adomains); | ||
1943 | + mutex_unlock(&start_mutex); | ||
1944 | + return err; | ||
1945 | +} | ||
1946 | + | ||
1947 | +int oprofile_set_passive(int passive_domains[], unsigned int pdomains) | ||
1948 | +{ | ||
1949 | + int err; | ||
1950 | + | ||
1951 | + if (!oprofile_ops.set_passive) | ||
1952 | + return -EINVAL; | ||
1953 | + | ||
1954 | + mutex_lock(&start_mutex); | ||
1955 | + err = oprofile_ops.set_passive(passive_domains, pdomains); | ||
1956 | + mutex_unlock(&start_mutex); | ||
1957 | + return err; | ||
1958 | +} | ||
1959 | + | ||
1960 | int oprofile_setup(void) | ||
1961 | { | ||
1962 | int err; | ||
1963 | --- a/drivers/oprofile/oprof.h | ||
1964 | +++ b/drivers/oprofile/oprof.h | ||
1965 | @@ -35,5 +35,8 @@ | ||
1966 | void oprofile_timer_init(struct oprofile_operations * ops); | ||
1967 | |||
1968 | int oprofile_set_backtrace(unsigned long depth); | ||
1969 | + | ||
1970 | +int oprofile_set_active(int active_domains[], unsigned int adomains); | ||
1971 | +int oprofile_set_passive(int passive_domains[], unsigned int pdomains); | ||
1972 | |||
1973 | #endif /* OPROF_H */ | ||
1974 | --- a/drivers/oprofile/oprofile_files.c | ||
1975 | +++ b/drivers/oprofile/oprofile_files.c | ||
1976 | @@ -5,15 +5,21 @@ | ||
1977 | * @remark Read the file COPYING | ||
1978 | * | ||
1979 | * @author John Levon <levon@movementarian.org> | ||
1980 | + * | ||
1981 | + * Modified by Aravind Menon for Xen | ||
1982 | + * These modifications are: | ||
1983 | + * Copyright (C) 2005 Hewlett-Packard Co. | ||
1984 | */ | ||
1985 | |||
1986 | #include <linux/fs.h> | ||
1987 | #include <linux/oprofile.h> | ||
1988 | +#include <asm/uaccess.h> | ||
1989 | +#include <linux/ctype.h> | ||
1990 | |||
1991 | #include "event_buffer.h" | ||
1992 | #include "oprofile_stats.h" | ||
1993 | #include "oprof.h" | ||
1994 | - | ||
1995 | + | ||
1996 | unsigned long fs_buffer_size = 131072; | ||
1997 | unsigned long fs_cpu_buffer_size = 8192; | ||
1998 | unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ | ||
1999 | @@ -117,11 +123,202 @@ | ||
2000 | static const struct file_operations dump_fops = { | ||
2001 | .write = dump_write, | ||
2002 | }; | ||
2003 | - | ||
2004 | + | ||
2005 | +#define TMPBUFSIZE 512 | ||
2006 | + | ||
2007 | +static unsigned int adomains = 0; | ||
2008 | +static int active_domains[MAX_OPROF_DOMAINS + 1]; | ||
2009 | +static DEFINE_MUTEX(adom_mutex); | ||
2010 | + | ||
2011 | +static ssize_t adomain_write(struct file * file, char const __user * buf, | ||
2012 | + size_t count, loff_t * offset) | ||
2013 | +{ | ||
2014 | + char *tmpbuf; | ||
2015 | + char *startp, *endp; | ||
2016 | + int i; | ||
2017 | + unsigned long val; | ||
2018 | + ssize_t retval = count; | ||
2019 | + | ||
2020 | + if (*offset) | ||
2021 | + return -EINVAL; | ||
2022 | + if (count > TMPBUFSIZE - 1) | ||
2023 | + return -EINVAL; | ||
2024 | + | ||
2025 | + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) | ||
2026 | + return -ENOMEM; | ||
2027 | + | ||
2028 | + if (copy_from_user(tmpbuf, buf, count)) { | ||
2029 | + kfree(tmpbuf); | ||
2030 | + return -EFAULT; | ||
2031 | + } | ||
2032 | + tmpbuf[count] = 0; | ||
2033 | + | ||
2034 | + mutex_lock(&adom_mutex); | ||
2035 | + | ||
2036 | + startp = tmpbuf; | ||
2037 | + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ | ||
2038 | + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { | ||
2039 | + val = simple_strtoul(startp, &endp, 0); | ||
2040 | + if (endp == startp) | ||
2041 | + break; | ||
2042 | + while (ispunct(*endp) || isspace(*endp)) | ||
2043 | + endp++; | ||
2044 | + active_domains[i] = val; | ||
2045 | + if (active_domains[i] != val) | ||
2046 | + /* Overflow, force error below */ | ||
2047 | + i = MAX_OPROF_DOMAINS + 1; | ||
2048 | + startp = endp; | ||
2049 | + } | ||
2050 | + /* Force error on trailing junk */ | ||
2051 | + adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; | ||
2052 | + | ||
2053 | + kfree(tmpbuf); | ||
2054 | + | ||
2055 | + if (adomains > MAX_OPROF_DOMAINS | ||
2056 | + || oprofile_set_active(active_domains, adomains)) { | ||
2057 | + adomains = 0; | ||
2058 | + retval = -EINVAL; | ||
2059 | + } | ||
2060 | + | ||
2061 | + mutex_unlock(&adom_mutex); | ||
2062 | + return retval; | ||
2063 | +} | ||
2064 | + | ||
2065 | +static ssize_t adomain_read(struct file * file, char __user * buf, | ||
2066 | + size_t count, loff_t * offset) | ||
2067 | +{ | ||
2068 | + char * tmpbuf; | ||
2069 | + size_t len; | ||
2070 | + int i; | ||
2071 | + ssize_t retval; | ||
2072 | + | ||
2073 | + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) | ||
2074 | + return -ENOMEM; | ||
2075 | + | ||
2076 | + mutex_lock(&adom_mutex); | ||
2077 | + | ||
2078 | + len = 0; | ||
2079 | + for (i = 0; i < adomains; i++) | ||
2080 | + len += snprintf(tmpbuf + len, | ||
2081 | + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0, | ||
2082 | + "%u ", active_domains[i]); | ||
2083 | + WARN_ON(len > TMPBUFSIZE); | ||
2084 | + if (len != 0 && len <= TMPBUFSIZE) | ||
2085 | + tmpbuf[len-1] = '\n'; | ||
2086 | + | ||
2087 | + mutex_unlock(&adom_mutex); | ||
2088 | + | ||
2089 | + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); | ||
2090 | + | ||
2091 | + kfree(tmpbuf); | ||
2092 | + return retval; | ||
2093 | +} | ||
2094 | + | ||
2095 | + | ||
2096 | +static struct file_operations active_domain_ops = { | ||
2097 | + .read = adomain_read, | ||
2098 | + .write = adomain_write, | ||
2099 | +}; | ||
2100 | + | ||
2101 | +static unsigned int pdomains = 0; | ||
2102 | +static int passive_domains[MAX_OPROF_DOMAINS]; | ||
2103 | +static DEFINE_MUTEX(pdom_mutex); | ||
2104 | + | ||
2105 | +static ssize_t pdomain_write(struct file * file, char const __user * buf, | ||
2106 | + size_t count, loff_t * offset) | ||
2107 | +{ | ||
2108 | + char *tmpbuf; | ||
2109 | + char *startp, *endp; | ||
2110 | + int i; | ||
2111 | + unsigned long val; | ||
2112 | + ssize_t retval = count; | ||
2113 | + | ||
2114 | + if (*offset) | ||
2115 | + return -EINVAL; | ||
2116 | + if (count > TMPBUFSIZE - 1) | ||
2117 | + return -EINVAL; | ||
2118 | + | ||
2119 | + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) | ||
2120 | + return -ENOMEM; | ||
2121 | + | ||
2122 | + if (copy_from_user(tmpbuf, buf, count)) { | ||
2123 | + kfree(tmpbuf); | ||
2124 | + return -EFAULT; | ||
2125 | + } | ||
2126 | + tmpbuf[count] = 0; | ||
2127 | + | ||
2128 | + mutex_lock(&pdom_mutex); | ||
2129 | + | ||
2130 | + startp = tmpbuf; | ||
2131 | + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ | ||
2132 | + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { | ||
2133 | + val = simple_strtoul(startp, &endp, 0); | ||
2134 | + if (endp == startp) | ||
2135 | + break; | ||
2136 | + while (ispunct(*endp) || isspace(*endp)) | ||
2137 | + endp++; | ||
2138 | + passive_domains[i] = val; | ||
2139 | + if (passive_domains[i] != val) | ||
2140 | + /* Overflow, force error below */ | ||
2141 | + i = MAX_OPROF_DOMAINS + 1; | ||
2142 | + startp = endp; | ||
2143 | + } | ||
2144 | + /* Force error on trailing junk */ | ||
2145 | + pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; | ||
2146 | + | ||
2147 | + kfree(tmpbuf); | ||
2148 | + | ||
2149 | + if (pdomains > MAX_OPROF_DOMAINS | ||
2150 | + || oprofile_set_passive(passive_domains, pdomains)) { | ||
2151 | + pdomains = 0; | ||
2152 | + retval = -EINVAL; | ||
2153 | + } | ||
2154 | + | ||
2155 | + mutex_unlock(&pdom_mutex); | ||
2156 | + return retval; | ||
2157 | +} | ||
2158 | + | ||
2159 | +static ssize_t pdomain_read(struct file * file, char __user * buf, | ||
2160 | + size_t count, loff_t * offset) | ||
2161 | +{ | ||
2162 | + char * tmpbuf; | ||
2163 | + size_t len; | ||
2164 | + int i; | ||
2165 | + ssize_t retval; | ||
2166 | + | ||
2167 | + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) | ||
2168 | + return -ENOMEM; | ||
2169 | + | ||
2170 | + mutex_lock(&pdom_mutex); | ||
2171 | + | ||
2172 | + len = 0; | ||
2173 | + for (i = 0; i < pdomains; i++) | ||
2174 | + len += snprintf(tmpbuf + len, | ||
2175 | + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0, | ||
2176 | + "%u ", passive_domains[i]); | ||
2177 | + WARN_ON(len > TMPBUFSIZE); | ||
2178 | + if (len != 0 && len <= TMPBUFSIZE) | ||
2179 | + tmpbuf[len-1] = '\n'; | ||
2180 | + | ||
2181 | + mutex_unlock(&pdom_mutex); | ||
2182 | + | ||
2183 | + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); | ||
2184 | + | ||
2185 | + kfree(tmpbuf); | ||
2186 | + return retval; | ||
2187 | +} | ||
2188 | + | ||
2189 | +static struct file_operations passive_domain_ops = { | ||
2190 | + .read = pdomain_read, | ||
2191 | + .write = pdomain_write, | ||
2192 | +}; | ||
2193 | + | ||
2194 | void oprofile_create_files(struct super_block * sb, struct dentry * root) | ||
2195 | { | ||
2196 | oprofilefs_create_file(sb, root, "enable", &enable_fops); | ||
2197 | oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); | ||
2198 | + oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); | ||
2199 | + oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); | ||
2200 | oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); | ||
2201 | oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); | ||
2202 | oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); | ||
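The two new files accept a list of domain ids separated by whitespace or punctuation (see the ispunct()/isspace() skipping above) and read back as one space-separated line. A user-space usage sketch, assuming oprofilefs is mounted at the conventional /dev/oprofile:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Illustrative: select which domains the coordinator profiles. */
static int set_active_domains(const char *list)         /* e.g. "0 1 3" */
{
        int fd = open("/dev/oprofile/active_domains", O_WRONLY);
        ssize_t n;

        if (fd < 0)
                return -1;
        n = write(fd, list, strlen(list));      /* parsed by adomain_write() */
        close(fd);
        return n < 0 ? -1 : 0;
}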
2203 | --- a/drivers/pci/bus.c | ||
2204 | +++ b/drivers/pci/bus.c | ||
2205 | @@ -17,6 +17,8 @@ | ||
2206 | |||
2207 | #include "pci.h" | ||
2208 | |||
2209 | +extern int pci_mem_align; | ||
2210 | + | ||
2211 | /** | ||
2212 | * pci_bus_alloc_resource - allocate a resource from a parent bus | ||
2213 | * @bus: PCI bus | ||
2214 | @@ -44,6 +46,11 @@ | ||
2215 | |||
2216 | type_mask |= IORESOURCE_IO | IORESOURCE_MEM; | ||
2217 | |||
2218 | + /* If the boot parameter 'pci-mem-align' was specified then we need to | ||
2219 | + align the memory addresses to page-size boundaries. */ | ||
2220 | + if (pci_mem_align && (align < (PAGE_SIZE-1))) | ||
2221 | + align = PAGE_SIZE - 1; | ||
2222 | + | ||
2223 | for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) { | ||
2224 | struct resource *r = bus->resource[i]; | ||
2225 | if (!r) | ||
2226 | --- a/drivers/pci/quirks.c | ||
2227 | +++ b/drivers/pci/quirks.c | ||
2228 | @@ -24,6 +24,40 @@ | ||
2229 | #include <linux/kallsyms.h> | ||
2230 | #include "pci.h" | ||
2231 | |||
2232 | +/* A global flag which signals if we should page-align PCI mem windows. */ | ||
2233 | +int pci_mem_align = 0; | ||
2234 | + | ||
2235 | +static int __init set_pci_mem_align(char *str) | ||
2236 | +{ | ||
2237 | + pci_mem_align = 1; | ||
2238 | + return 1; | ||
2239 | +} | ||
2240 | +__setup("pci-mem-align", set_pci_mem_align); | ||
2241 | + | ||
2242 | +/* This quirk forces all memory resources assigned to PCI devices | ||
2243 | + * to be page-aligned. | ||
2244 | + */ | ||
2245 | +static void __devinit quirk_align_mem_resources(struct pci_dev *dev) | ||
2246 | +{ | ||
2247 | + int i; | ||
2248 | + struct resource *r; | ||
2249 | + resource_size_t old_start; | ||
2250 | + | ||
2251 | + if (!pci_mem_align) | ||
2252 | + return; | ||
2253 | + | ||
2254 | + for (i=0; i < DEVICE_COUNT_RESOURCE; i++) { | ||
2255 | + r = &dev->resource[i]; | ||
2256 | + if ((r == NULL) || !(r->flags & IORESOURCE_MEM)) | ||
2257 | + continue; | ||
2258 | + | ||
2259 | + old_start = r->start; | ||
2260 | + r->start = (r->start + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); | ||
2261 | + r->end = r->end - (old_start - r->start); | ||
2262 | + } | ||
2263 | +} | ||
2264 | +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_align_mem_resources); | ||
2265 | + | ||
2266 | /* The Mellanox Tavor device gives false positive parity errors | ||
2267 | * Mark this device with a broken_parity_status, to allow | ||
2268 | * PCI scanning code to "skip" this now blacklisted device. | ||
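The quirk rounds each memory BAR's start up to the next page boundary; old_start - r->start is then zero or negative, so subtracting it from r->end shifts the end up by the same delta and the window keeps its size. A stand-alone worked example of the arithmetic (user space, 4 KiB pages assumed):

#include <stdio.h>
#define PAGE_SIZE 0x1000UL

int main(void)
{
        unsigned long old_start = 0xe100, end = 0xe1ff, start;

        start = (old_start + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
        end = end - (old_start - start);
        printf("[%#lx, %#lx]\n", start, end);   /* [0xf000, 0xf0ff] */
        return 0;
}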
2269 | --- a/fs/aio.c | ||
2270 | +++ b/fs/aio.c | ||
2271 | @@ -36,6 +36,11 @@ | ||
2272 | #include <asm/uaccess.h> | ||
2273 | #include <asm/mmu_context.h> | ||
2274 | |||
2275 | +#ifdef CONFIG_EPOLL | ||
2276 | +#include <linux/poll.h> | ||
2277 | +#include <linux/eventpoll.h> | ||
2278 | +#endif | ||
2279 | + | ||
2280 | #if DEBUG > 1 | ||
2281 | #define dprintk printk | ||
2282 | #else | ||
2283 | @@ -1008,6 +1013,11 @@ | ||
2284 | if (waitqueue_active(&ctx->wait)) | ||
2285 | wake_up(&ctx->wait); | ||
2286 | |||
2287 | +#ifdef CONFIG_EPOLL | ||
2288 | + if (ctx->file && waitqueue_active(&ctx->poll_wait)) | ||
2289 | + wake_up(&ctx->poll_wait); | ||
2290 | +#endif | ||
2291 | + | ||
2292 | spin_unlock_irqrestore(&ctx->ctx_lock, flags); | ||
2293 | return ret; | ||
2294 | } | ||
2295 | @@ -1015,6 +1025,8 @@ | ||
2296 | /* aio_read_evt | ||
2297 | * Pull an event off of the ioctx's event ring. Returns the number of | ||
2298 | * events fetched (0 or 1 ;-) | ||
2299 | + * If the ent parameter is NULL, just returns the number of events that | ||
2300 | + * would be fetched. | ||
2301 | * FIXME: make this use cmpxchg. | ||
2302 | * TODO: make the ringbuffer user mmap()able (requires FIXME). | ||
2303 | */ | ||
2304 | @@ -1037,13 +1049,18 @@ | ||
2305 | |||
2306 | head = ring->head % info->nr; | ||
2307 | if (head != ring->tail) { | ||
2308 | - struct io_event *evp = aio_ring_event(info, head, KM_USER1); | ||
2309 | - *ent = *evp; | ||
2310 | - head = (head + 1) % info->nr; | ||
2311 | - smp_mb(); /* finish reading the event before updatng the head */ | ||
2312 | - ring->head = head; | ||
2313 | - ret = 1; | ||
2314 | - put_aio_ring_event(evp, KM_USER1); | ||
2315 | + if (ent) { /* event requested */ | ||
2316 | + struct io_event *evp = | ||
2317 | + aio_ring_event(info, head, KM_USER1); | ||
2318 | + *ent = *evp; | ||
2319 | + head = (head + 1) % info->nr; | ||
2320 | + /* finish reading the event before updating the head */ | ||
2321 | + smp_mb(); | ||
2322 | + ring->head = head; | ||
2323 | + ret = 1; | ||
2324 | + put_aio_ring_event(evp, KM_USER1); | ||
2325 | + } else /* only need to know availability */ | ||
2326 | + ret = 1; | ||
2327 | } | ||
2328 | spin_unlock(&info->ring_lock); | ||
2329 | |||
2330 | @@ -1234,6 +1251,13 @@ | ||
2331 | |||
2332 | aio_cancel_all(ioctx); | ||
2333 | wait_for_all_aios(ioctx); | ||
2334 | +#ifdef CONFIG_EPOLL | ||
2335 | + /* forget the poll file, but it's up to the user to close it */ | ||
2336 | + if (ioctx->file) { | ||
2337 | + ioctx->file->private_data = 0; | ||
2338 | + ioctx->file = 0; | ||
2339 | + } | ||
2340 | +#endif | ||
2341 | |||
2342 | /* | ||
2343 | * Wake up any waiters. The setting of ctx->dead must be seen | ||
2344 | @@ -1244,6 +1268,68 @@ | ||
2345 | put_ioctx(ioctx); /* once for the lookup */ | ||
2346 | } | ||
2347 | |||
2348 | +#ifdef CONFIG_EPOLL | ||
2349 | + | ||
2350 | +static int aio_queue_fd_close(struct inode *inode, struct file *file) | ||
2351 | +{ | ||
2352 | + struct kioctx *ioctx = file->private_data; | ||
2353 | + if (ioctx) { | ||
2354 | + file->private_data = 0; | ||
2355 | + spin_lock_irq(&ioctx->ctx_lock); | ||
2356 | + ioctx->file = 0; | ||
2357 | + spin_unlock_irq(&ioctx->ctx_lock); | ||
2358 | + } | ||
2359 | + return 0; | ||
2360 | +} | ||
2361 | + | ||
2362 | +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait) | ||
2363 | +{ unsigned int pollflags = 0; | ||
2364 | + struct kioctx *ioctx = file->private_data; | ||
2365 | + | ||
2366 | + if (ioctx) { | ||
2367 | + | ||
2368 | + spin_lock_irq(&ioctx->ctx_lock); | ||
2369 | + /* Insert inside our poll wait queue */ | ||
2370 | + poll_wait(file, &ioctx->poll_wait, wait); | ||
2371 | + | ||
2372 | + /* Check our condition */ | ||
2373 | + if (aio_read_evt(ioctx, 0)) | ||
2374 | + pollflags = POLLIN | POLLRDNORM; | ||
2375 | + spin_unlock_irq(&ioctx->ctx_lock); | ||
2376 | + } | ||
2377 | + | ||
2378 | + return pollflags; | ||
2379 | +} | ||
2380 | + | ||
2381 | +static const struct file_operations aioq_fops = { | ||
2382 | + .release = aio_queue_fd_close, | ||
2383 | + .poll = aio_queue_fd_poll | ||
2384 | +}; | ||
2385 | + | ||
2386 | +/* make_aio_fd: | ||
2387 | + * Create a file descriptor that can be used to poll the event queue. | ||
2388 | + * Based on, and piggybacking on, the excellent epoll code. | ||
2389 | + */ | ||
2390 | + | ||
2391 | +static int make_aio_fd(struct kioctx *ioctx) | ||
2392 | +{ | ||
2393 | + int error, fd; | ||
2394 | + struct inode *inode; | ||
2395 | + struct file *file; | ||
2396 | + | ||
2397 | + error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); | ||
2398 | + if (error) | ||
2399 | + return error; | ||
2400 | + | ||
2401 | + /* associate the file with the IO context */ | ||
2402 | + file->private_data = ioctx; | ||
2403 | + ioctx->file = file; | ||
2404 | + init_waitqueue_head(&ioctx->poll_wait); | ||
2405 | + return fd; | ||
2406 | +} | ||
2407 | +#endif | ||
2408 | + | ||
2409 | + | ||
2410 | /* sys_io_setup: | ||
2411 | * Create an aio_context capable of receiving at least nr_events. | ||
2412 | * ctxp must not point to an aio_context that already exists, and | ||
2413 | @@ -1256,18 +1342,30 @@ | ||
2414 | * resources are available. May fail with -EFAULT if an invalid | ||
2415 | * pointer is passed for ctxp. Will fail with -ENOSYS if not | ||
2416 | * implemented. | ||
2417 | + * | ||
2418 | + * To request a selectable fd, the user context has to be initialized | ||
2419 | + * to 1, instead of 0, and the return value is the fd. | ||
2420 | + * This keeps the system call compatible, since a non-zero value | ||
2421 | + * was not allowed so far. | ||
2422 | */ | ||
2423 | asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) | ||
2424 | { | ||
2425 | struct kioctx *ioctx = NULL; | ||
2426 | unsigned long ctx; | ||
2427 | long ret; | ||
2428 | + int make_fd = 0; | ||
2429 | |||
2430 | ret = get_user(ctx, ctxp); | ||
2431 | if (unlikely(ret)) | ||
2432 | goto out; | ||
2433 | |||
2434 | ret = -EINVAL; | ||
2435 | +#ifdef CONFIG_EPOLL | ||
2436 | + if (ctx == 1) { | ||
2437 | + make_fd = 1; | ||
2438 | + ctx = 0; | ||
2439 | + } | ||
2440 | +#endif | ||
2441 | if (unlikely(ctx || nr_events == 0)) { | ||
2442 | pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", | ||
2443 | ctx, nr_events); | ||
2444 | @@ -1278,8 +1376,12 @@ | ||
2445 | ret = PTR_ERR(ioctx); | ||
2446 | if (!IS_ERR(ioctx)) { | ||
2447 | ret = put_user(ioctx->user_id, ctxp); | ||
2448 | - if (!ret) | ||
2449 | - return 0; | ||
2450 | +#ifdef CONFIG_EPOLL | ||
2451 | + if (make_fd && ret >= 0) | ||
2452 | + ret = make_aio_fd(ioctx); | ||
2453 | +#endif | ||
2454 | + if (ret >= 0) | ||
2455 | + return ret; | ||
2456 | |||
2457 | get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ | ||
2458 | io_destroy(ioctx); | ||
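Per the extended sys_io_setup() comment above, a context seeded with 1 asks the CONFIG_EPOLL-patched kernel for a selectable fd, which then reports POLLIN whenever aio_read_evt() would find an event. A hedged user-space sketch using raw syscalls (the helper name is illustrative):

#include <poll.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/aio_abi.h>

/* On success *ctx holds the context id and the return value is the
 * fd created by make_aio_fd(); works only on kernels with this patch. */
static int aio_setup_pollable(unsigned nr_events, aio_context_t *ctx)
{
        struct pollfd pfd;
        long fd;

        *ctx = 1;                       /* non-zero: request a selectable fd */
        fd = syscall(SYS_io_setup, nr_events, ctx);
        if (fd < 0)
                return -1;

        pfd.fd = fd;
        pfd.events = POLLIN;
        poll(&pfd, 1, 0);               /* POLLIN => io_getevents() won't block */
        return fd;
}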
2459 | --- a/fs/compat_ioctl.c | ||
2460 | +++ b/fs/compat_ioctl.c | ||
2461 | @@ -114,6 +114,13 @@ | ||
2462 | #include <asm/fbio.h> | ||
2463 | #endif | ||
2464 | |||
2465 | +#ifdef CONFIG_XEN | ||
2466 | +#include <xen/interface/xen.h> | ||
2467 | +#include <xen/public/evtchn.h> | ||
2468 | +#include <xen/public/privcmd.h> | ||
2469 | +#include <xen/compat_ioctl.h> | ||
2470 | +#endif | ||
2471 | + | ||
2472 | static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, | ||
2473 | unsigned long arg, struct file *f) | ||
2474 | { | ||
2475 | @@ -2834,6 +2841,18 @@ | ||
2476 | IGNORE_IOCTL(FBIOSCURSOR32) | ||
2477 | IGNORE_IOCTL(FBIOGCURSOR32) | ||
2478 | #endif | ||
2479 | + | ||
2480 | +#ifdef CONFIG_XEN | ||
2481 | +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32) | ||
2482 | +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32) | ||
2483 | +COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL) | ||
2484 | +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ) | ||
2485 | +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN) | ||
2486 | +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_UNBOUND_PORT) | ||
2487 | +COMPATIBLE_IOCTL(IOCTL_EVTCHN_UNBIND) | ||
2488 | +COMPATIBLE_IOCTL(IOCTL_EVTCHN_NOTIFY) | ||
2489 | +COMPATIBLE_IOCTL(IOCTL_EVTCHN_RESET) | ||
2490 | +#endif | ||
2491 | }; | ||
2492 | |||
2493 | #define IOCTL_HASHSIZE 256 | ||
2494 | --- a/fs/splice.c | ||
2495 | +++ b/fs/splice.c | ||
2496 | @@ -1218,6 +1218,9 @@ | ||
2497 | if (!access_ok(VERIFY_READ, base, len)) | ||
2498 | break; | ||
2499 | |||
2500 | + if (unlikely(!access_ok(VERIFY_READ, base, len))) | ||
2501 | + break; | ||
2502 | + | ||
2503 | /* | ||
2504 | * Get this base offset and number of pages, then map | ||
2505 | * in the user pages. | ||
2506 | --- a/include/asm-generic/pci.h | ||
2507 | +++ b/include/asm-generic/pci.h | ||
2508 | @@ -43,7 +43,9 @@ | ||
2509 | return root; | ||
2510 | } | ||
2511 | |||
2512 | +#ifndef pcibios_scan_all_fns | ||
2513 | #define pcibios_scan_all_fns(a, b) 0 | ||
2514 | +#endif | ||
2515 | |||
2516 | #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ | ||
2517 | static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) | ||
2518 | --- a/include/asm-generic/pgtable.h | ||
2519 | +++ b/include/asm-generic/pgtable.h | ||
2520 | @@ -99,6 +99,10 @@ | ||
2521 | } | ||
2522 | #endif | ||
2523 | |||
2524 | +#ifndef arch_change_pte_range | ||
2525 | +#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0 | ||
2526 | +#endif | ||
2527 | + | ||
2528 | #ifndef __HAVE_ARCH_PTE_SAME | ||
2529 | #define pte_same(A,B) (pte_val(A) == pte_val(B)) | ||
2530 | #endif | ||
2531 | --- a/include/linux/aio.h | ||
2532 | +++ b/include/linux/aio.h | ||
2533 | @@ -200,6 +200,11 @@ | ||
2534 | struct aio_ring_info ring_info; | ||
2535 | |||
2536 | struct delayed_work wq; | ||
2537 | +#ifdef CONFIG_EPOLL | ||
2538 | + // poll integration | ||
2539 | + wait_queue_head_t poll_wait; | ||
2540 | + struct file *file; | ||
2541 | +#endif | ||
2542 | }; | ||
2543 | |||
2544 | /* prototypes */ | ||
2545 | --- a/include/linux/interrupt.h | ||
2546 | +++ b/include/linux/interrupt.h | ||
2547 | @@ -194,6 +194,12 @@ | ||
2548 | } | ||
2549 | #endif /* CONFIG_GENERIC_HARDIRQS */ | ||
2550 | |||
2551 | +#ifdef CONFIG_HAVE_IRQ_IGNORE_UNHANDLED | ||
2552 | +int irq_ignore_unhandled(unsigned int irq); | ||
2553 | +#else | ||
2554 | +#define irq_ignore_unhandled(irq) 0 | ||
2555 | +#endif | ||
2556 | + | ||
2557 | #ifndef __ARCH_SET_SOFTIRQ_PENDING | ||
2558 | #define set_softirq_pending(x) (local_softirq_pending() = (x)) | ||
2559 | #define or_softirq_pending(x) (local_softirq_pending() |= (x)) | ||
2560 | --- a/include/linux/kexec.h | ||
2561 | +++ b/include/linux/kexec.h | ||
2562 | @@ -46,6 +46,13 @@ | ||
2563 | KEXEC_CORE_NOTE_NAME_BYTES + \ | ||
2564 | KEXEC_CORE_NOTE_DESC_BYTES ) | ||
2565 | |||
2566 | +#ifndef KEXEC_ARCH_HAS_PAGE_MACROS | ||
2567 | +#define kexec_page_to_pfn(page) page_to_pfn(page) | ||
2568 | +#define kexec_pfn_to_page(pfn) pfn_to_page(pfn) | ||
2569 | +#define kexec_virt_to_phys(addr) virt_to_phys(addr) | ||
2570 | +#define kexec_phys_to_virt(addr) phys_to_virt(addr) | ||
2571 | +#endif | ||
2572 | + | ||
2573 | /* | ||
2574 | * This structure is used to hold the arguments that are used when loading | ||
2575 | * kernel binaries. | ||
2576 | @@ -106,6 +113,12 @@ | ||
2577 | extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET; | ||
2578 | extern int machine_kexec_prepare(struct kimage *image); | ||
2579 | extern void machine_kexec_cleanup(struct kimage *image); | ||
2580 | +#ifdef CONFIG_XEN | ||
2581 | +extern int xen_machine_kexec_load(struct kimage *image); | ||
2582 | +extern void xen_machine_kexec_unload(struct kimage *image); | ||
2583 | +extern void xen_machine_kexec_setup_resources(void); | ||
2584 | +extern void xen_machine_kexec_register_resources(struct resource *res); | ||
2585 | +#endif | ||
2586 | extern asmlinkage long sys_kexec_load(unsigned long entry, | ||
2587 | unsigned long nr_segments, | ||
2588 | struct kexec_segment __user *segments, | ||
2589 | @@ -154,6 +167,10 @@ | ||
2590 | |||
2591 | #ifndef kexec_flush_icache_page | ||
2592 | #define kexec_flush_icache_page(page) | ||
2593 | +#endif | ||
2594 | + | ||
2595 | +#ifndef kexec_flush_icache_page | ||
2596 | +#define kexec_flush_icache_page(page) | ||
2597 | #endif | ||
2598 | |||
2599 | #define KEXEC_ON_CRASH 0x00000001 | ||
2600 | --- a/include/linux/mm.h | ||
2601 | +++ b/include/linux/mm.h | ||
2602 | @@ -100,6 +100,9 @@ | ||
2603 | #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ | ||
2604 | #define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */ | ||
2605 | #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ | ||
2606 | +#ifdef CONFIG_XEN | ||
2607 | +#define VM_FOREIGN 0x00200000 /* Has pages belonging to another VM */ | ||
2608 | +#endif | ||
2609 | #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ | ||
2610 | #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ | ||
2611 | #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ | ||
2612 | @@ -172,6 +175,10 @@ | ||
2613 | /* notification that a previously read-only page is about to become | ||
2614 | * writable, if an error is returned it will cause a SIGBUS */ | ||
2615 | int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); | ||
2616 | + /* Area-specific function for clearing the PTE at @ptep. Returns the | ||
2617 | + * original value of @ptep. */ | ||
2618 | + pte_t (*zap_pte)(struct vm_area_struct *vma, | ||
2619 | + unsigned long addr, pte_t *ptep, int is_fullmm); | ||
2620 | #ifdef CONFIG_NUMA | ||
2621 | int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); | ||
2622 | struct mempolicy *(*get_policy)(struct vm_area_struct *vma, | ||
2623 | --- a/include/linux/oprofile.h | ||
2624 | +++ b/include/linux/oprofile.h | ||
2625 | @@ -16,6 +16,8 @@ | ||
2626 | #include <linux/types.h> | ||
2627 | #include <linux/spinlock.h> | ||
2628 | #include <asm/atomic.h> | ||
2629 | + | ||
2630 | +#include <xen/interface/xenoprof.h> | ||
2631 | |||
2632 | /* Each escaped entry is prefixed by ESCAPE_CODE | ||
2633 | * then one of the following codes, then the | ||
2634 | @@ -28,7 +30,7 @@ | ||
2635 | #define CPU_SWITCH_CODE 2 | ||
2636 | #define COOKIE_SWITCH_CODE 3 | ||
2637 | #define KERNEL_ENTER_SWITCH_CODE 4 | ||
2638 | -#define KERNEL_EXIT_SWITCH_CODE 5 | ||
2639 | +#define USER_ENTER_SWITCH_CODE 5 | ||
2640 | #define MODULE_LOADED_CODE 6 | ||
2641 | #define CTX_TGID_CODE 7 | ||
2642 | #define TRACE_BEGIN_CODE 8 | ||
2643 | @@ -36,6 +38,7 @@ | ||
2644 | #define XEN_ENTER_SWITCH_CODE 10 | ||
2645 | #define SPU_PROFILING_CODE 11 | ||
2646 | #define SPU_CTX_SWITCH_CODE 12 | ||
2647 | +#define DOMAIN_SWITCH_CODE 13 | ||
2648 | |||
2649 | struct super_block; | ||
2650 | struct dentry; | ||
2651 | @@ -47,6 +50,11 @@ | ||
2652 | /* create any necessary configuration files in the oprofile fs. | ||
2653 | * Optional. */ | ||
2654 | int (*create_files)(struct super_block * sb, struct dentry * root); | ||
2655 | + /* setup active domains with Xen */ | ||
2656 | + int (*set_active)(int *active_domains, unsigned int adomains); | ||
2657 | + /* setup passive domains with Xen */ | ||
2658 | + int (*set_passive)(int *passive_domains, unsigned int pdomains); | ||
2659 | + | ||
2660 | /* Do any necessary interrupt setup. Optional. */ | ||
2661 | int (*setup)(void); | ||
2662 | /* Do any necessary interrupt shutdown. Optional. */ | ||
2663 | @@ -113,6 +121,8 @@ | ||
2664 | /* add a backtrace entry, to be called from the ->backtrace callback */ | ||
2665 | void oprofile_add_trace(unsigned long eip); | ||
2666 | |||
2667 | +/* add a domain switch entry */ | ||
2668 | +int oprofile_add_domain_switch(int32_t domain_id); | ||
2669 | |||
2670 | /** | ||
2671 | * Create a file of the given name as a child of the given root, with | ||
2672 | --- a/include/linux/page-flags.h | ||
2673 | +++ b/include/linux/page-flags.h | ||
2674 | @@ -97,6 +97,8 @@ | ||
2675 | #define PG_checked PG_owner_priv_1 /* Used by some filesystems */ | ||
2676 | #define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */ | ||
2677 | |||
2678 | +#define PG_foreign 20 /* Page is owned by foreign allocator. */ | ||
2679 | + | ||
2680 | #if (BITS_PER_LONG > 32) | ||
2681 | /* | ||
2682 | * 64-bit-only flags build down from bit 31 | ||
2683 | @@ -296,6 +298,19 @@ | ||
2684 | #define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags) | ||
2685 | #define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags) | ||
2686 | |||
2687 | +#define PageForeign(page) test_bit(PG_foreign, &(page)->flags) | ||
2688 | +#define SetPageForeign(_page, dtor) do { \ | ||
2689 | + set_bit(PG_foreign, &(_page)->flags); \ | ||
2690 | + BUG_ON((dtor) == (void (*)(struct page *))0); \ | ||
2691 | + (_page)->index = (long)(dtor); \ | ||
2692 | +} while (0) | ||
2693 | +#define ClearPageForeign(page) do { \ | ||
2694 | + clear_bit(PG_foreign, &(page)->flags); \ | ||
2695 | + (page)->index = 0; \ | ||
2696 | +} while (0) | ||
2697 | +#define PageForeignDestructor(_page) \ | ||
2698 | + ((void (*)(struct page *))(_page)->index)(_page) | ||
2699 | + | ||
2700 | struct page; /* forward declaration */ | ||
2701 | |||
2702 | extern void cancel_dirty_page(struct page *page, unsigned int account_size); | ||
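PG_foreign marks pages whose machine frame is owned by another domain; the destructor parked in page->index is invoked by the patched free paths in mm/page_alloc.c further down, instead of the buddy allocator reclaiming a frame the kernel no longer owns. A hedged sketch of the producer side (driver names are hypothetical, the pattern follows the macros above):

/* Hypothetical backend driver using the PG_foreign hooks. */
static void backend_page_release(struct page *page)
{
        ClearPageForeign(page);
        /* return the grant reference / MFN to the driver's pool here */
}

static void backend_mark_foreign(struct page *page)
{
        /* SetPageForeign() stashes the destructor in page->index */
        SetPageForeign(page, backend_page_release);
}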
2703 | --- a/include/linux/sched.h | ||
2704 | +++ b/include/linux/sched.h | ||
2705 | @@ -290,6 +290,7 @@ | ||
2706 | extern void sched_show_task(struct task_struct *p); | ||
2707 | |||
2708 | #ifdef CONFIG_DETECT_SOFTLOCKUP | ||
2709 | +extern unsigned long softlockup_get_next_event(void); | ||
2710 | extern void softlockup_tick(void); | ||
2711 | extern void spawn_softlockup_task(void); | ||
2712 | extern void touch_softlockup_watchdog(void); | ||
2713 | @@ -299,6 +300,10 @@ | ||
2714 | extern unsigned long sysctl_hung_task_timeout_secs; | ||
2715 | extern unsigned long sysctl_hung_task_warnings; | ||
2716 | #else | ||
2717 | +static inline unsigned long softlockup_get_next_event(void) | ||
2718 | +{ | ||
2719 | + return MAX_JIFFY_OFFSET; | ||
2720 | +} | ||
2721 | static inline void softlockup_tick(void) | ||
2722 | { | ||
2723 | } | ||
2724 | --- a/include/linux/skbuff.h | ||
2725 | +++ b/include/linux/skbuff.h | ||
2726 | @@ -217,6 +217,8 @@ | ||
2727 | * @local_df: allow local fragmentation | ||
2728 | * @cloned: Head may be cloned (check refcnt to be sure) | ||
2729 | * @nohdr: Payload reference only, must not modify header | ||
2730 | + * @proto_data_valid: Protocol data validated since arriving at localhost | ||
2731 | + * @proto_csum_blank: Protocol csum must be added before leaving localhost | ||
2732 | * @pkt_type: Packet class | ||
2733 | * @fclone: skbuff clone status | ||
2734 | * @ip_summed: Driver fed us an IP checksum | ||
2735 | @@ -310,7 +312,13 @@ | ||
2736 | __u16 tc_verd; /* traffic control verdict */ | ||
2737 | #endif | ||
2738 | #endif | ||
2739 | +#ifndef CONFIG_XEN | ||
2740 | /* 2 byte hole */ | ||
2741 | +#else | ||
2742 | + __u8 proto_data_valid:1, | ||
2743 | + proto_csum_blank:1; | ||
2744 | + /* 1 byte hole */ | ||
2745 | +#endif | ||
2746 | |||
2747 | #ifdef CONFIG_NET_DMA | ||
2748 | dma_cookie_t dma_cookie; | ||
2749 | --- a/include/linux/vermagic.h | ||
2750 | +++ b/include/linux/vermagic.h | ||
2751 | @@ -17,6 +17,11 @@ | ||
2752 | #else | ||
2753 | #define MODULE_VERMAGIC_MODULE_UNLOAD "" | ||
2754 | #endif | ||
2755 | +#ifdef CONFIG_XEN | ||
2756 | +#define MODULE_VERMAGIC_XEN "Xen " | ||
2757 | +#else | ||
2758 | +#define MODULE_VERMAGIC_XEN | ||
2759 | +#endif | ||
2760 | #ifndef MODULE_ARCH_VERMAGIC | ||
2761 | #define MODULE_ARCH_VERMAGIC "" | ||
2762 | #endif | ||
2763 | @@ -24,5 +29,6 @@ | ||
2764 | #define VERMAGIC_STRING \ | ||
2765 | UTS_RELEASE " " \ | ||
2766 | MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ | ||
2767 | - MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC | ||
2768 | + MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_XEN \ | ||
2769 | + MODULE_ARCH_VERMAGIC | ||
2770 | |||
2771 | --- a/kernel/irq/spurious.c | ||
2772 | +++ b/kernel/irq/spurious.c | ||
2773 | @@ -182,7 +182,7 @@ | ||
2774 | */ | ||
2775 | if (time_after(jiffies, desc->last_unhandled + HZ/10)) | ||
2776 | desc->irqs_unhandled = 1; | ||
2777 | - else | ||
2778 | + else if (!irq_ignore_unhandled(irq)) | ||
2779 | desc->irqs_unhandled++; | ||
2780 | desc->last_unhandled = jiffies; | ||
2781 | if (unlikely(action_ret != IRQ_NONE)) | ||
2782 | --- a/kernel/kexec.c | ||
2783 | +++ b/kernel/kexec.c | ||
2784 | @@ -340,13 +340,26 @@ | ||
2785 | return 0; | ||
2786 | } | ||
2787 | |||
2788 | -static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) | ||
2789 | +static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order, unsigned long limit) | ||
2790 | { | ||
2791 | struct page *pages; | ||
2792 | |||
2793 | pages = alloc_pages(gfp_mask, order); | ||
2794 | if (pages) { | ||
2795 | unsigned int count, i; | ||
2796 | +#ifdef CONFIG_XEN | ||
2797 | + int address_bits; | ||
2798 | + | ||
2799 | + if (limit == ~0UL) | ||
2800 | + address_bits = BITS_PER_LONG; | ||
2801 | + else | ||
2802 | + address_bits = long_log2(limit); | ||
2803 | + | ||
2804 | + if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) { | ||
2805 | + __free_pages(pages, order); | ||
2806 | + return NULL; | ||
2807 | + } | ||
2808 | +#endif | ||
2809 | pages->mapping = NULL; | ||
2810 | set_page_private(pages, order); | ||
2811 | count = 1 << order; | ||
2812 | @@ -365,6 +378,9 @@ | ||
2813 | count = 1 << order; | ||
2814 | for (i = 0; i < count; i++) | ||
2815 | ClearPageReserved(page + i); | ||
2816 | +#ifdef CONFIG_XEN | ||
2817 | + xen_destroy_contiguous_region((unsigned long)page_address(page), order); | ||
2818 | +#endif | ||
2819 | __free_pages(page, order); | ||
2820 | } | ||
2821 | |||
2822 | @@ -410,10 +426,10 @@ | ||
2823 | do { | ||
2824 | unsigned long pfn, epfn, addr, eaddr; | ||
2825 | |||
2826 | - pages = kimage_alloc_pages(GFP_KERNEL, order); | ||
2827 | + pages = kimage_alloc_pages(GFP_KERNEL, order, KEXEC_CONTROL_MEMORY_LIMIT); | ||
2828 | if (!pages) | ||
2829 | break; | ||
2830 | - pfn = page_to_pfn(pages); | ||
2831 | + pfn = kexec_page_to_pfn(pages); | ||
2832 | epfn = pfn + count; | ||
2833 | addr = pfn << PAGE_SHIFT; | ||
2834 | eaddr = epfn << PAGE_SHIFT; | ||
2835 | @@ -447,6 +463,7 @@ | ||
2836 | return pages; | ||
2837 | } | ||
2838 | |||
2839 | +#ifndef CONFIG_XEN | ||
2840 | static struct page *kimage_alloc_crash_control_pages(struct kimage *image, | ||
2841 | unsigned int order) | ||
2842 | { | ||
2843 | @@ -500,7 +517,7 @@ | ||
2844 | } | ||
2845 | /* If I don't overlap any segments I have found my hole! */ | ||
2846 | if (i == image->nr_segments) { | ||
2847 | - pages = pfn_to_page(hole_start >> PAGE_SHIFT); | ||
2848 | + pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT); | ||
2849 | break; | ||
2850 | } | ||
2851 | } | ||
2852 | @@ -527,6 +544,13 @@ | ||
2853 | |||
2854 | return pages; | ||
2855 | } | ||
2856 | +#else /* !CONFIG_XEN */ | ||
2857 | +struct page *kimage_alloc_control_pages(struct kimage *image, | ||
2858 | + unsigned int order) | ||
2859 | +{ | ||
2860 | + return kimage_alloc_normal_control_pages(image, order); | ||
2861 | +} | ||
2862 | +#endif | ||
2863 | |||
2864 | static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) | ||
2865 | { | ||
2866 | @@ -542,7 +566,7 @@ | ||
2867 | return -ENOMEM; | ||
2868 | |||
2869 | ind_page = page_address(page); | ||
2870 | - *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; | ||
2871 | + *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION; | ||
2872 | image->entry = ind_page; | ||
2873 | image->last_entry = ind_page + | ||
2874 | ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); | ||
2875 | @@ -603,13 +627,13 @@ | ||
2876 | #define for_each_kimage_entry(image, ptr, entry) \ | ||
2877 | for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ | ||
2878 | ptr = (entry & IND_INDIRECTION)? \ | ||
2879 | - phys_to_virt((entry & PAGE_MASK)): ptr +1) | ||
2880 | + kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1) | ||
2881 | |||
2882 | static void kimage_free_entry(kimage_entry_t entry) | ||
2883 | { | ||
2884 | struct page *page; | ||
2885 | |||
2886 | - page = pfn_to_page(entry >> PAGE_SHIFT); | ||
2887 | + page = kexec_pfn_to_page(entry >> PAGE_SHIFT); | ||
2888 | kimage_free_pages(page); | ||
2889 | } | ||
2890 | |||
2891 | @@ -621,6 +645,10 @@ | ||
2892 | if (!image) | ||
2893 | return; | ||
2894 | |||
2895 | +#ifdef CONFIG_XEN | ||
2896 | + xen_machine_kexec_unload(image); | ||
2897 | +#endif | ||
2898 | + | ||
2899 | kimage_free_extra_pages(image); | ||
2900 | for_each_kimage_entry(image, ptr, entry) { | ||
2901 | if (entry & IND_INDIRECTION) { | ||
2902 | @@ -696,7 +724,7 @@ | ||
2903 | * have a match. | ||
2904 | */ | ||
2905 | list_for_each_entry(page, &image->dest_pages, lru) { | ||
2906 | - addr = page_to_pfn(page) << PAGE_SHIFT; | ||
2907 | + addr = kexec_page_to_pfn(page) << PAGE_SHIFT; | ||
2908 | if (addr == destination) { | ||
2909 | list_del(&page->lru); | ||
2910 | return page; | ||
2911 | @@ -707,16 +735,16 @@ | ||
2912 | kimage_entry_t *old; | ||
2913 | |||
2914 | /* Allocate a page, if we run out of memory give up */ | ||
2915 | - page = kimage_alloc_pages(gfp_mask, 0); | ||
2916 | + page = kimage_alloc_pages(gfp_mask, 0, KEXEC_SOURCE_MEMORY_LIMIT); | ||
2917 | if (!page) | ||
2918 | return NULL; | ||
2919 | /* If the page cannot be used file it away */ | ||
2920 | - if (page_to_pfn(page) > | ||
2921 | + if (kexec_page_to_pfn(page) > | ||
2922 | (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { | ||
2923 | list_add(&page->lru, &image->unuseable_pages); | ||
2924 | continue; | ||
2925 | } | ||
2926 | - addr = page_to_pfn(page) << PAGE_SHIFT; | ||
2927 | + addr = kexec_page_to_pfn(page) << PAGE_SHIFT; | ||
2928 | |||
2929 | /* If it is the destination page we want use it */ | ||
2930 | if (addr == destination) | ||
2931 | @@ -739,7 +767,7 @@ | ||
2932 | struct page *old_page; | ||
2933 | |||
2934 | old_addr = *old & PAGE_MASK; | ||
2935 | - old_page = pfn_to_page(old_addr >> PAGE_SHIFT); | ||
2936 | + old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT); | ||
2937 | copy_highpage(page, old_page); | ||
2938 | *old = addr | (*old & ~PAGE_MASK); | ||
2939 | |||
2940 | @@ -789,7 +817,7 @@ | ||
2941 | result = -ENOMEM; | ||
2942 | goto out; | ||
2943 | } | ||
2944 | - result = kimage_add_page(image, page_to_pfn(page) | ||
2945 | + result = kimage_add_page(image, kexec_page_to_pfn(page) | ||
2946 | << PAGE_SHIFT); | ||
2947 | if (result < 0) | ||
2948 | goto out; | ||
2949 | @@ -821,6 +849,7 @@ | ||
2950 | return result; | ||
2951 | } | ||
2952 | |||
2953 | +#ifndef CONFIG_XEN | ||
2954 | static int kimage_load_crash_segment(struct kimage *image, | ||
2955 | struct kexec_segment *segment) | ||
2956 | { | ||
2957 | @@ -843,7 +872,7 @@ | ||
2958 | char *ptr; | ||
2959 | size_t uchunk, mchunk; | ||
2960 | |||
2961 | - page = pfn_to_page(maddr >> PAGE_SHIFT); | ||
2962 | + page = kexec_pfn_to_page(maddr >> PAGE_SHIFT); | ||
2963 | if (!page) { | ||
2964 | result = -ENOMEM; | ||
2965 | goto out; | ||
2966 | @@ -892,6 +921,13 @@ | ||
2967 | |||
2968 | return result; | ||
2969 | } | ||
2970 | +#else /* CONFIG_XEN */ | ||
2971 | +static int kimage_load_segment(struct kimage *image, | ||
2972 | + struct kexec_segment *segment) | ||
2973 | +{ | ||
2974 | + return kimage_load_normal_segment(image, segment); | ||
2975 | +} | ||
2976 | +#endif | ||
2977 | |||
2978 | /* | ||
2979 | * Exec Kernel system call: for obvious reasons only root may call it. | ||
2980 | @@ -1002,6 +1038,13 @@ | ||
2981 | if (result) | ||
2982 | goto out; | ||
2983 | } | ||
2984 | +#ifdef CONFIG_XEN | ||
2985 | + if (image) { | ||
2986 | + result = xen_machine_kexec_load(image); | ||
2987 | + if (result) | ||
2988 | + goto out; | ||
2989 | + } | ||
2990 | +#endif | ||
2991 | /* Install the new kernel, and Uninstall the old */ | ||
2992 | image = xchg(dest_image, image); | ||
2993 | |||
2994 | --- a/kernel/softlockup.c | ||
2995 | +++ b/kernel/softlockup.c | ||
2996 | @@ -39,6 +39,19 @@ | ||
2997 | .notifier_call = softlock_panic, | ||
2998 | }; | ||
2999 | |||
3000 | +unsigned long softlockup_get_next_event(void) | ||
3001 | +{ | ||
3002 | + int this_cpu = smp_processor_id(); | ||
3003 | + unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu); | ||
3004 | + | ||
3005 | + if (per_cpu(print_timestamp, this_cpu) == touch_timestamp || | ||
3006 | + did_panic || | ||
3007 | + !per_cpu(watchdog_task, this_cpu)) | ||
3008 | + return MAX_JIFFY_OFFSET; | ||
3009 | + | ||
3010 | + return max_t(long, 0, touch_timestamp + HZ - jiffies); | ||
3011 | +} | ||
3012 | + | ||
3013 | /* | ||
3014 | * Returns seconds, approximately. We don't need nanosecond | ||
3015 | * resolution, and we don't need to waste time with a big divide when | ||
3016 | --- a/kernel/sysctl.c | ||
3017 | +++ b/kernel/sysctl.c | ||
3018 | @@ -742,7 +742,7 @@ | ||
3019 | .proc_handler = &proc_dointvec, | ||
3020 | }, | ||
3021 | #endif | ||
3022 | -#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) | ||
3023 | +#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) && !defined(CONFIG_ACPI_PV_SLEEP) | ||
3024 | { | ||
3025 | .procname = "acpi_video_flags", | ||
3026 | .data = &acpi_realmode_flags, | ||
3027 | --- a/kernel/timer.c | ||
3028 | +++ b/kernel/timer.c | ||
3029 | @@ -802,7 +802,7 @@ | ||
3030 | unsigned long get_next_timer_interrupt(unsigned long now) | ||
3031 | { | ||
3032 | struct tvec_base *base = __get_cpu_var(tvec_bases); | ||
3033 | - unsigned long expires; | ||
3034 | + unsigned long expires, sl_next; | ||
3035 | |||
3036 | spin_lock(&base->lock); | ||
3037 | expires = __next_timer_interrupt(base); | ||
3038 | @@ -811,7 +811,11 @@ | ||
3039 | if (time_before_eq(expires, now)) | ||
3040 | return now; | ||
3041 | |||
3042 | - return cmp_next_hrtimer_event(now, expires); | ||
3043 | + expires = cmp_next_hrtimer_event(now, expires); | ||
3044 | + sl_next = softlockup_get_next_event(); | ||
3045 | + | ||
3046 | + return expires <= now || expires - now < sl_next | ||
3047 | + ? expires : now + sl_next; | ||
3048 | } | ||
3049 | |||
3050 | #ifdef CONFIG_NO_IDLE_HZ | ||
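The reworked return statement reads most naturally as a clamp: when no timer is due yet, never report an expiry later than the softlockup watchdog's next required tick, so a tickless idle loop (as used under Xen) cannot sleep past the watchdog. An equivalent hedged form:

/* Equivalent formulation of the return above (sketch). */
static unsigned long clamp_to_watchdog(unsigned long now, unsigned long expires)
{
        unsigned long sl_next = softlockup_get_next_event();

        if (expires <= now)
                return expires;                 /* a timer is already due */
        return min(expires, now + sl_next);     /* don't outsleep the watchdog */
}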
3051 | --- a/mm/memory.c | ||
3052 | +++ b/mm/memory.c | ||
3053 | @@ -402,6 +402,12 @@ | ||
3054 | return NULL; | ||
3055 | } | ||
3056 | |||
3057 | +#if defined(CONFIG_XEN) && defined(CONFIG_X86) | ||
3058 | + /* XEN: Covers user-space grant mappings (even of local pages). */ | ||
3059 | + if (unlikely(vma->vm_flags & VM_FOREIGN)) | ||
3060 | + return NULL; | ||
3061 | +#endif | ||
3062 | + | ||
3063 | #ifdef CONFIG_DEBUG_VM | ||
3064 | /* | ||
3065 | * Add some anal sanity checks for now. Eventually, | ||
3066 | @@ -410,7 +416,8 @@ | ||
3067 | * and that the resulting page looks ok. | ||
3068 | */ | ||
3069 | if (unlikely(!pfn_valid(pfn))) { | ||
3070 | - print_bad_pte(vma, pte, addr); | ||
3071 | + if (!(vma->vm_flags & VM_RESERVED)) | ||
3072 | + print_bad_pte(vma, pte, addr); | ||
3073 | return NULL; | ||
3074 | } | ||
3075 | #endif | ||
3076 | @@ -668,8 +675,12 @@ | ||
3077 | page->index > details->last_index)) | ||
3078 | continue; | ||
3079 | } | ||
3080 | - ptent = ptep_get_and_clear_full(mm, addr, pte, | ||
3081 | - tlb->fullmm); | ||
3082 | + if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte)) | ||
3083 | + ptent = vma->vm_ops->zap_pte(vma, addr, pte, | ||
3084 | + tlb->fullmm); | ||
3085 | + else | ||
3086 | + ptent = ptep_get_and_clear_full(mm, addr, pte, | ||
3087 | + tlb->fullmm); | ||
3088 | tlb_remove_tlb_entry(tlb, pte, addr); | ||
3089 | if (unlikely(!page)) | ||
3090 | continue; | ||
@@ -902,6 +913,7 @@
 tlb_finish_mmu(tlb, address, end);
 return end;
 }
+EXPORT_SYMBOL(zap_page_range);

 /*
 * Do a quick page-table lookup for a single page.
@@ -1043,6 +1055,26 @@
 continue;
 }

+#ifdef CONFIG_XEN
+ if (vma && (vma->vm_flags & VM_FOREIGN)) {
+ struct page **map = vma->vm_private_data;
+ int offset = (start - vma->vm_start) >> PAGE_SHIFT;
+ if (map[offset] != NULL) {
+ if (pages) {
+ struct page *page = map[offset];
+
+ pages[i] = page;
+ get_page(page);
+ }
+ if (vmas)
+ vmas[i] = vma;
+ i++;
+ start += PAGE_SIZE;
+ len--;
+ continue;
+ }
+ }
+#endif
 if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
 || !(vm_flags & vma->vm_flags))
 return i ? : -EFAULT;
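A VM_FOREIGN area short-circuits the normal page-table walk in get_user_pages(): the owning driver keeps a flat array of struct page pointers in vm_private_data, and the hunk simply indexes it by the page offset from vm_start. A self-contained model of that lookup (names are illustrative):

#include <stdio.h>
#include <stddef.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

struct page { int id; };

/* mirror of: map = vma->vm_private_data;
 *            offset = (start - vma->vm_start) >> PAGE_SHIFT; */
static struct page *foreign_lookup(struct page **map,
                                   unsigned long vm_start,
                                   unsigned long addr)
{
    size_t offset = (addr - vm_start) >> PAGE_SHIFT;
    return map[offset];
}

int main(void)
{
    struct page p0 = { 10 }, p1 = { 11 };
    struct page *map[] = { &p0, &p1 };
    unsigned long vm_start = 0x100000;

    printf("page id %d\n",
           foreign_lookup(map, vm_start, vm_start + PAGE_SIZE)->id);
    return 0;
}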
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -86,6 +86,8 @@
 next = pmd_addr_end(addr, end);
 if (pmd_none_or_clear_bad(pmd))
 continue;
+ if (arch_change_pte_range(mm, pmd, addr, next, newprot))
+ continue;
 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
 } while (pmd++, addr = next, addr != end);
 }
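arch_change_pte_range() gives the architecture first claim on a PMD range during mprotect(); a non-zero return means it handled the range itself, so the generic change_pte_range() is skipped. For non-Xen builds one would expect a no-op default, presumably supplied by this patch's include/asm-generic/pgtable.h change. A hedged sketch of such a stub, with the signature inferred from the call site above:

/* Sketch only: claim nothing, always fall through to the generic
 * change_pte_range(). */
#ifndef arch_change_pte_range
#define arch_change_pte_range(mm, pmd, addr, next, newprot) 0
#endif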
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -245,7 +245,11 @@
 1 << PG_slab |
 1 << PG_swapcache |
 1 << PG_writeback |
- 1 << PG_buddy );
+ 1 << PG_buddy |
+#ifdef CONFIG_X86_XEN
+ 1 << PG_pinned |
+#endif
+ 1 << PG_foreign );
 set_page_count(page, 0);
 reset_page_mapcount(page);
 page->mapping = NULL;
@@ -471,7 +475,11 @@
 1 << PG_swapcache |
 1 << PG_writeback |
 1 << PG_reserved |
- 1 << PG_buddy ))))
+ 1 << PG_buddy |
+#ifdef CONFIG_X86_XEN
+ 1 << PG_pinned |
+#endif
+ 1 << PG_foreign ))))
 bad_page(page);
 if (PageDirty(page))
 __ClearPageDirty(page);
@@ -527,6 +535,12 @@
 int i;
 int reserved = 0;

+#ifdef CONFIG_XEN
+ if (PageForeign(page)) {
+ PageForeignDestructor(page);
+ return;
+ }
+#endif
 for (i = 0 ; i < (1 << order) ; ++i)
 reserved += free_pages_check(page + i);
 if (reserved)
@@ -622,7 +636,11 @@
 1 << PG_swapcache |
 1 << PG_writeback |
 1 << PG_reserved |
- 1 << PG_buddy ))))
+ 1 << PG_buddy |
+#ifdef CONFIG_X86_XEN
+ 1 << PG_pinned |
+#endif
+ 1 << PG_foreign ))))
 bad_page(page);

 /*
@@ -990,6 +1008,12 @@
 struct per_cpu_pages *pcp;
 unsigned long flags;

+#ifdef CONFIG_XEN
+ if (PageForeign(page)) {
+ PageForeignDestructor(page);
+ return;
+ }
+#endif
 if (PageAnon(page))
 page->mapping = NULL;
 if (free_pages_check(page))
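Both freeing paths above bail out early for foreign pages: memory granted by another domain must be handed back through a page-specific destructor instead of entering the buddy allocator, where it could be reallocated while the grant is still live. A userspace model of that control flow (struct layout and names are illustrative; the real kernel reaches the destructor via page flags and fields rather than a plain function pointer):

#include <stdio.h>
#include <stdbool.h>

struct page {
    bool foreign;                       /* models PageForeign(page) */
    void (*destructor)(struct page *);  /* models PageForeignDestructor */
};

static void grant_return(struct page *pg)
{
    printf("returning granted page %p to its owner\n", (void *)pg);
}

static void free_page_model(struct page *pg)
{
    if (pg->foreign) {
        pg->destructor(pg);  /* owner-specific teardown */
        return;              /* never reaches the buddy allocator */
    }
    printf("freed %p to the buddy allocator\n", (void *)pg);
}

int main(void)
{
    struct page normal  = { false, NULL };
    struct page foreign = { true,  grant_return };

    free_page_model(&normal);
    free_page_model(&foreign);
    return 0;
}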
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -122,6 +122,12 @@

 #include "net-sysfs.h"

+#ifdef CONFIG_XEN
+#include <net/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#endif
+
 /*
 * The list of packet types we will receive (as opposed to discard)
 * and the routines to invoke.
@@ -1580,6 +1586,42 @@
 return 0;
 }

+#ifdef CONFIG_XEN
+inline int skb_checksum_setup(struct sk_buff *skb)
+{
+ if (skb->proto_csum_blank) {
+ if (skb->protocol != htons(ETH_P_IP))
+ goto out;
+ skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
+ if (skb->h.raw >= skb->tail)
+ goto out;
+ switch (skb->nh.iph->protocol) {
+ case IPPROTO_TCP:
+ skb->csum = offsetof(struct tcphdr, check);
+ break;
+ case IPPROTO_UDP:
+ skb->csum = offsetof(struct udphdr, check);
+ break;
+ default:
+ if (net_ratelimit())
+ printk(KERN_ERR "Attempting to checksum a non-"
+ "TCP/UDP packet, dropping a protocol"
+ " %d packet", skb->nh.iph->protocol);
+ goto out;
+ }
+ if ((skb->h.raw + skb->csum + 2) > skb->tail)
+ goto out;
+ skb->ip_summed = CHECKSUM_HW;
+ skb->proto_csum_blank = 0;
+ }
+ return 0;
+out:
+ return -EPROTO;
+}
+#else
+inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
+#endif
+
 /**
 * dev_queue_xmit - transmit a buffer
 * @skb: buffer to transmit
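skb_checksum_setup() finishes the job a Xen frontend or backend deferred: for a proto_csum_blank skb it derives the transport header from the IP header length, records in skb->csum the offset of the checksum field within that header, and flips the skb to CHECKSUM_HW. The bounds check allows exactly 2 extra bytes because the 16-bit checksum field itself must still fit inside the buffer. The stored offsets can be verified in isolation; the structs below are illustrative mirrors of the classic tcphdr/udphdr layouts, not kernel headers:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct tcphdr_model {
    uint16_t source, dest;
    uint32_t seq, ack_seq;
    uint16_t flags, window, check, urg_ptr;
};

struct udphdr_model {
    uint16_t source, dest, len, check;
};

int main(void)
{
    /* matches offsetof(struct tcphdr, check) == 16 */
    printf("tcp check offset: %zu\n", offsetof(struct tcphdr_model, check));
    /* matches offsetof(struct udphdr, check) == 6 */
    printf("udp check offset: %zu\n", offsetof(struct udphdr_model, check));
    return 0;
}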
@@ -1612,6 +1654,12 @@
 struct Qdisc *q;
 int rc = -ENOMEM;

+ /* If a checksum-deferred packet is forwarded to a device that needs a
+ * checksum, correct the pointers and force checksumming.
+ */
+ if (skb_checksum_setup(skb))
+ goto out_kfree_skb;
+
 /* GSO will handle the following emulations directly. */
 if (netif_needs_gso(dev, skb))
 goto gso;
@@ -2062,6 +2110,19 @@
 }
 #endif

+#ifdef CONFIG_XEN
+ switch (skb->ip_summed) {
+ case CHECKSUM_UNNECESSARY:
+ skb->proto_data_valid = 1;
+ break;
+ case CHECKSUM_HW:
+ /* XXX Implement me. */
+ default:
+ skb->proto_data_valid = 0;
+ break;
+ }
+#endif
+
 list_for_each_entry_rcu(ptype, &ptype_all, list) {
 if (!ptype->dev || ptype->dev == skb->dev) {
 if (pt_prev)
@@ -4587,6 +4648,7 @@
 EXPORT_SYMBOL(net_enable_timestamp);
 EXPORT_SYMBOL(net_disable_timestamp);
 EXPORT_SYMBOL(dev_get_flags);
+EXPORT_SYMBOL(skb_checksum_setup);

 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 EXPORT_SYMBOL(br_handle_frame_hook);
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -454,6 +454,10 @@
 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
 n->cloned = 1;
 n->nohdr = 0;
+#ifdef CONFIG_XEN
+ C(proto_data_valid);
+ C(proto_csum_blank);
+#endif
 n->destructor = NULL;
 C(iif);
 C(tail);
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -132,6 +132,9 @@
 if (hdrsize < sizeof(*hdr))
 return 1;

+ if (skb_checksum_setup(skb))
+ return 0;
+
 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
 return 1;
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -116,6 +116,10 @@
 newport = tuple->dst.u.udp.port;
 portptr = &hdr->dest;
 }
+
+ if (skb_checksum_setup(skb))
+ return 0;
+
 if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
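Both NAT helpers call skb_checksum_setup() before editing anything: inet_proto_csum_replace4/2() update the checksum incrementally per RFC 1624, and that only stays correct if the skb's checksum state is coherent first, which a still-blank Xen checksum is not. The arithmetic being relied on, as a standalone demo (HC' = ~(~HC + ~m + m') in one's-complement):

#include <stdio.h>
#include <stdint.h>

/* Incremental one's-complement checksum update, RFC 1624 eqn. 3. */
static uint16_t csum_update(uint16_t check, uint16_t old, uint16_t new_val)
{
    uint32_t sum = (uint16_t)~check + (uint16_t)~old + new_val;
    sum = (sum & 0xffff) + (sum >> 16);   /* fold carries back in */
    sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;
}

int main(void)
{
    /* e.g. NAT rewriting a port from 80 to 8080 */
    uint16_t before = 0x1234;
    uint16_t after  = csum_update(before, 80, 8080);
    printf("checksum 0x%04x -> 0x%04x\n", before, after);
    return 0;
}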
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -81,7 +81,7 @@
 #endif

 skb->protocol = htons(ETH_P_IP);
- return xfrm_output(skb);
+ return skb_checksum_setup(skb) ?: xfrm_output(skb);
 }

 int xfrm4_output(struct sk_buff *skb)
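The `?:` in the new return statement is GNU C's two-operand conditional: it evaluates skb_checksum_setup(skb) once, returns its non-zero error code on failure, and otherwise falls through to xfrm_output(). Since the form is easy to misread, a tiny gcc/clang demo (names illustrative):

#include <stdio.h>

static int setup(int err) { return err; }
static int do_work(void)  { return 42; }

int main(void)
{
    /* a ?: b yields a if a is non-zero (evaluated once), else b */
    printf("%d\n", setup(0)  ?: do_work()); /* 42 */
    printf("%d\n", setup(-7) ?: do_work()); /* -7 */
    return 0;
}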
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -73,6 +73,20 @@
 $(warning kbuild: Makefile.build is included improperly)
 endif

+ifeq ($(CONFIG_XEN),y)
+$(objtree)/scripts/Makefile.xen: $(srctree)/scripts/Makefile.xen.awk $(srctree)/scripts/Makefile.build
+ @echo ' Updating $@'
+ $(if $(shell echo a | $(AWK) '{ print gensub(/a/, "AA", "g"); }'),\
+ ,$(error 'Your awk program does not define gensub. Use gawk or another awk with gensub'))
+ @$(AWK) -f $< $(filter-out $<,$^) >$@
+
+xen-src-single-used-m := $(patsubst $(srctree)/%,%,$(wildcard $(addprefix $(srctree)/,$(single-used-m:.o=-xen.c))))
+xen-single-used-m := $(xen-src-single-used-m:-xen.c=.o)
+single-used-m := $(filter-out $(xen-single-used-m),$(single-used-m))
+
+-include $(objtree)/scripts/Makefile.xen
+endif
+
 # ===========================================================================

 ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),)
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -17,6 +17,12 @@

 lib-y := $(filter-out $(obj-y), $(sort $(lib-y) $(lib-m)))

+# Remove objects forcibly disabled
+
+obj-y := $(filter-out $(disabled-obj-y),$(obj-y))
+obj-m := $(filter-out $(disabled-obj-y),$(obj-m))
+lib-y := $(filter-out $(disabled-obj-y),$(lib-y))
+
+
 # Handle objects in subdirs
 # ---------------------------------------------------------------------------