From: www.kernel.org Subject: Linux 2.6.21 Patch-mainline: 2.6.21 Automatically created from "patches.kernel.org/patch-2.6.21" by xen-port-patches.py Acked-by: jbeulich@novell.com --- arch/x86/Kconfig | 4 arch/x86/ia32/ia32entry-xen.S | 5 arch/x86/kernel/Makefile | 4 arch/x86/kernel/acpi/sleep_64-xen.c | 6 arch/x86/kernel/apic_32-xen.c | 65 ---- arch/x86/kernel/cpu/common-xen.c | 14 arch/x86/kernel/e820_32-xen.c | 18 - arch/x86/kernel/e820_64-xen.c | 40 ++ arch/x86/kernel/entry_32-xen.S | 80 +++-- arch/x86/kernel/entry_64-xen.S | 3 arch/x86/kernel/genapic_64-xen.c | 4 arch/x86/kernel/head64-xen.c | 8 arch/x86/kernel/head_32-xen.S | 9 arch/x86/kernel/io_apic_32-xen.c | 43 +- arch/x86/kernel/io_apic_64-xen.c | 413 +++++++++++++------------- arch/x86/kernel/irq_32-xen.c | 22 + arch/x86/kernel/irq_64-xen.c | 13 arch/x86/kernel/microcode-xen.c | 2 arch/x86/kernel/mpparse_32-xen.c | 4 arch/x86/kernel/mpparse_64-xen.c | 6 arch/x86/kernel/pci-dma_32-xen.c | 2 arch/x86/kernel/pci-swiotlb_64-xen.c | 2 arch/x86/kernel/pcspeaker.c | 5 arch/x86/kernel/process_32-xen.c | 42 +- arch/x86/kernel/process_64-xen.c | 13 arch/x86/kernel/setup_32-xen.c | 46 -- arch/x86/kernel/setup_64-xen.c | 184 +---------- arch/x86/kernel/smp_32-xen.c | 5 arch/x86/kernel/time_32-xen.c | 275 +---------------- arch/x86/kernel/traps_32-xen.c | 27 + arch/x86/kernel/vsyscall_64-xen.c | 127 ++++--- arch/x86/mm/fault_32-xen.c | 44 -- arch/x86/mm/fault_64-xen.c | 39 -- arch/x86/mm/highmem_32-xen.c | 9 arch/x86/mm/init_32-xen.c | 2 arch/x86/mm/init_64-xen.c | 24 + arch/x86/mm/pageattr_64-xen.c | 6 arch/x86/mm/pgtable_32-xen.c | 28 + drivers/char/tpm/tpm_xen.c | 5 drivers/xen/balloon/sysfs.c | 1 drivers/xen/blkback/xenbus.c | 4 drivers/xen/blkfront/blkfront.c | 1 drivers/xen/blktap/xenbus.c | 4 drivers/xen/core/evtchn.c | 4 drivers/xen/core/smpboot.c | 18 - drivers/xen/fbfront/xenfb.c | 1 drivers/xen/fbfront/xenkbd.c | 1 drivers/xen/netback/xenbus.c | 4 drivers/xen/netfront/netfront.c | 49 +-- 
drivers/xen/pciback/xenbus.c | 1 drivers/xen/pcifront/xenbus.c | 1 drivers/xen/tpmback/common.h | 4 drivers/xen/tpmback/interface.c | 5 drivers/xen/tpmback/tpmback.c | 16 - drivers/xen/tpmback/xenbus.c | 5 drivers/xen/xenbus/xenbus_probe.c | 17 - drivers/xen/xenbus/xenbus_probe.h | 4 drivers/xen/xenbus/xenbus_probe_backend.c | 8 include/asm-x86/i8253.h | 4 include/asm-x86/mach-xen/asm/desc_32.h | 2 include/asm-x86/mach-xen/asm/dma-mapping_64.h | 4 include/asm-x86/mach-xen/asm/e820_64.h | 2 include/asm-x86/mach-xen/asm/hw_irq_64.h | 33 +- include/asm-x86/mach-xen/asm/hypervisor.h | 2 include/asm-x86/mach-xen/asm/io_32.h | 6 include/asm-x86/mach-xen/asm/io_64.h | 8 include/asm-x86/mach-xen/asm/mmu_context_32.h | 10 include/asm-x86/mach-xen/asm/pgalloc_32.h | 21 + include/asm-x86/mach-xen/asm/pgtable_32.h | 25 + include/asm-x86/mach-xen/asm/pgtable_64.h | 9 include/asm-x86/mach-xen/asm/processor_32.h | 6 include/asm-x86/mach-xen/asm/segment_32.h | 23 + include/asm-x86/mach-xen/asm/smp_32.h | 5 include/asm-x86/mach-xen/asm/smp_64.h | 3 include/xen/xenbus.h | 24 + lib/swiotlb-xen.c | 19 - 76 files changed, 889 insertions(+), 1113 deletions(-) --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -48,13 +48,15 @@ config CLOCKSOURCE_WATCHDOG def_bool y + depends on !X86_XEN config GENERIC_CLOCKEVENTS def_bool y + depends on !X86_XEN config GENERIC_CLOCKEVENTS_BROADCAST def_bool y - depends on X86_64 || (X86_32 && X86_LOCAL_APIC) + depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN) config LOCKDEP_SUPPORT def_bool y --- a/arch/x86/ia32/ia32entry-xen.S +++ b/arch/x86/ia32/ia32entry-xen.S @@ -465,7 +465,7 @@ .quad sys32_vm86_warning /* vm86old */ .quad compat_sys_wait4 .quad sys_swapoff /* 115 */ - .quad sys32_sysinfo + .quad compat_sys_sysinfo .quad sys32_ipc .quad sys_fsync .quad stub32_sigreturn @@ -510,7 +510,7 @@ .quad sys_sched_yield .quad sys_sched_get_priority_max .quad sys_sched_get_priority_min /* 160 */ - .quad sys_sched_rr_get_interval + .quad 
sys32_sched_rr_get_interval .quad compat_sys_nanosleep .quad sys_mremap .quad sys_setresuid16 @@ -668,4 +668,5 @@ .quad compat_sys_vmsplice .quad compat_sys_move_pages .quad sys_getcpu + .quad sys_epoll_pwait ia32_syscall_end: --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -104,6 +104,6 @@ pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o endif -disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \ - smpboot_$(BITS).o tsc_$(BITS).o +disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \ + smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := --- a/arch/x86/kernel/acpi/sleep_64-xen.c +++ b/arch/x86/kernel/acpi/sleep_64-xen.c @@ -59,7 +59,7 @@ unsigned long acpi_video_flags; extern char wakeup_start, wakeup_end; -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); +extern unsigned long acpi_copy_wakeup_routine(unsigned long); static pgd_t low_ptr; @@ -67,8 +67,10 @@ { pgd_t *slot0 = pgd_offset(current->mm, 0UL); low_ptr = *slot0; + /* FIXME: We're playing with the current task's page tables here, which + * is potentially dangerous on SMP systems. 
+ */ set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); - WARN_ON(num_online_cpus() != 1); local_flush_tlb(); } #endif --- a/arch/x86/kernel/apic_32-xen.c +++ b/arch/x86/kernel/apic_32-xen.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -56,83 +58,26 @@ */ /* - * Debug level + * Debug level, exported for io_apic.c */ int apic_verbosity; #ifndef CONFIG_XEN static int modern_apic(void) { - unsigned int lvr, version; /* AMD systems use old APIC versions, so check the CPU */ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0xf) + boot_cpu_data.x86 >= 0xf) return 1; - lvr = apic_read(APIC_LVR); - version = GET_APIC_VERSION(lvr); - return version >= 0x14; + return lapic_get_version() >= 0x14; } #endif /* !CONFIG_XEN */ -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - printk("unexpected IRQ trap at vector %02x\n", irq); - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But only ack when the APIC is enabled -AK - */ - if (cpu_has_apic) - ack_APIC_irq(); -} - int get_physical_broadcast(void) { return 0xff; } -#ifndef CONFIG_XEN -#ifndef CONFIG_SMP -static void up_apic_timer_interrupt_call(void) -{ - int cpu = smp_processor_id(); - - /* - * the NMI deadlock-detector uses this. 
- */ - per_cpu(irq_stat, cpu).apic_timer_irqs++; - - smp_local_timer_interrupt(); -} -#endif - -void smp_send_timer_broadcast_ipi(void) -{ - cpumask_t mask; - - cpus_and(mask, cpu_online_map, timer_bcast_ipi); - if (!cpus_empty(mask)) { -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#else - /* - * We can directly call the apic timer interrupt handler - * in UP case. Minus all irq related functions - */ - up_apic_timer_interrupt_call(); -#endif - } -} -#endif - int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; --- a/arch/x86/kernel/cpu/common-xen.c +++ b/arch/x86/kernel/cpu/common-xen.c @@ -610,7 +610,7 @@ struct pt_regs * __devinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); - regs->xgs = __KERNEL_PDA; + regs->xfs = __KERNEL_PDA; return regs; } @@ -667,12 +667,12 @@ .pcurrent = &init_task, }; -static inline void set_kernel_gs(void) +static inline void set_kernel_fs(void) { - /* Set %gs for this CPU's PDA. Memory clobber is to create a + /* Set %fs for this CPU's PDA. Memory clobber is to create a barrier with respect to any PDA operations, so the compiler doesn't move any before here. */ - asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); + asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); } /* Initialize the CPU's GDT and PDA. The boot CPU does this for @@ -730,7 +730,7 @@ } BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8)); - set_kernel_gs(); + set_kernel_fs(); } /* Common CPU init for both boot and secondary CPUs */ @@ -775,8 +775,8 @@ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear %fs. */ - asm volatile ("mov %0, %%fs" : : "r" (0)); + /* Clear %gs. 
*/ + asm volatile ("mov %0, %%gs" : : "r" (0)); /* Clear all 6 debug registers: */ set_debugreg(0, 0); --- a/arch/x86/kernel/e820_32-xen.c +++ b/arch/x86/kernel/e820_32-xen.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #ifdef CONFIG_EFI @@ -157,21 +158,22 @@ .flags = IORESOURCE_BUSY | IORESOURCE_IO } }; -static int romsignature(const unsigned char *x) +#define ROMSIGNATURE 0xaa55 + +static int __init romsignature(const unsigned char *rom) { unsigned short sig; - int ret = 0; - if (probe_kernel_address((const unsigned short *)x, sig) == 0) - ret = (sig == 0xaa55); - return ret; + + return probe_kernel_address((const unsigned short *)rom, sig) == 0 && + sig == ROMSIGNATURE; } static int __init romchecksum(unsigned char *rom, unsigned long length) { - unsigned char *p, sum = 0; + unsigned char sum; - for (p = rom; p < rom + length; p++) - sum += *p; + for (sum = 0; length; length--) + sum += *rom++; return sum == 0; } --- a/arch/x86/kernel/e820_64-xen.c +++ b/arch/x86/kernel/e820_64-xen.c @@ -88,6 +88,13 @@ return 1; } +#ifdef CONFIG_NUMA + /* NUMA memory to node map */ + if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) { + *addrp = nodemap_addr + nodemap_size; + return 1; + } +#endif /* XXX ramdisk image here? */ #else if (last < (table_end<type != E820_RAM || + ei->addr+ei->size <= start || + ei->addr >= end) + continue; + + addr = round_up(ei->addr, PAGE_SIZE); + if (addr < start) + addr = start; + + last = round_down(ei->addr + ei->size, PAGE_SIZE); + if (last >= end) + last = end; + + if (last > addr) + ram += last - addr; + } + return ((end - start) - ram); +} + +/* * Mark e820 reserved areas as busy for the resource manager. 
*/ void __init e820_reserve_resources(struct e820entry *e820, int nr_map) @@ -725,7 +763,7 @@ } early_param("memmap", parse_memmap_opt); -void finish_e820_parsing(void) +void __init finish_e820_parsing(void) { if (userdef) { printk(KERN_INFO "user-defined physical RAM map:\n"); --- a/arch/x86/kernel/entry_32-xen.S +++ b/arch/x86/kernel/entry_32-xen.S @@ -30,7 +30,7 @@ * 18(%esp) - %eax * 1C(%esp) - %ds * 20(%esp) - %es - * 24(%esp) - %gs + * 24(%esp) - %fs * 28(%esp) - orig_eax * 2C(%esp) - %eip * 30(%esp) - %cs @@ -102,9 +102,9 @@ #define SAVE_ALL \ cld; \ - pushl %gs; \ + pushl %fs; \ CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET gs, 0;*/\ + /*CFI_REL_OFFSET fs, 0;*/\ pushl %es; \ CFI_ADJUST_CFA_OFFSET 4;\ /*CFI_REL_OFFSET es, 0;*/\ @@ -136,7 +136,7 @@ movl %edx, %ds; \ movl %edx, %es; \ movl $(__KERNEL_PDA), %edx; \ - movl %edx, %gs + movl %edx, %fs #define RESTORE_INT_REGS \ popl %ebx; \ @@ -169,9 +169,9 @@ 2: popl %es; \ CFI_ADJUST_CFA_OFFSET -4;\ /*CFI_RESTORE es;*/\ -3: popl %gs; \ +3: popl %fs; \ CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE gs;*/\ + /*CFI_RESTORE fs;*/\ .pushsection .fixup,"ax"; \ 4: movl $0,(%esp); \ jmp 1b; \ @@ -230,6 +230,7 @@ CFI_ADJUST_CFA_OFFSET -4 jmp syscall_exit CFI_ENDPROC +END(ret_from_fork) /* * Return to user mode is not as complex as all this looks, @@ -261,6 +262,7 @@ # int/exception return? 
jne work_pending jmp restore_all +END(ret_from_exception) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) @@ -275,6 +277,7 @@ jz restore_all call preempt_schedule_irq jmp need_resched +END(resume_kernel) #endif CFI_ENDPROC @@ -352,16 +355,17 @@ movl PT_OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON -1: mov PT_GS(%esp), %gs +1: mov PT_FS(%esp), %fs ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC .pushsection .fixup,"ax" -2: movl $0,PT_GS(%esp) +2: movl $0,PT_FS(%esp) jmp 1b .section __ex_table,"a" .align 4 .long 1b,2b .popsection +ENDPROC(sysenter_entry) # pv sysenter call handler stub ENTRY(sysenter_entry_pv) @@ -533,6 +537,7 @@ jmp hypercall_page + (__HYPERVISOR_iret * 32) #endif CFI_ENDPROC +ENDPROC(system_call) # perform work that needs to be done immediately before resumption ALIGN @@ -578,6 +583,7 @@ xorl %edx, %edx call do_notify_resume jmp resume_userspace_sig +END(work_pending) # perform syscall exit tracing ALIGN @@ -593,6 +599,7 @@ cmpl $(nr_syscalls), %eax jnae syscall_call jmp syscall_exit +END(syscall_trace_entry) # perform syscall exit tracing ALIGN @@ -606,6 +613,7 @@ movl $1, %edx call do_syscall_trace jmp resume_userspace +END(syscall_exit_work) CFI_ENDPROC RING0_INT_FRAME # can't unwind into user space anyway @@ -616,16 +624,18 @@ GET_THREAD_INFO(%ebp) movl $-EFAULT,PT_EAX(%esp) jmp resume_userspace +END(syscall_fault) syscall_badsys: movl $-ENOSYS,PT_EAX(%esp) jmp resume_userspace +END(syscall_badsys) CFI_ENDPROC #ifndef CONFIG_XEN #define FIXUP_ESPFIX_STACK \ /* since we are on a wrong stack, we cant make it a C code :( */ \ - movl %gs:PDA_cpu, %ebx; \ + movl %fs:PDA_cpu, %ebx; \ PER_CPU(cpu_gdt_descr, %ebx); \ movl GDS_address(%ebx), %ebx; \ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ @@ -656,9 +666,9 @@ ENTRY(interrupt) .text -vector=0 ENTRY(irq_entries_start) RING0_INT_FRAME +vector=0 .rept NR_IRQS ALIGN .if vector @@ -667,11 +677,16 @@ 1: pushl $~(vector) CFI_ADJUST_CFA_OFFSET 4 jmp common_interrupt -.data + .previous .long 1b -.text 
+ .text vector=vector+1 .endr +END(irq_entries_start) + +.previous +END(interrupt) +.previous /* * the CPU automatically disables interrupts when executing an IRQ vector, @@ -684,6 +699,7 @@ movl %esp,%eax call do_IRQ jmp ret_from_intr +ENDPROC(common_interrupt) CFI_ENDPROC #define BUILD_INTERRUPT(name, nr) \ @@ -696,10 +712,16 @@ movl %esp,%eax; \ call smp_/**/name; \ jmp ret_from_intr; \ - CFI_ENDPROC + CFI_ENDPROC; \ +ENDPROC(name) /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" + +/* This alternate entry is needed because we hijack the apic LVTT */ +#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) +BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) +#endif #else #define UNWIND_ESPFIX_STACK #endif @@ -710,7 +732,7 @@ CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: - /* the function address is in %gs's slot on the stack */ + /* the function address is in %fs's slot on the stack */ pushl %es CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET es, 0*/ @@ -739,20 +761,20 @@ CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 cld - pushl %gs + pushl %fs CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET gs, 0*/ + /*CFI_REL_OFFSET fs, 0*/ movl $(__KERNEL_PDA), %ecx - movl %ecx, %gs + movl %ecx, %fs UNWIND_ESPFIX_STACK popl %ecx CFI_ADJUST_CFA_OFFSET -4 /*CFI_REGISTER es, ecx*/ - movl PT_GS(%esp), %edi # get the function address + movl PT_FS(%esp), %edi # get the function address movl PT_ORIG_EAX(%esp), %edx # get the error code movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_GS(%esp) - /*CFI_REL_OFFSET gs, ES*/ + mov %ecx, PT_FS(%esp) + /*CFI_REL_OFFSET fs, ES*/ movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es @@ -839,7 +861,7 @@ .byte 0x18 # pop %eax .byte 0x1c # pop %ds .byte 0x20 # pop %es - .byte 0x24,0x24 # pop %gs + .byte 0x24,0x24 # pop %fs .byte 0x28,0x28,0x28 # add $4,%esp .byte 0x2c # iret .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) @@ -905,6 +927,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC 
+END(coprocessor_error) ENTRY(simd_coprocessor_error) RING0_INT_FRAME @@ -914,6 +937,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(simd_coprocessor_error) ENTRY(device_not_available) RING0_INT_FRAME @@ -936,6 +960,7 @@ call math_state_restore jmp ret_from_exception CFI_ENDPROC +END(device_not_available) #ifndef CONFIG_XEN /* @@ -1097,10 +1122,12 @@ .align 4 .long 1b,iret_exc .previous +END(native_iret) ENTRY(native_irq_enable_sysexit) sti sysexit +END(native_irq_enable_sysexit) #endif KPROBE_ENTRY(int3) @@ -1123,6 +1150,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(overflow) ENTRY(bounds) RING0_INT_FRAME @@ -1132,6 +1160,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(bounds) ENTRY(invalid_op) RING0_INT_FRAME @@ -1141,6 +1170,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(invalid_op) ENTRY(coprocessor_segment_overrun) RING0_INT_FRAME @@ -1150,6 +1180,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(coprocessor_segment_overrun) ENTRY(invalid_TSS) RING0_EC_FRAME @@ -1157,6 +1188,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(invalid_TSS) ENTRY(segment_not_present) RING0_EC_FRAME @@ -1164,6 +1196,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(segment_not_present) ENTRY(stack_segment) RING0_EC_FRAME @@ -1171,6 +1204,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(stack_segment) KPROBE_ENTRY(general_protection) RING0_EC_FRAME @@ -1186,6 +1220,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(alignment_check) ENTRY(divide_error) RING0_INT_FRAME @@ -1195,6 +1230,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(divide_error) #ifdef CONFIG_X86_MCE ENTRY(machine_check) @@ -1205,6 +1241,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(machine_check) #endif #ifndef CONFIG_XEN @@ -1224,6 +1261,7 @@ CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC +END(spurious_interrupt_bug) ENTRY(kernel_thread_helper) pushl $0 # fake return address 
for unwinder --- a/arch/x86/kernel/entry_64-xen.S +++ b/arch/x86/kernel/entry_64-xen.S @@ -629,6 +629,9 @@ ENTRY(call_function_interrupt) apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt END(call_function_interrupt) +ENTRY(irq_move_cleanup_interrupt) + apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt +END(irq_move_cleanup_interrupt) #endif ENTRY(apic_timer_interrupt) --- a/arch/x86/kernel/genapic_64-xen.c +++ b/arch/x86/kernel/genapic_64-xen.c @@ -65,8 +65,8 @@ * Some x86_64 machines use physical APIC mode regardless of how many * procs/clusters are present (x86_64 ES7000 is an example). */ - if (acpi_fadt.revision > FADT2_REVISION_ID) - if (acpi_fadt.force_apic_physical_destination_mode) { + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) + if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { genapic = &apic_cluster; goto print; } --- a/arch/x86/kernel/head64-xen.c +++ b/arch/x86/kernel/head64-xen.c @@ -42,8 +42,6 @@ #define OLD_CL_BASE_ADDR 0x90000 #define OLD_CL_OFFSET 0x90022 -extern char saved_command_line[]; - static void __init copy_bootdata(char *real_mode_data) { #ifndef CONFIG_XEN @@ -59,14 +57,14 @@ new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; } command_line = (char *) ((u64)(new_data)); - memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); #else int max_cmdline; if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) max_cmdline = COMMAND_LINE_SIZE; - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); - saved_command_line[max_cmdline-1] = '\0'; + memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline); + boot_command_line[max_cmdline-1] = '\0'; #endif } --- a/arch/x86/kernel/head_32-xen.S +++ b/arch/x86/kernel/head_32-xen.S @@ -27,6 +27,7 @@ #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id +.section .text.head,"ax",@progbits #define 
VIRT_ENTRY_OFFSET 0x0 .org VIRT_ENTRY_OFFSET ENTRY(startup_32) @@ -60,11 +61,11 @@ movb $1,X86_HARD_MATH - xorl %eax,%eax # Clear FS - movl %eax,%fs + xorl %eax,%eax # Clear GS + movl %eax,%gs movl $(__KERNEL_PDA),%eax - mov %eax,%gs + mov %eax,%fs cld # gcc2 wants the direction flag cleared at all times @@ -75,7 +76,7 @@ * Point the GDT at this CPU's PDA. This will be * cpu_gdt_table and boot_pda. */ -setup_pda: +ENTRY(setup_pda) /* get the PDA pointer */ movl $boot_pda, %eax --- a/arch/x86/kernel/io_apic_32-xen.c +++ b/arch/x86/kernel/io_apic_32-xen.c @@ -164,7 +164,7 @@ */ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { - volatile struct io_apic *io_apic = io_apic_base(apic); + volatile struct io_apic __iomem *io_apic = io_apic_base(apic); if (sis_apic_bug) writel(reg, &io_apic->index); writel(value, &io_apic->data); @@ -387,7 +387,7 @@ break; entry = irq_2_pin + entry->next; } - set_native_irq_info(irq, cpumask); + irq_desc[irq].affinity = cpumask; spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -526,8 +526,8 @@ package_index = CPU_TO_PACKAGEINDEX(i); for (j = 0; j < NR_IRQS; j++) { unsigned long value_now, delta; - /* Is this an active IRQ? */ - if (!irq_desc[j].action) + /* Is this an active IRQ or balancing disabled ? 
*/ + if (!irq_desc[j].action || irq_balancing_disabled(j)) continue; if ( package_index == i ) IRQ_DELTA(package_index,j) = 0; @@ -780,7 +780,7 @@ return 0; } -int __init irqbalance_disable(char *str) +int __devinit irqbalance_disable(char *str) { irqbalance_disabled = 1; return 1; @@ -1319,11 +1319,9 @@ trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); - else { - irq_desc[irq].status |= IRQ_DELAYED_DISABLE; + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); - } set_intr_gate(vector, interrupt[irq]); } #else @@ -1397,7 +1395,6 @@ } spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(apic, pin, entry); - set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1628,7 +1625,7 @@ v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); - maxlvt = get_maxlvt(); + maxlvt = lapic_get_maxlvt(); v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... 
APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); @@ -1962,7 +1959,7 @@ #endif #ifndef CONFIG_XEN -static int no_timer_check __initdata; +int no_timer_check __initdata; static int __init notimercheck(char *s) { @@ -2355,7 +2352,7 @@ disable_8259A_irq(0); set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, - "fasteio"); + "fasteoi"); apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ enable_8259A_irq(0); @@ -2646,7 +2643,7 @@ msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - set_native_irq_info(irq, mask); + irq_desc[irq].affinity = mask; } #endif /* CONFIG_SMP */ @@ -2665,25 +2662,32 @@ .retrigger = ioapic_retrigger_irq, }; -int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { struct msi_msg msg; - int ret; + int irq, ret; + irq = create_irq(); + if (irq < 0) + return irq; + + set_irq_msi(irq, desc); ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) + if (ret < 0) { + destroy_irq(irq); return ret; + } write_msi_msg(irq, &msg); set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - return 0; + return irq; } void arch_teardown_msi_irq(unsigned int irq) { - return; + destroy_irq(irq); } #endif /* CONFIG_PCI_MSI */ @@ -2723,7 +2727,7 @@ dest = cpu_mask_to_apicid(mask); target_ht_irq(irq, dest); - set_native_irq_info(irq, mask); + irq_desc[irq].affinity = mask; } #endif @@ -2931,7 +2935,6 @@ spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(ioapic, pin, entry); - set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; --- a/arch/x86/kernel/io_apic_64-xen.c +++ b/arch/x86/kernel/io_apic_64-xen.c @@ -36,6 +36,7 @@ #include #endif +#include #include #include #include @@ -47,7 +48,20 @@ #include #include -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result); +struct irq_cfg { +#ifndef CONFIG_XEN + cpumask_t domain; + cpumask_t old_domain; +#endif + 
unsigned move_cleanup_count; + u8 vector; + u8 move_in_progress : 1; +}; + +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly; + +static int assign_irq_vector(int irq, cpumask_t mask); #define __apicdebuginit __init @@ -88,7 +102,7 @@ * Rough estimation of how many shared IRQs there are, can * be changed anytime. */ -#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS +#define MAX_PLUS_SHARED_IRQS NR_IRQS #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) /* @@ -259,21 +273,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { + struct irq_cfg *cfg = irq_cfg + irq; unsigned long flags; unsigned int dest; cpumask_t tmp; - int vector; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(mask, tmp, CPU_MASK_ALL); + return; - vector = assign_irq_vector(irq, mask, &tmp); - if (vector < 0) + if (assign_irq_vector(irq, mask)) return; + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); /* @@ -282,8 +294,8 @@ dest = SET_APIC_LOGICAL_ID(dest); spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, vector); - set_native_irq_info(irq, mask); + __target_IO_APIC_irq(irq, dest, cfg->vector); + irq_desc[irq].affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } #endif @@ -329,11 +341,11 @@ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ reg ACTION; \ io_apic_modify(entry->apic, reg); \ + FINAL; \ if (!entry->next) \ break; \ entry = irq_2_pin + entry->next; \ } \ - FINAL; \ } #define DO_ACTION(name,R,ACTION, FINAL) \ @@ -666,74 +678,58 @@ return irq; } -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic,pin,mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 
0; -} - -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; - -static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) +static int __assign_irq_vector(int irq, cpumask_t mask) { - int vector; struct physdev_irq irq_op; + struct irq_cfg *cfg; - BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); + BUG_ON((unsigned)irq >= NR_IRQS); + cfg = &irq_cfg[irq]; - cpus_and(*result, mask, cpu_online_map); + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; - if (irq_vector[irq] > 0) - return irq_vector[irq]; + if (cfg->vector) + return 0; irq_op.irq = irq; if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) return -ENOSPC; - vector = irq_op.vector; - irq_vector[irq] = vector; + cfg->vector = irq_op.vector; - return vector; + return 0; } -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) +static int assign_irq_vector(int irq, cpumask_t mask) { - int vector; + int err; unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - vector = __assign_irq_vector(irq, mask, result); + err = __assign_irq_vector(irq, mask); spin_unlock_irqrestore(&vector_lock, flags); - return vector; + return err; } #ifndef CONFIG_XEN static void __clear_irq_vector(int irq) { + struct irq_cfg *cfg; cpumask_t mask; int cpu, vector; - BUG_ON(!irq_vector[irq]); + BUG_ON((unsigned)irq >= NR_IRQS); + cfg = &irq_cfg[irq]; + BUG_ON(!cfg->vector); - vector = irq_vector[irq]; - cpus_and(mask, irq_domain[irq], cpu_online_map); + vector = cfg->vector; + cpus_and(mask, cfg->domain, cpu_online_map); for_each_cpu_mask(cpu, mask) per_cpu(vector_irq, cpu)[vector] = -1; - irq_vector[irq] = 0; - irq_domain[irq] = CPU_MASK_NONE; + cfg->vector = 0; + cfg->domain = CPU_MASK_NONE; } void __setup_vector_irq(int cpu) @@ -743,10 +739,10 @@ int irq, vector; /* Mark the inuse vectors */ - for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) { - if (!cpu_isset(cpu, irq_domain[irq])) + for (irq = 0; 
irq < NR_IRQS; ++irq) { + if (!cpu_isset(cpu, irq_cfg[irq].domain)) continue; - vector = irq_vector[irq]; + vector = irq_cfg[irq].vector; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ @@ -754,41 +750,49 @@ irq = per_cpu(vector_irq, cpu)[vector]; if (irq < 0) continue; - if (!cpu_isset(cpu, irq_domain[irq])) + if (!cpu_isset(cpu, irq_cfg[irq].domain)) per_cpu(vector_irq, cpu)[vector] = -1; } } -extern void (*interrupt[NR_IRQS])(void); - static struct irq_chip ioapic_chip; -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 - -static void ioapic_register_intr(int irq, int vector, unsigned long trigger) +static void ioapic_register_intr(int irq, unsigned long trigger) { - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) + if (trigger) set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); - else { - irq_desc[irq].status |= IRQ_DELAYED_DISABLE; + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); - } } #else -#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) +#define ioapic_register_intr(irq,trigger) ((void)0) #endif /* !CONFIG_XEN */ -static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, + int trigger, int polarity) { + struct irq_cfg *cfg = irq_cfg + irq; struct IO_APIC_route_entry entry; - int vector; - unsigned long flags; + cpumask_t mask; + if (!IO_APIC_IRQ(irq)) + return; + + mask = TARGET_CPUS; + if (assign_irq_vector(irq, mask)) + return; + +#ifndef CONFIG_XEN + cpus_and(mask, cfg->domain, mask); +#endif + + apic_printk(APIC_VERBOSE,KERN_DEBUG + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " + "IRQ %d Mode:%i Active:%i)\n", + apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, + irq, trigger, polarity); /* * add it to the IO-APIC irq-routing table: @@ -797,41 +801,23 @@ entry.delivery_mode = INT_DELIVERY_MODE; 
entry.dest_mode = INT_DEST_MODE; + entry.dest = cpu_mask_to_apicid(mask); entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.trigger = trigger; + entry.polarity = polarity; + entry.vector = cfg->vector; - entry.trigger = irq_trigger(idx); - entry.polarity = irq_polarity(idx); - - if (irq_trigger(idx)) { - entry.trigger = 1; + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) entry.mask = 1; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - } - if (/* !apic && */ !IO_APIC_IRQ(irq)) - return; - - if (IO_APIC_IRQ(irq)) { - cpumask_t mask; - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); - if (vector < 0) - return; - - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); - entry.vector = vector; - - ioapic_register_intr(irq, vector, IOAPIC_AUTO); - if (!apic && (irq < 16)) - disable_8259A_irq(irq); - } + ioapic_register_intr(irq, trigger); + if (irq < 16) + disable_8259A_irq(irq); ioapic_write_entry(apic, pin, entry); - - spin_lock_irqsave(&ioapic_lock, flags); - set_native_irq_info(irq, TARGET_CPUS); - spin_unlock_irqrestore(&ioapic_lock, flags); - } static void __init setup_IO_APIC_irqs(void) @@ -856,8 +842,8 @@ irq = pin_2_irq(idx, apic, pin); add_pin_to_irq(irq, apic, pin); - setup_IO_APIC_irq(apic, pin, idx, irq); - + setup_IO_APIC_irq(apic, pin, irq, + irq_trigger(idx), irq_polarity(idx)); } } @@ -888,7 +874,7 @@ */ entry.dest_mode = INT_DEST_MODE; entry.mask = 0; /* unmask IRQ now */ - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.dest = cpu_mask_to_apicid(TARGET_CPUS); entry.delivery_mode = INT_DELIVERY_MODE; entry.polarity = 0; entry.trigger = 0; @@ -988,18 +974,17 @@ printk(KERN_DEBUG ".... 
IRQ redirection table:\n"); - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" - " Stat Dest Deli Vect: \n"); + printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" + " Stat Dmod Deli Vect: \n"); for (i = 0; i <= reg_01.bits.entries; i++) { struct IO_APIC_route_entry entry; entry = ioapic_read_entry(apic, i); - printk(KERN_DEBUG " %02x %03X %02X ", + printk(KERN_DEBUG " %02x %03X ", i, - entry.dest.logical.logical_dest, - entry.dest.physical.physical_dest + entry.dest ); printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", @@ -1263,8 +1248,7 @@ entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = - GET_APIC_ID(apic_read(APIC_ID)); + entry.dest = GET_APIC_ID(apic_read(APIC_ID)); /* * Add it to the IO-APIC irq-routing table: @@ -1349,16 +1333,15 @@ static int ioapic_retrigger_irq(unsigned int irq) { + struct irq_cfg *cfg = &irq_cfg[irq]; cpumask_t mask; - unsigned vector; unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - vector = irq_vector[irq]; cpus_clear(mask); - cpu_set(first_cpu(irq_domain[irq]), mask); + cpu_set(first_cpu(cfg->domain), mask); - send_IPI_mask(mask, vector); + send_IPI_mask(mask, cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -1373,8 +1356,68 @@ * races. 
*/ +#ifdef CONFIG_SMP +asmlinkage void smp_irq_move_cleanup_interrupt(void) +{ + unsigned vector, me; + ack_APIC_irq(); + exit_idle(); + irq_enter(); + + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __get_cpu_var(vector_irq)[vector]; + if (irq >= NR_IRQS) + continue; + + desc = irq_desc + irq; + cfg = irq_cfg + irq; + spin_lock(&desc->lock); + if (!cfg->move_cleanup_count) + goto unlock; + + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) + goto unlock; + + __get_cpu_var(vector_irq)[vector] = -1; + cfg->move_cleanup_count--; +unlock: + spin_unlock(&desc->lock); + } + + irq_exit(); +} + +static void irq_complete_move(unsigned int irq) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned vector, me; + + if (likely(!cfg->move_in_progress)) + return; + + vector = ~get_irq_regs()->orig_rax; + me = smp_processor_id(); + if ((vector == cfg->vector) && + cpu_isset(smp_processor_id(), cfg->domain)) { + cpumask_t cleanup_mask; + + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } +} +#else +static inline void irq_complete_move(unsigned int irq) {} +#endif + static void ack_apic_edge(unsigned int irq) { + irq_complete_move(irq); move_native_irq(irq); ack_APIC_irq(); } @@ -1383,6 +1426,7 @@ { int do_unmask_irq = 0; + irq_complete_move(irq); #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) /* If we are moving the irq we need to mask it */ if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { @@ -1434,7 +1478,7 @@ */ for (irq = 0; irq < NR_IRQS ; irq++) { int tmp = irq; - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { + if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) { /* * Hmm.. 
We don't have an entry for this, * so default to an old-fashioned 8259 @@ -1532,7 +1576,7 @@ entry1.dest_mode = 0; /* physical delivery */ entry1.mask = 0; /* unmask IRQ now */ - entry1.dest.physical.physical_dest = hard_smp_processor_id(); + entry1.dest = hard_smp_processor_id(); entry1.delivery_mode = dest_ExtINT; entry1.polarity = entry0.polarity; entry1.trigger = 0; @@ -1576,15 +1620,14 @@ */ static inline void check_timer(void) { + struct irq_cfg *cfg = irq_cfg + 0; int apic1, pin1, apic2, pin2; - int vector; - cpumask_t mask; /* * get/set the timer IRQ vector: */ disable_8259A_irq(0); - vector = assign_irq_vector(0, TARGET_CPUS, &mask); + assign_irq_vector(0, TARGET_CPUS); /* * Subtle, code in do_timer_interrupt() expects an AEOI @@ -1604,7 +1647,7 @@ apic2 = ioapic_i8259.apic; apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); + cfg->vector, apic1, pin1, apic2, pin2); if (pin1 != -1) { /* @@ -1635,7 +1678,7 @@ /* * legacy devices should be connected to IO APIC #0 */ - setup_ExtINT_IRQ0_pin(apic2, pin2, vector); + setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector); if (timer_irq_works()) { apic_printk(APIC_VERBOSE," works.\n"); nmi_watchdog_default(); @@ -1660,14 +1703,14 @@ disable_8259A_irq(0); irq_desc[0].chip = &lapic_irq_type; - apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_VERBOSE," works.\n"); return; } - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_VERBOSE," failed.\n"); apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); @@ -1821,19 +1864,16 @@ /* Allocate an unused irq */ int irq; int new; - int vector = 0; unsigned long flags; - cpumask_t mask; irq = -ENOSPC; 
spin_lock_irqsave(&vector_lock, flags); for (new = (NR_IRQS - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; - if (irq_vector[new] != 0) + if (irq_cfg[new].vector != 0) continue; - vector = __assign_irq_vector(new, TARGET_CPUS, &mask); - if (likely(vector > 0)) + if (__assign_irq_vector(new, TARGET_CPUS) == 0) irq = new; break; } @@ -1863,12 +1903,15 @@ #ifdef CONFIG_PCI_MSI static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) { - int vector; + struct irq_cfg *cfg = irq_cfg + irq; + int err; unsigned dest; cpumask_t tmp; - vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); - if (vector >= 0) { + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); + if (!err) { + cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); msg->address_hi = MSI_ADDR_BASE_HI; @@ -1888,40 +1931,38 @@ ((INT_DELIVERY_MODE != dest_LowestPrio) ? MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(vector); + MSI_DATA_VECTOR(cfg->vector); } - return vector; + return err; } #ifdef CONFIG_SMP static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_cfg *cfg = irq_cfg + irq; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - int vector; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(mask, tmp, CPU_MASK_ALL); + return; - vector = assign_irq_vector(irq, mask, &tmp); - if (vector < 0) + if (assign_irq_vector(irq, mask)) return; + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); read_msi_msg(irq, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(vector); + msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - set_native_irq_info(irq, mask); + irq_desc[irq].affinity = mask; } #endif /* CONFIG_SMP */ @@ -1940,24 +1981,31 @@ .retrigger = ioapic_retrigger_irq, }; -int arch_setup_msi_irq(unsigned int irq, struct 
pci_dev *dev) +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { struct msi_msg msg; - int ret; + int irq, ret; + irq = create_irq(); + if (irq < 0) + return irq; + + set_irq_msi(irq, desc); ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) + if (ret < 0) { + destroy_irq(irq); return ret; + } write_msi_msg(irq, &msg); set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - return 0; + return irq; } void arch_teardown_msi_irq(unsigned int irq) { - return; + destroy_irq(irq); } #endif /* CONFIG_PCI_MSI */ @@ -1985,24 +2033,22 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_cfg *cfg = irq_cfg + irq; unsigned int dest; cpumask_t tmp; - int vector; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(mask, tmp, CPU_MASK_ALL); + return; - vector = assign_irq_vector(irq, mask, &tmp); - if (vector < 0) + if (assign_irq_vector(irq, mask)) return; + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - target_ht_irq(irq, dest, vector); - set_native_irq_info(irq, mask); + target_ht_irq(irq, dest, cfg->vector); + irq_desc[irq].affinity = mask; } #endif @@ -2019,14 +2065,17 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { - int vector; + struct irq_cfg *cfg = irq_cfg + irq; + int err; cpumask_t tmp; - vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); - if (vector >= 0) { + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); + if (!err) { struct ht_irq_msg msg; unsigned dest; + cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -2034,7 +2083,7 @@ msg.address_lo = HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(vector) | + HT_IRQ_LOW_VECTOR(cfg->vector) | ((INT_DEST_MODE == 0) ? 
HT_IRQ_LOW_DM_PHYSICAL : HT_IRQ_LOW_DM_LOGICAL) | @@ -2049,7 +2098,7 @@ set_irq_chip_and_handler_name(irq, &ht_irq_chip, handle_edge_irq, "edge"); } - return vector; + return err; } #endif /* CONFIG_HT_IRQ */ @@ -2074,13 +2123,8 @@ } -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { - struct IO_APIC_route_entry entry; - unsigned long flags; - int vector; - cpumask_t mask; - if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); @@ -2093,42 +2137,7 @@ if (irq >= 16) add_pin_to_irq(irq, ioapic, pin); - - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); - if (vector < 0) - return vector; - - /* - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. - * Note that we mask (disable) IRQs now -- these get enabled when the - * corresponding device driver registers for this IRQ. - */ - - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); - entry.trigger = edge_level; - entry.polarity = active_high_low; - entry.mask = 1; /* Disabled (masked) */ - entry.vector = vector & 0xff; - - apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, - edge_level, active_high_low); - - ioapic_register_intr(irq, entry.vector, edge_level); - - if (!ioapic && (irq < 16)) - disable_8259A_irq(irq); - - ioapic_write_entry(ioapic, pin, entry); - - spin_lock_irqsave(&ioapic_lock, flags); - set_native_irq_info(irq, TARGET_CPUS); - spin_unlock_irqrestore(&ioapic_lock, flags); + setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); return 0; } @@ -2161,8 +2170,10 @@ * when you have too many devices, because at that time only boot * 
cpu is online. */ - if(!irq_vector[irq]) - setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); + if (!irq_cfg[irq].vector) + setup_IO_APIC_irq(ioapic, pin, irq, + irq_trigger(irq_entry), + irq_polarity(irq_entry)); else set_ioapic_affinity_irq(irq, TARGET_CPUS); } --- a/arch/x86/kernel/irq_32-xen.c +++ b/arch/x86/kernel/irq_32-xen.c @@ -10,7 +10,6 @@ * io_apic.c.) */ -#include #include #include #include @@ -19,19 +18,34 @@ #include #include +#include +#include + DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); -#ifndef CONFIG_X86_LOCAL_APIC /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. */ void ack_bad_irq(unsigned int irq) { - printk("unexpected IRQ trap at vector %02x\n", irq); -} + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); + +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN) + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. 
+ * But only ack when the APIC is enabled -AK + */ + if (cpu_has_apic) + ack_APIC_irq(); #endif +} #ifdef CONFIG_4KSTACKS /* --- a/arch/x86/kernel/irq_64-xen.c +++ b/arch/x86/kernel/irq_64-xen.c @@ -18,6 +18,7 @@ #include #include #include +#include atomic_t irq_err_count; @@ -120,9 +121,15 @@ if (likely(irq < NR_IRQS)) generic_handle_irq(irq); - else if (printk_ratelimit()) - printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", - __func__, smp_processor_id(), irq); + else { +#ifndef CONFIG_XEN + if (!disable_apic) + ack_APIC_irq(); +#endif + if (printk_ratelimit()) + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", + __func__, smp_processor_id(), irq); + } irq_exit(); --- a/arch/x86/kernel/microcode-xen.c +++ b/arch/x86/kernel/microcode-xen.c @@ -108,7 +108,7 @@ return ret; } -static struct file_operations microcode_fops = { +static const struct file_operations microcode_fops = { .owner = THIS_MODULE, .write = microcode_write, .open = microcode_open, --- a/arch/x86/kernel/mpparse_32-xen.c +++ b/arch/x86/kernel/mpparse_32-xen.c @@ -1079,7 +1079,7 @@ static int gsi_to_irq[MAX_GSI_NUM]; /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_fadt.sci_int == gsi) + if (acpi_gbl_FADT.sci_interrupt == gsi) return gsi; ioapic = mp_find_ioapic(gsi); @@ -1136,7 +1136,7 @@ /* * Don't assign IRQ used by ACPI SCI */ - if (gsi == acpi_fadt.sci_int) + if (gsi == acpi_gbl_FADT.sci_interrupt) gsi = pci_irq++; gsi_to_irq[irq] = gsi; } else { --- a/arch/x86/kernel/mpparse_64-xen.c +++ b/arch/x86/kernel/mpparse_64-xen.c @@ -60,9 +60,9 @@ /* Processor that is doing the boot up */ unsigned int boot_cpu_id = -1U; /* Internal processor count */ -unsigned int num_processors __initdata = 0; +unsigned int num_processors __cpuinitdata = 0; -unsigned disabled_cpus __initdata; +unsigned disabled_cpus __cpuinitdata; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; @@ -808,7 +808,7 @@ return gsi; /* Don't set up the ACPI 
SCI because it's already set up */ - if (acpi_fadt.sci_int == gsi) + if (acpi_gbl_FADT.sci_interrupt == gsi) return gsi; ioapic = mp_find_ioapic(gsi); --- a/arch/x86/kernel/pci-dma_32-xen.c +++ b/arch/x86/kernel/pci-dma_32-xen.c @@ -317,7 +317,7 @@ return DMA_MEMORY_IO; free1_out: - kfree(dev->dma_mem->bitmap); + kfree(dev->dma_mem); out: if (mem_base) iounmap(mem_base); --- a/arch/x86/kernel/pci-swiotlb_64-xen.c +++ b/arch/x86/kernel/pci-swiotlb_64-xen.c @@ -35,7 +35,7 @@ #endif }; -void pci_swiotlb_init(void) +void __init pci_swiotlb_init(void) { #if 0 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ --- a/arch/x86/kernel/pcspeaker.c +++ b/arch/x86/kernel/pcspeaker.c @@ -7,6 +7,11 @@ struct platform_device *pd; int ret; +#ifdef CONFIG_XEN + if (!is_initial_xendomain()) + return 0; +#endif + pd = platform_device_alloc("pcspkr", -1); if (!pd) return -ENOMEM; --- a/arch/x86/kernel/process_32-xen.c +++ b/arch/x86/kernel/process_32-xen.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -160,6 +161,7 @@ /* endless idle loop with no priority at all */ while (1) { + tick_nohz_stop_sched_tick(); while (!need_resched()) { void (*idle)(void); @@ -175,6 +177,7 @@ __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } + tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); schedule(); preempt_disable(); @@ -247,8 +250,8 @@ regs->eax,regs->ebx,regs->ecx,regs->edx); printk("ESI: %08lx EDI: %08lx EBP: %08lx", regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x GS: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); + printk(" DS: %04x ES: %04x FS: %04x\n", + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); cr0 = read_cr0(); cr2 = read_cr2(); @@ -279,7 +282,7 @@ regs.xds = __USER_DS; regs.xes = __USER_DS; - regs.xgs = __KERNEL_PDA; + regs.xfs = __KERNEL_PDA; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; regs.xcs = __KERNEL_CS | get_kernel_rpl(); @@ -356,7 +359,7 @@ 
p->thread.eip = (unsigned long) ret_from_fork; - savesegment(fs,p->thread.fs); + savesegment(gs,p->thread.gs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { @@ -434,8 +437,8 @@ dump->regs.eax = regs->eax; dump->regs.ds = regs->xds; dump->regs.es = regs->xes; - savesegment(fs,dump->regs.fs); - dump->regs.gs = regs->xgs; + dump->regs.fs = regs->xfs; + savesegment(gs,dump->regs.gs); dump->regs.orig_eax = regs->orig_eax; dump->regs.eip = regs->eip; dump->regs.cs = regs->xcs; @@ -616,16 +619,6 @@ prefetch(&next->i387.fxsave); /* - * Restore %fs if needed. - * - * Glibc normally makes %fs be zero. - */ - if (unlikely(next->fs)) - loadsegment(fs, next->fs); - - write_pda(pcurrent, next_p); - - /* * Now maybe handle debug registers */ if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) @@ -633,6 +626,15 @@ disable_tsc(prev_p, next_p); + /* + * Leave lazy mode, flushing any hypercalls made here. + * This must be done before restoring TLS segments so + * the GDT and LDT are properly updated, and must be + * done before math_state_restore, so the TS bit is up + * to date. 
+ */ + arch_leave_lazy_cpu_mode(); + /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the * chances of needing FPU soon are obviously high now @@ -640,6 +642,14 @@ if (next_p->fpu_counter > 5) math_state_restore(); + /* + * Restore %gs if needed (which is common) + */ + if (prev->gs | next->gs) + loadsegment(gs, next->gs); + + write_pda(pcurrent, next_p); + return prev_p; } --- a/arch/x86/kernel/process_64-xen.c +++ b/arch/x86/kernel/process_64-xen.c @@ -338,14 +338,17 @@ void flush_thread(void) { struct task_struct *tsk = current; - struct thread_info *t = current_thread_info(); - if (t->flags & _TIF_ABI_PENDING) { - t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); - if (t->flags & _TIF_IA32) + if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { + clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); + if (test_tsk_thread_flag(tsk, TIF_IA32)) { + clear_tsk_thread_flag(tsk, TIF_IA32); + } else { + set_tsk_thread_flag(tsk, TIF_IA32); current_thread_info()->status |= TS_COMPAT; + } } - t->flags &= ~_TIF_DEBUG; + clear_tsk_thread_flag(tsk, TIF_DEBUG); tsk->thread.debugreg0 = 0; tsk->thread.debugreg1 = 0; --- a/arch/x86/kernel/setup_32-xen.c +++ b/arch/x86/kernel/setup_32-xen.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -151,7 +150,7 @@ #define RAMDISK_PROMPT_FLAG 0x8000 #define RAMDISK_LOAD_FLAG 0x4000 -static char command_line[COMMAND_LINE_SIZE]; +static char __initdata command_line[COMMAND_LINE_SIZE]; unsigned char __initdata boot_params[PARAM_SIZE]; @@ -650,8 +649,8 @@ if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) i = COMMAND_LINE_SIZE; - memcpy(saved_command_line, xen_start_info->cmd_line, i); - saved_command_line[i - 1] = '\0'; + memcpy(boot_command_line, xen_start_info->cmd_line, i); + boot_command_line[i - 1] = '\0'; parse_early_param(); if (user_defined_memmap) { @@ -659,11 +658,19 @@ print_memory_map("user"); } - strlcpy(command_line, saved_command_line, 
COMMAND_LINE_SIZE); + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; max_low_pfn = setup_memory(); +#ifdef CONFIG_VMI + /* + * Must be after max_low_pfn is determined, and before kernel + * pagetables are setup. + */ + vmi_init(); +#endif + /* * NOTE: before this point _nobody_ is allowed to allocate * any memory using the bootmem allocator. Although the @@ -826,7 +833,6 @@ conswitchp = &dummy_con; #endif } - tsc_init(); } static int @@ -836,31 +842,3 @@ /* we're never actually going to get here... */ return NOTIFY_DONE; } - -static __init int add_pcspkr(void) -{ - struct platform_device *pd; - int ret; - - if (!is_initial_xendomain()) - return 0; - - pd = platform_device_alloc("pcspkr", -1); - if (!pd) - return -ENOMEM; - - ret = platform_device_add(pd); - if (ret) - platform_device_put(pd); - - return ret; -} -device_initcall(add_pcspkr); - -/* - * Local Variables: - * mode:c - * c-file-style:"k&r" - * c-basic-offset:8 - * End: - */ --- a/arch/x86/kernel/setup_64-xen.c +++ b/arch/x86/kernel/setup_64-xen.c @@ -144,7 +144,7 @@ extern int root_mountflags; -char command_line[COMMAND_LINE_SIZE]; +char __initdata command_line[COMMAND_LINE_SIZE]; struct resource standard_io_resources[] = { { .name = "dma1", .start = 0x00, .end = 0x1f, @@ -182,134 +182,6 @@ .flags = IORESOURCE_RAM, }; -#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM) - -static struct resource system_rom_resource = { - .name = "System ROM", - .start = 0xf0000, - .end = 0xfffff, - .flags = IORESOURCE_ROM, -}; - -static struct resource extension_rom_resource = { - .name = "Extension ROM", - .start = 0xe0000, - .end = 0xeffff, - .flags = IORESOURCE_ROM, -}; - -static struct resource adapter_rom_resources[] = { - { .name = "Adapter ROM", .start = 0xc8000, .end = 0, - .flags = IORESOURCE_ROM }, - { .name = "Adapter ROM", .start = 0, .end = 0, - .flags = IORESOURCE_ROM }, - { .name = "Adapter ROM", .start = 0, .end = 0, - .flags = 
IORESOURCE_ROM }, - { .name = "Adapter ROM", .start = 0, .end = 0, - .flags = IORESOURCE_ROM }, - { .name = "Adapter ROM", .start = 0, .end = 0, - .flags = IORESOURCE_ROM }, - { .name = "Adapter ROM", .start = 0, .end = 0, - .flags = IORESOURCE_ROM } -}; - -static struct resource video_rom_resource = { - .name = "Video ROM", - .start = 0xc0000, - .end = 0xc7fff, - .flags = IORESOURCE_ROM, -}; - -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_RAM, -}; - -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) - -static int __init romchecksum(unsigned char *rom, unsigned long length) -{ - unsigned char *p, sum = 0; - - for (p = rom; p < rom + length; p++) - sum += *p; - return sum == 0; -} - -static void __init probe_roms(void) -{ - unsigned long start, length, upper; - unsigned char *rom; - int i; - -#ifdef CONFIG_XEN - /* Nothing to do if not running in dom0. */ - if (!is_initial_xendomain()) - return; -#endif - - /* video rom */ - upper = adapter_rom_resources[0].start; - for (start = video_rom_resource.start; start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - video_rom_resource.start = start; - - /* 0 < length <= 0x7f * 512, historically */ - length = rom[2] * 512; - - /* if checksum okay, trust length byte */ - if (length && romchecksum(rom, length)) - video_rom_resource.end = start + length - 1; - - request_resource(&iomem_resource, &video_rom_resource); - break; - } - - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; - if (start < upper) - start = upper; - - /* system rom */ - request_resource(&iomem_resource, &system_rom_resource); - upper = system_rom_resource.start; - - /* check for extension rom (ignore length byte!) 
*/ - rom = isa_bus_to_virt(extension_rom_resource.start); - if (romsignature(rom)) { - length = extension_rom_resource.end - extension_rom_resource.start + 1; - if (romchecksum(rom, length)) { - request_resource(&iomem_resource, &extension_rom_resource); - upper = extension_rom_resource.start; - } - } - - /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; - start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = rom[2] * 512; - - /* but accept any length that fits if checksum okay */ - if (!length || start + length > upper || !romchecksum(rom, length)) - continue; - - adapter_rom_resources[i].start = start; - adapter_rom_resources[i].end = start + length - 1; - request_resource(&iomem_resource, &adapter_rom_resources[i]); - - start = adapter_rom_resources[i++].end & ~2047UL; - } -} - #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header * stored by the crashed kernel. This option will be passed @@ -406,7 +278,7 @@ #ifdef CONFIG_XEN extern struct e820map machine_e820; - printk(KERN_INFO "Command line: %s\n", saved_command_line); + printk(KERN_INFO "Command line: %s\n", boot_command_line); /* Register a call for panic conditions. 
*/ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); @@ -433,7 +305,7 @@ ARCH_SETUP #else - printk(KERN_INFO "Command line: %s\n", saved_command_line); + printk(KERN_INFO "Command line: %s\n", boot_command_line); ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); screen_info = SCREEN_INFO; @@ -464,7 +336,7 @@ early_identify_cpu(&boot_cpu_data); - strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; parse_early_param(); @@ -534,6 +406,11 @@ /* reserve ebda region */ if (ebda_addr) reserve_bootmem_generic(ebda_addr, ebda_size); +#ifdef CONFIG_NUMA + /* reserve nodemap region */ + if (nodemap_addr) + reserve_bootmem_generic(nodemap_addr, nodemap_size); +#endif #ifdef CONFIG_SMP /* @@ -734,10 +611,8 @@ #endif /* - * Request address space for all standard RAM and ROM resources - * and also for regions reported as reserved by the e820. + * We trust e820 completely. No explicit ROM probing in memory. 
*/ - probe_roms(); #ifdef CONFIG_XEN if (is_initial_xendomain()) { struct xen_memory_map memmap; @@ -756,8 +631,6 @@ e820_mark_nosave_regions(); #endif - request_resource(&iomem_resource, &video_ram_resource); - { unsigned i; /* request I/O space for devices used on all i[345]86 PCs */ @@ -1334,7 +1207,8 @@ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow", + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", + "3dnowext", "3dnow", /* Transmeta-defined */ "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, @@ -1352,7 +1226,7 @@ /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, + NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ @@ -1362,8 +1236,10 @@ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", + "altmovcr8", "abm", "sse4a", + "misalignsse", "3dnowprefetch", + "osvw", "ibs", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; @@ -1374,6 +1250,9 @@ "ttp", /* thermal trip */ "tm", "stc", + "100mhzsteps", + "hwpstate", + NULL, /* tsc invariant mapped to constant_tsc */ NULL, /* nothing */ /* constant_tsc - moved to flags */ }; @@ -1490,26 +1369,3 @@ .stop = c_stop, .show = show_cpuinfo, }; - -#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE) -#include -static __init int add_pcspkr(void) -{ - struct platform_device *pd; - int ret; - - if (!is_initial_xendomain()) - return 0; 
- - pd = platform_device_alloc("pcspkr", -1); - if (!pd) - return -ENOMEM; - - ret = platform_device_add(pd); - if (ret) - platform_device_put(pd); - - return ret; -} -device_initcall(add_pcspkr); -#endif --- a/arch/x86/kernel/smp_32-xen.c +++ b/arch/x86/kernel/smp_32-xen.c @@ -335,8 +335,7 @@ /* * i'm not happy about this global shared spinlock in the * MM hot path, but we'll see how contended it is. - * Temporarily this turns IRQs off, so that lockups are - * detected by the NMI watchdog. + * AK: x86-64 has a faster method that could be ported. */ spin_lock(&tlbstate_lock); @@ -361,7 +360,7 @@ while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ - mb(); + cpu_relax(); flush_mm = NULL; flush_va = 0; --- a/arch/x86/kernel/time_32-xen.c +++ b/arch/x86/kernel/time_32-xen.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -75,25 +76,17 @@ #include #include -#if defined (__i386__) -#include +#ifdef CONFIG_X86_32 #include DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -#endif - -#define XEN_SHIFT 22 - int pit_latch_buggy; /* extern */ - -#if defined(__x86_64__) -unsigned long vxtime_hz = PIT_TICK_RATE; -struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ +#else volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; -struct timespec __xtime __section_xtime; -struct timezone __sys_tz __section_sys_tz; #endif +#define XEN_SHIFT 22 + unsigned int cpu_khz; /* Detected as we calibrate the TSC */ EXPORT_SYMBOL(cpu_khz); @@ -113,9 +106,6 @@ static struct timespec shadow_tv; static u32 shadow_tv_version; -static struct timeval monotonic_tv; -static spinlock_t monotonic_lock = SPIN_LOCK_UNLOCKED; - /* Keep track of last time we did processing/updating of jiffies and xtime. */ static u64 processed_system_time; /* System time (ns) at last processing. 
*/ static DEFINE_PER_CPU(u64, processed_system_time); @@ -228,7 +218,7 @@ } #endif -void init_cpu_khz(void) +static void init_cpu_khz(void) { u64 __cpu_khz = 1000000ULL << 32; struct vcpu_time_info *info = &vcpu_info(0)->time; @@ -247,16 +237,6 @@ return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); } -#ifdef CONFIG_X86_64 -static unsigned long get_usec_offset(struct shadow_time_info *shadow) -{ - u64 now, delta; - rdtscll(now); - delta = now - shadow->tsc_timestamp; - return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift); -} -#endif - static void __update_wallclock(time_t sec, long nsec) { long wtm_nsec, xtime_nsec; @@ -364,138 +344,6 @@ } EXPORT_SYMBOL(rtc_cmos_write); -#ifdef CONFIG_X86_64 - -/* - * This version of gettimeofday has microsecond resolution - * and better than microsecond precision on fast x86 machines with TSC. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long seq; - unsigned long usec, sec; - unsigned long flags; - s64 nsec; - unsigned int cpu; - struct shadow_time_info *shadow; - u32 local_time_version; - - cpu = get_cpu(); - shadow = &per_cpu(shadow_time, cpu); - - do { - local_time_version = shadow->version; - seq = read_seqbegin(&xtime_lock); - - usec = get_usec_offset(shadow); - - sec = xtime.tv_sec; - usec += (xtime.tv_nsec / NSEC_PER_USEC); - - nsec = shadow->system_timestamp - processed_system_time; - __normalize_time(&sec, &nsec); - usec += (long)nsec / NSEC_PER_USEC; - - if (unlikely(!time_values_up_to_date(cpu))) { - /* - * We may have blocked for a long time, - * rendering our calculations invalid - * (e.g. the time delta may have - * overflowed). Detect that and recalculate - * with fresh values. 
- */ - get_time_values_from_xen(cpu); - continue; - } - } while (read_seqretry(&xtime_lock, seq) || - (local_time_version != shadow->version)); - - put_cpu(); - - while (usec >= USEC_PER_SEC) { - usec -= USEC_PER_SEC; - sec++; - } - - spin_lock_irqsave(&monotonic_lock, flags); - if ((sec > monotonic_tv.tv_sec) || - ((sec == monotonic_tv.tv_sec) && (usec > monotonic_tv.tv_usec))) - { - monotonic_tv.tv_sec = sec; - monotonic_tv.tv_usec = usec; - } else { - sec = monotonic_tv.tv_sec; - usec = monotonic_tv.tv_usec; - } - spin_unlock_irqrestore(&monotonic_lock, flags); - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -EXPORT_SYMBOL(do_gettimeofday); - -int do_settimeofday(struct timespec *tv) -{ - time_t sec; - s64 nsec; - unsigned int cpu; - struct shadow_time_info *shadow; - struct xen_platform_op op; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - cpu = get_cpu(); - shadow = &per_cpu(shadow_time, cpu); - - write_seqlock_irq(&xtime_lock); - - /* - * Ensure we don't get blocked for a long time so that our time delta - * overflows. If that were to happen then our shadow time values would - * be stale, so we can retry with fresh ones. - */ - for (;;) { - nsec = tv->tv_nsec - get_nsec_offset(shadow); - if (time_values_up_to_date(cpu)) - break; - get_time_values_from_xen(cpu); - } - sec = tv->tv_sec; - __normalize_time(&sec, &nsec); - - if (is_initial_xendomain() && !independent_wallclock) { - op.cmd = XENPF_settime; - op.u.settime.secs = sec; - op.u.settime.nsecs = nsec; - op.u.settime.system_time = shadow->system_timestamp; - WARN_ON(HYPERVISOR_platform_op(&op)); - update_wallclock(); - } else if (independent_wallclock) { - nsec -= shadow->system_timestamp; - __normalize_time(&sec, &nsec); - __update_wallclock(sec, nsec); - } - - /* Reset monotonic gettimeofday() timeval. 
*/ - spin_lock(&monotonic_lock); - monotonic_tv.tv_sec = 0; - monotonic_tv.tv_usec = 0; - spin_unlock(&monotonic_lock); - - write_sequnlock_irq(&xtime_lock); - - put_cpu(); - - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - -#endif - static void sync_xen_wallclock(unsigned long dummy); static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0); static void sync_xen_wallclock(unsigned long dummy) @@ -544,15 +392,7 @@ return retval; } -#ifdef CONFIG_X86_64 -/* monotonic_clock(): returns # of nanoseconds passed since time_init() - * Note: This function is required to return accurate - * time even in the absence of multiple timer ticks. - */ -unsigned long long monotonic_clock(void) -#else unsigned long long sched_clock(void) -#endif { unsigned int cpu = get_cpu(); struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); @@ -572,21 +412,18 @@ return time; } -#ifdef CONFIG_X86_64 -EXPORT_SYMBOL(monotonic_clock); - -unsigned long long sched_clock(void) -{ - return monotonic_clock(); -} -#endif unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); #if defined(CONFIG_SMP) || defined(__x86_64__) - if (!user_mode_vm(regs) && in_lock_functions(pc)) { +# ifdef __i386__ + if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) +# else + if (!user_mode(regs) +# endif + && in_lock_functions(pc)) { # ifdef CONFIG_FRAME_POINTER # ifdef __i386__ return ((unsigned long *)regs->ebp)[1]; @@ -595,14 +432,11 @@ # endif # else # ifdef __i386__ - unsigned long *sp; - if ((regs->xcs & 2) == 0) - sp = (unsigned long *)&regs->esp; - else - sp = (unsigned long *)regs->esp; + unsigned long *sp = (unsigned long *)&regs->esp; # else unsigned long *sp = (unsigned long *)regs->rsp; # endif + /* Return address is either directly at stack pointer or above a saved eflags. Eflags has bits 22-31 zero, kernel addresses don't. 
*/ @@ -755,19 +589,6 @@ return IRQ_HANDLED; } -#ifndef CONFIG_X86_64 - -void tsc_init(void) -{ - init_cpu_khz(); - printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - - use_tsc_delay(); -} - -#include - void mark_tsc_unstable(void) { #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */ @@ -821,21 +642,9 @@ .mask = CLOCKSOURCE_MASK(64), .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */ .shift = XEN_SHIFT, - .is_continuous = 1, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static int __init init_xen_clocksource(void) -{ - clocksource_xen.mult = clocksource_khz2mult(cpu_khz, - clocksource_xen.shift); - - return clocksource_register(&clocksource_xen); -} - -module_init(init_xen_clocksource); - -#endif - static void init_missing_ticks_accounting(unsigned int cpu) { struct vcpu_register_runstate_memory_area area; @@ -856,7 +665,7 @@ } /* not static: needed by APM */ -unsigned long get_cmos_time(void) +unsigned long read_persistent_clock(void) { unsigned long retval; unsigned long flags; @@ -869,11 +678,11 @@ return retval; } -EXPORT_SYMBOL(get_cmos_time); static void sync_cmos_clock(unsigned long dummy); static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); +int no_sync_cmos_clock; static void sync_cmos_clock(unsigned long dummy) { @@ -917,7 +726,8 @@ void notify_arch_cmos_timer(void) { - mod_timer(&sync_cmos_timer, jiffies + 1); + if (!no_sync_cmos_clock) + mod_timer(&sync_cmos_timer, jiffies + 1); mod_timer(&sync_xen_wallclock_timer, jiffies + 1); } @@ -950,29 +760,11 @@ device_initcall(time_init_device); -#ifdef CONFIG_HPET_TIMER extern void (*late_time_init)(void); -/* Duplicate of time_init() below, with hpet_enable part added */ -static void __init hpet_time_init(void) -{ - struct timespec ts; - ts.tv_sec = get_cmos_time(); - ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - - do_settimeofday(&ts); - - if ((hpet_enable() >= 0) && hpet_use_timer) { - printk("Using HPET for 
base-timer\n"); - } - - do_time_init(); -} -#endif /* Dynamically-mapped IRQ. */ DEFINE_PER_CPU(int, timer_irq); -extern void (*late_time_init)(void); static void setup_cpu0_timer_irq(void) { per_cpu(timer_irq, 0) = @@ -992,16 +784,9 @@ void __init time_init(void) { -#ifdef CONFIG_HPET_TIMER - if (is_hpet_capable()) { - /* - * HPET initialization needs to do memory-mapped io. So, let - * us do a late initialization after mem_init(). - */ - late_time_init = hpet_time_init; - return; - } -#endif + init_cpu_khz(); + printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0, &xen_set_periodic_tick)) { @@ -1020,18 +805,12 @@ per_cpu(processed_system_time, 0) = processed_system_time; init_missing_ticks_accounting(0); - update_wallclock(); + clocksource_register(&clocksource_xen); -#ifdef CONFIG_X86_64 - init_cpu_khz(); - printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); + update_wallclock(); - vxtime.mode = VXTIME_TSC; - vxtime.quot = (1000000L << 32) / vxtime_hz; - vxtime.tsc_quot = (1000L << 32) / cpu_khz; - sync_core(); - rdtscll(vxtime.last_tsc); +#ifndef CONFIG_X86_64 + use_tsc_delay(); #endif /* Cannot request_irq() until kmem is initialised. 
*/ @@ -1277,7 +1056,7 @@ }; static int __init xen_sysctl_init(void) { - (void)register_sysctl_table(xen_table, 0); + (void)register_sysctl_table(xen_table); return 0; } __initcall(xen_sysctl_init); --- a/arch/x86/kernel/traps_32-xen.c +++ b/arch/x86/kernel/traps_32-xen.c @@ -100,6 +100,7 @@ asmlinkage void machine_check(void); int kstack_depth_to_print = 24; +static unsigned int code_bytes = 64; ATOMIC_NOTIFIER_HEAD(i386die_chain); int register_die_notifier(struct notifier_block *nb) @@ -297,10 +298,11 @@ int i; int in_kernel = 1; unsigned long esp; - unsigned short ss; + unsigned short ss, gs; esp = (unsigned long) (&regs->esp); savesegment(ss, ss); + savesegment(gs, gs); if (user_mode_vm(regs)) { in_kernel = 0; esp = regs->esp; @@ -319,8 +321,8 @@ regs->eax, regs->ebx, regs->ecx, regs->edx); printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); + printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", TASK_COMM_LEN, current->comm, current->pid, current_thread_info(), current, current->thread_info); @@ -330,7 +332,8 @@ */ if (in_kernel) { u8 *eip; - int code_bytes = 64; + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; unsigned char c; printk("\n" KERN_EMERG "Stack: "); @@ -338,14 +341,14 @@ printk(KERN_EMERG "Code: "); - eip = (u8 *)regs->eip - 43; + eip = (u8 *)regs->eip - code_prologue; if (eip < (u8 *)PAGE_OFFSET || probe_kernel_address(eip, c)) { /* try starting at EIP */ eip = (u8 *)regs->eip; - code_bytes = 32; + code_len = code_len - code_prologue + 1; } - for (i = 0; i < code_bytes; i++, eip++) { + for (i = 0; i < code_len; i++, eip++) { if (eip < (u8 *)PAGE_OFFSET || probe_kernel_address(eip, c)) { printk(" Bad EIP
value."); @@ -1134,3 +1137,13 @@ return 1; } __setup("kstack=", kstack_setup); + +static int __init code_bytes_setup(char *s) +{ + code_bytes = simple_strtoul(s, NULL, 0); + if (code_bytes > 8192) + code_bytes = 8192; + + return 1; +} +__setup("code_bytes=", code_bytes_setup); --- a/arch/x86/kernel/vsyscall_64-xen.c +++ b/arch/x86/kernel/vsyscall_64-xen.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -44,56 +46,41 @@ #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) #define __syscall_clobber "r11","rcx","memory" -int __sysctl_vsyscall __section_sysctl_vsyscall = 1; -seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; +struct vsyscall_gtod_data_t { + seqlock_t lock; + int sysctl_enabled; + struct timeval wall_time_tv; + struct timezone sys_tz; + cycle_t offset_base; + struct clocksource clock; +}; int __vgetcpu_mode __section_vgetcpu_mode; -#include - -static __always_inline void timeval_normalize(struct timeval * tv) +struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data = { - time_t __sec; - - __sec = tv->tv_usec / 1000000; - if (__sec) { - tv->tv_usec %= 1000000; - tv->tv_sec += __sec; - } -} + .lock = SEQLOCK_UNLOCKED, + .sysctl_enabled = 1, +}; -static __always_inline void do_vgettimeofday(struct timeval * tv) +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { - long sequence, t; - unsigned long sec, usec; + unsigned long flags; - do { - sequence = read_seqbegin(&__xtime_lock); - - sec = __xtime.tv_sec; - usec = __xtime.tv_nsec / 1000; - - if (__vxtime.mode != VXTIME_HPET) { - t = get_cycles_sync(); - if (t < __vxtime.last_tsc) - t = __vxtime.last_tsc; - usec += ((t - __vxtime.last_tsc) * - __vxtime.tsc_quot) >> 32; - /* See comment in x86_64 do_gettimeofday. 
*/ - } else { - usec += ((readl((void __iomem *) - fix_to_virt(VSYSCALL_HPET) + 0xf0) - - __vxtime.last) * __vxtime.quot) >> 32; - } - } while (read_seqretry(&__xtime_lock, sequence)); - - tv->tv_sec = sec + usec / 1000000; - tv->tv_usec = usec % 1000000; + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + /* copy vsyscall data */ + vsyscall_gtod_data.clock = *clock; + vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; + vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; + vsyscall_gtod_data.sys_tz = sys_tz; + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } -/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */ +/* RED-PEN may want to readd seq locking, but then the variable should be + * write-once. + */ static __always_inline void do_get_tz(struct timezone * tz) { - *tz = __sys_tz; + *tz = __vsyscall_gtod_data.sys_tz; } static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) @@ -101,7 +88,8 @@ int ret; asm volatile("vsysc2: syscall" : "=a" (ret) - : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber ); + : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) + : __syscall_clobber ); return ret; } @@ -114,10 +102,44 @@ return secs; } +static __always_inline void do_vgettimeofday(struct timeval * tv) +{ + cycle_t now, base, mask, cycle_delta; + unsigned long seq, mult, shift, nsec_delta; + cycle_t (*vread)(void); + do { + seq = read_seqbegin(&__vsyscall_gtod_data.lock); + + vread = __vsyscall_gtod_data.clock.vread; + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) { + gettimeofday(tv,NULL); + return; + } + now = vread(); + base = __vsyscall_gtod_data.clock.cycle_last; + mask = __vsyscall_gtod_data.clock.mask; + mult = __vsyscall_gtod_data.clock.mult; + shift = __vsyscall_gtod_data.clock.shift; + + *tv = __vsyscall_gtod_data.wall_time_tv; + + } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); + + /* calculate interval: */ + cycle_delta = 
(now - base) & mask; + /* convert to nsecs: */ + nsec_delta = (cycle_delta * mult) >> shift; + + /* convert to usecs and add to timespec: */ + tv->tv_usec += nsec_delta / NSEC_PER_USEC; + while (tv->tv_usec >= USEC_PER_SEC) { + tv->tv_sec += 1; + tv->tv_usec -= USEC_PER_SEC; + } +} + int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) { - if (!__sysctl_vsyscall) - return gettimeofday(tv,tz); if (tv) do_vgettimeofday(tv); if (tz) @@ -129,11 +151,11 @@ * unlikely */ time_t __vsyscall(1) vtime(time_t *t) { - if (!__sysctl_vsyscall) + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) return time_syscall(t); else if (t) - *t = __xtime.tv_sec; - return __xtime.tv_sec; + *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; + return __vsyscall_gtod_data.wall_time_tv.tv_sec; } /* Fast way to get current CPU and node. @@ -210,7 +232,7 @@ ret = -ENOMEM; goto out; } - if (!sysctl_vsyscall) { + if (!vsyscall_gtod_data.sysctl_enabled) { writew(SYSCALL, map1); writew(SYSCALL, map2); } else { @@ -232,16 +254,17 @@ static ctl_table kernel_table2[] = { { .ctl_name = 99, .procname = "vsyscall64", - .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644, + .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), + .mode = 0644, .strategy = vsyscall_sysctl_nostrat, .proc_handler = vsyscall_sysctl_change }, - { 0, } + {} }; static ctl_table kernel_root_table2[] = { { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, .child = kernel_table2 }, - { 0 }, + {} }; #endif @@ -304,14 +327,14 @@ BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); map_vsyscall(); #ifdef CONFIG_XEN - sysctl_vsyscall = 0; /* disable vgettimeofay() */ + vsyscall_gtod_data.sysctl_enabled = 0; /* disable vgettimeofday() */ if (boot_cpu_has(X86_FEATURE_RDTSCP)) vgetcpu_mode = VGETCPU_RDTSCP; else vgetcpu_mode = VGETCPU_LSL; #endif #ifdef CONFIG_SYSCTL - register_sysctl_table(kernel_root_table2, 0); + register_sysctl_table(kernel_root_table2); #endif
on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); hotcpu_notifier(cpu_vsyscall_notifier, 0); --- a/arch/x86/mm/fault_32-xen.c +++ b/arch/x86/mm/fault_32-xen.c @@ -46,43 +46,17 @@ } EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); -static inline int notify_page_fault(enum die_val val, const char *str, - struct pt_regs *regs, long err, int trap, int sig) +static inline int notify_page_fault(struct pt_regs *regs, long err) { struct die_args args = { .regs = regs, - .str = str, + .str = "page fault", .err = err, - .trapnr = trap, - .signr = sig + .trapnr = 14, + .signr = SIGSEGV }; - return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); -} - -/* - * Unlock any spinlocks which will prevent us from getting the - * message out - */ -void bust_spinlocks(int yes) -{ - int loglevel_save = console_loglevel; - - if (yes) { - oops_in_progress = 1; - return; - } -#ifdef CONFIG_VT - unblank_screen(); -#endif - oops_in_progress = 0; - /* - * OK, the message is on the console. Now we call printk() - * without oops_in_progress set so that printk will give klogd - * a poke. Hold onto your hats... - */ - console_loglevel = 15; /* NMI oopser may have shut the console up */ - printk(" "); - console_loglevel = loglevel_save; + return atomic_notifier_call_chain(&notify_page_fault_chain, + DIE_PAGE_FAULT, &args); } /* @@ -476,8 +450,7 @@ /* Can take a spurious fault if mapping changes R/O -> R/W. */ if (spurious_fault(regs, address, error_code)) return; - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, - SIGSEGV) == NOTIFY_STOP) + if (notify_page_fault(regs, error_code) == NOTIFY_STOP) return; /* * Don't take the mm semaphore here.
If we fixup a prefetch @@ -486,8 +459,7 @@ goto bad_area_nosemaphore; } - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, - SIGSEGV) == NOTIFY_STOP) + if (notify_page_fault(regs, error_code) == NOTIFY_STOP) return; /* It's safe to allow irq's after cr2 has been saved and the vmalloc --- a/arch/x86/mm/fault_64-xen.c +++ b/arch/x86/mm/fault_64-xen.c @@ -56,38 +56,17 @@ } EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); -static inline int notify_page_fault(enum die_val val, const char *str, - struct pt_regs *regs, long err, int trap, int sig) +static inline int notify_page_fault(struct pt_regs *regs, long err) { struct die_args args = { .regs = regs, - .str = str, + .str = "page fault", .err = err, - .trapnr = trap, - .signr = sig + .trapnr = 14, + .signr = SIGSEGV }; - return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); -} - -void bust_spinlocks(int yes) -{ - int loglevel_save = console_loglevel; - if (yes) { - oops_in_progress = 1; - } else { -#ifdef CONFIG_VT - unblank_screen(); -#endif - oops_in_progress = 0; - /* - * OK, the message is on the console. Now we call printk() - * without oops_in_progress set so that printk will give klogd - * a poke. Hold onto your hats... - */ - console_loglevel = 15; /* NMI oopser may have shut the console up */ - printk(" "); - console_loglevel = loglevel_save; - } + return atomic_notifier_call_chain(&notify_page_fault_chain, + DIE_PAGE_FAULT, &args); } /* Sometimes the CPU reports invalid exceptions on prefetch. @@ -437,8 +416,7 @@ /* Can take a spurious fault if mapping changes R/O -> R/W. */ if (spurious_fault(regs, address, error_code)) return; - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, - SIGSEGV) == NOTIFY_STOP) + if (notify_page_fault(regs, error_code) == NOTIFY_STOP) return; /* * Don't take the mm semaphore here.
If we fixup a prefetch @@ -447,8 +425,7 @@ goto bad_area_nosemaphore; } - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, - SIGSEGV) == NOTIFY_STOP) + if (notify_page_fault(regs, error_code) == NOTIFY_STOP) return; if (likely(regs->eflags & X86_EFLAGS_IF)) --- a/arch/x86/mm/highmem_32-xen.c +++ b/arch/x86/mm/highmem_32-xen.c @@ -33,14 +33,16 @@ /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ pagefault_disable(); + + idx = type + KM_TYPE_NR*smp_processor_id(); + BUG_ON(!pte_none(*(kmap_pte-idx))); + if (!PageHighMem(page)) return page_address(page); - idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - if (!pte_none(*(kmap_pte-idx))) - BUG(); set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); + arch_flush_lazy_mmu_mode(); return (void*) vaddr; } @@ -94,6 +96,7 @@ idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); + arch_flush_lazy_mmu_mode(); return (void*) vaddr; } --- a/arch/x86/mm/init_32-xen.c +++ b/arch/x86/mm/init_32-xen.c @@ -68,6 +68,7 @@ #ifdef CONFIG_X86_PAE pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); pud = pud_offset(pgd, 0); @@ -89,6 +90,7 @@ { if (pmd_none(*pmd)) { pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); make_lowmem_page_readonly(page_table, XENFEAT_writable_page_tables); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); --- a/arch/x86/mm/init_64-xen.c +++ b/arch/x86/mm/init_64-xen.c @@ -1111,20 +1111,30 @@ extern int exception_trace, page_fault_trace; static ctl_table debug_table2[] = { - { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, - proc_dointvec }, - { 0, } + { + .ctl_name = 99, + 
.procname = "exception-trace", + .data = &exception_trace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + {} }; static ctl_table debug_root_table2[] = { - { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555, - .child = debug_table2 }, - { 0 }, + { + .ctl_name = CTL_DEBUG, + .procname = "debug", + .mode = 0555, + .child = debug_table2 + }, + {} }; static __init int x8664_sysctl_init(void) { - register_sysctl_table(debug_root_table2, 1); + register_sysctl_table(debug_root_table2); return 0; } __initcall(x8664_sysctl_init); --- a/arch/x86/mm/pageattr_64-xen.c +++ b/arch/x86/mm/pageattr_64-xen.c @@ -344,8 +344,8 @@ void *adr = page_address(pg); if (cpu_has_clflush) cache_flush_page(adr); - __flush_tlb_one(adr); } + __flush_tlb_all(); } static inline void flush_map(struct list_head *l) @@ -370,6 +370,7 @@ pud_t *pud; pmd_t *pmd; pte_t large_pte; + unsigned long pfn; pgd = pgd_offset_k(address); BUG_ON(pgd_none(*pgd)); @@ -377,7 +378,8 @@ BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); BUG_ON(__pmd_val(*pmd) & _PAGE_PSE); - large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot); + pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT; + large_pte = pfn_pte(pfn, ref_prot); large_pte = pte_mkhuge(large_pte); set_pte((pte_t *)pmd, large_pte); } --- a/arch/x86/mm/pgtable_32-xen.c +++ b/arch/x86/mm/pgtable_32-xen.c @@ -149,6 +149,8 @@ void __init reserve_top_address(unsigned long reserve) { BUG_ON(fixmaps > 0); + printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", + (int)-reserve); __FIXADDR_TOP = -reserve - PAGE_SIZE; __VMALLOC_RESERVE += reserve; } @@ -252,6 +254,12 @@ swapper_pg_dir + USER_PTRS_PER_PGD, KERNEL_PGD_PTRS); memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + + /* must happen under lock */ + paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, + __pa(swapper_pg_dir) >> PAGE_SHIFT, + USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); + pgd_list_add(pgd); spin_unlock_irqrestore(&pgd_lock, 
flags); } @@ -262,6 +270,7 @@ { unsigned long flags; /* can be called from interrupt context */ + paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); spin_lock_irqsave(&pgd_lock, flags); pgd_list_del(pgd); spin_unlock_irqrestore(&pgd_lock, flags); @@ -286,6 +295,7 @@ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); if (!pmd) goto out_oom; + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); } return pgd; @@ -308,6 +318,7 @@ pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL); if (!pmd[i]) goto out_oom; + paravirt_alloc_pd(__pa(pmd[i]) >> PAGE_SHIFT); } spin_lock_irqsave(&pgd_lock, flags); @@ -348,12 +359,17 @@ out_oom: if (HAVE_SHARED_KERNEL_PMD) { - for (i--; i >= 0; i--) - kmem_cache_free(pmd_cache, - (void *)__va(pgd_val(pgd[i])-1)); + for (i--; i >= 0; i--) { + pgd_t pgdent = pgd[i]; + void* pmd = (void *)__va(pgd_val(pgdent)-1); + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); + kmem_cache_free(pmd_cache, pmd); + } } else { - for (i--; i >= 0; i--) + for (i--; i >= 0; i--) { + paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT); kmem_cache_free(pmd_cache, pmd[i]); + } kfree(pmd); } kmem_cache_free(pgd_cache, pgd); @@ -377,7 +393,9 @@ /* in the PAE case user pgd entries are overwritten before usage */ if (PTRS_PER_PMD > 1) { for (i = 0; i < USER_PTRS_PER_PGD; ++i) { - pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); + pgd_t pgdent = pgd[i]; + void* pmd = (void *)__va(pgd_val(pgdent)-1); + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); kmem_cache_free(pmd_cache, pmd); } --- a/drivers/char/tpm/tpm_xen.c +++ b/drivers/char/tpm/tpm_xen.c @@ -481,7 +481,6 @@ static struct xenbus_driver tpmfront = { .name = "vtpm", - .owner = THIS_MODULE, .ids = tpmfront_ids, .probe = tpmfront_probe, .remove = tpmfront_remove, @@ -491,9 +490,9 @@ .suspend_cancel = tpmfront_suspend_cancel, }; -static void __init init_tpm_xenbus(void) +static int __init init_tpm_xenbus(void) { - xenbus_register_frontend(&tpmfront); + return xenbus_register_frontend(&tpmfront); } static
int tpmif_allocate_tx_buffers(struct tpm_private *tp) --- a/drivers/xen/balloon/sysfs.c +++ b/drivers/xen/balloon/sysfs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "common.h" #ifdef HAVE_XEN_PLATFORM_COMPAT_H --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -519,7 +519,6 @@ static struct xenbus_driver blkback = { .name = "vbd", - .owner = THIS_MODULE, .ids = blkback_ids, .probe = blkback_probe, .remove = blkback_remove, @@ -529,5 +528,6 @@ void blkif_xenbus_init(void) { - xenbus_register_backend(&blkback); + if (xenbus_register_backend(&blkback)) + BUG(); } --- a/drivers/xen/blkfront/blkfront.c +++ b/drivers/xen/blkfront/blkfront.c @@ -893,7 +893,6 @@ static struct xenbus_driver blkfront = { .name = "vbd", - .owner = THIS_MODULE, .ids = blkfront_ids, .probe = blkfront_probe, .remove = blkfront_remove, --- a/drivers/xen/blktap/xenbus.c +++ b/drivers/xen/blktap/xenbus.c @@ -463,7 +463,6 @@ static struct xenbus_driver blktap = { .name = "tap", - .owner = THIS_MODULE, .ids = blktap_ids, .probe = blktap_probe, .remove = blktap_remove, @@ -473,5 +472,6 @@ void tap_blkif_xenbus_init(void) { - xenbus_register_backend(&blktap); + if (xenbus_register_backend(&blktap)) + BUG(); } --- a/drivers/xen/core/evtchn.c +++ b/drivers/xen/core/evtchn.c @@ -133,7 +133,7 @@ BUG_ON(!test_bit(chn, s->evtchn_mask)); if (irq != -1) - set_native_irq_info(irq, cpumask_of_cpu(cpu)); + irq_desc[irq].affinity = cpumask_of_cpu(cpu); clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); @@ -146,7 +146,7 @@ /* By default all event channels notify CPU#0. 
*/ for (i = 0; i < NR_IRQS; i++) - set_native_irq_info(i, cpumask_of_cpu(0)); + irq_desc[i].affinity = cpumask_of_cpu(0); memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); --- a/drivers/xen/core/smpboot.c +++ b/drivers/xen/core/smpboot.c @@ -261,7 +261,7 @@ { unsigned int cpu; struct task_struct *idle; - int apicid, acpiid; + int apicid; struct vcpu_get_physid cpu_id; #ifdef __x86_64__ struct desc_ptr *gdt_descr; @@ -270,14 +270,8 @@ #endif apicid = 0; - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) { + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); -#ifdef CONFIG_ACPI - if (acpiid != 0xff) - x86_acpiid_to_apicid[acpiid] = apicid; -#endif - } boot_cpu_data.apicid = apicid; cpu_data[0] = boot_cpu_data; @@ -333,14 +327,8 @@ XENFEAT_writable_descriptor_tables); apicid = cpu; - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); -#ifdef CONFIG_ACPI - if (acpiid != 0xff) - x86_acpiid_to_apicid[acpiid] = apicid; -#endif - } cpu_data[cpu] = boot_cpu_data; cpu_data[cpu].apicid = apicid; --- a/drivers/xen/fbfront/xenfb.c +++ b/drivers/xen/fbfront/xenfb.c @@ -856,7 +856,6 @@ static struct xenbus_driver xenfb_driver = { .name = "vfb", - .owner = THIS_MODULE, .ids = xenfb_ids, .probe = xenfb_probe, .remove = xenfb_remove, --- a/drivers/xen/fbfront/xenkbd.c +++ b/drivers/xen/fbfront/xenkbd.c @@ -323,7 +323,6 @@ static struct xenbus_driver xenkbd_driver = { .name = "vkbd", - .owner = THIS_MODULE, .ids = xenkbd_ids, .probe = xenkbd_probe, .remove = xenkbd_remove, --- a/drivers/xen/netback/xenbus.c +++ b/drivers/xen/netback/xenbus.c @@ -437,7 +437,6 @@ static struct xenbus_driver netback = { .name = 
"vif", - .owner = THIS_MODULE, .ids = netback_ids, .probe = netback_probe, .remove = netback_remove, @@ -448,5 +447,6 @@ void netif_xenbus_init(void) { - xenbus_register_backend(&netback); + if (xenbus_register_backend(&netback)) + BUG(); } --- a/drivers/xen/netfront/netfront.c +++ b/drivers/xen/netfront/netfront.c @@ -1893,20 +1893,19 @@ }; #ifdef CONFIG_SYSFS -static ssize_t show_rxbuf_min(struct class_device *cd, char *buf) +static ssize_t show_rxbuf_min(struct device *dev, + struct device_attribute *attr, char *buf) { - struct net_device *netdev = container_of(cd, struct net_device, - class_dev); - struct netfront_info *info = netdev_priv(netdev); + struct netfront_info *info = netdev_priv(to_net_dev(dev)); return sprintf(buf, "%u\n", info->rx_min_target); } -static ssize_t store_rxbuf_min(struct class_device *cd, +static ssize_t store_rxbuf_min(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) { - struct net_device *netdev = container_of(cd, struct net_device, - class_dev); + struct net_device *netdev = to_net_dev(dev); struct netfront_info *np = netdev_priv(netdev); char *endp; unsigned long target; @@ -1936,20 +1935,19 @@ return len; } -static ssize_t show_rxbuf_max(struct class_device *cd, char *buf) +static ssize_t show_rxbuf_max(struct device *dev, + struct device_attribute *attr, char *buf) { - struct net_device *netdev = container_of(cd, struct net_device, - class_dev); - struct netfront_info *info = netdev_priv(netdev); + struct netfront_info *info = netdev_priv(to_net_dev(dev)); return sprintf(buf, "%u\n", info->rx_max_target); } -static ssize_t store_rxbuf_max(struct class_device *cd, +static ssize_t store_rxbuf_max(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) { - struct net_device *netdev = container_of(cd, struct net_device, - class_dev); + struct net_device *netdev = to_net_dev(dev); struct netfront_info *np = netdev_priv(netdev); char *endp; unsigned long target; @@ -1979,16 
+1977,15 @@ return len; } -static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf) +static ssize_t show_rxbuf_cur(struct device *dev, + struct device_attribute *attr, char *buf) { - struct net_device *netdev = container_of(cd, struct net_device, - class_dev); - struct netfront_info *info = netdev_priv(netdev); + struct netfront_info *info = netdev_priv(to_net_dev(dev)); return sprintf(buf, "%u\n", info->rx_target); } -static const struct class_device_attribute xennet_attrs[] = { +static struct device_attribute xennet_attrs[] = { __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min), __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max), __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL), @@ -2000,8 +1997,8 @@ int error = 0; for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { - error = class_device_create_file(&netdev->class_dev, - &xennet_attrs[i]); + error = device_create_file(&netdev->dev, + &xennet_attrs[i]); if (error) goto fail; } @@ -2009,8 +2006,7 @@ fail: while (--i >= 0) - class_device_remove_file(&netdev->class_dev, - &xennet_attrs[i]); + device_remove_file(&netdev->dev, &xennet_attrs[i]); return error; } @@ -2018,10 +2014,8 @@ { int i; - for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { - class_device_remove_file(&netdev->class_dev, - &xennet_attrs[i]); - } + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) + device_remove_file(&netdev->dev, &xennet_attrs[i]); } #endif /* CONFIG_SYSFS */ @@ -2187,7 +2181,6 @@ static struct xenbus_driver netfront_driver = { .name = "vif", - .owner = THIS_MODULE, .ids = netfront_ids, .probe = netfront_probe, .remove = __devexit_p(netfront_remove), --- a/drivers/xen/pciback/xenbus.c +++ b/drivers/xen/pciback/xenbus.c @@ -663,7 +663,6 @@ static struct xenbus_driver xenbus_pciback_driver = { .name = "pciback", - .owner = THIS_MODULE, .ids = xenpci_ids, .probe = pciback_xenbus_probe, .remove = pciback_xenbus_remove, --- a/drivers/xen/pcifront/xenbus.c +++ b/drivers/xen/pcifront/xenbus.c @@ -435,7 
+435,6 @@ static struct xenbus_driver xenbus_pcifront_driver = { .name = "pcifront", - .owner = THIS_MODULE, .ids = xenpci_ids, .probe = pcifront_xenbus_probe, .remove = pcifront_xenbus_remove, --- a/drivers/xen/tpmback/common.h +++ b/drivers/xen/tpmback/common.h @@ -54,11 +54,11 @@ void tpmif_disconnect_complete(tpmif_t * tpmif); tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi); -void tpmif_interface_init(void); +int tpmif_interface_init(void); void tpmif_interface_exit(void); void tpmif_schedule_work(tpmif_t * tpmif); void tpmif_deschedule_work(tpmif_t * tpmif); -void tpmif_xenbus_init(void); +int tpmif_xenbus_init(void); void tpmif_xenbus_exit(void); int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn); irqreturn_t tpmif_be_int(int irq, void *dev_id); --- a/drivers/xen/tpmback/interface.c +++ b/drivers/xen/tpmback/interface.c @@ -156,13 +156,14 @@ free_tpmif(tpmif); } -void __init tpmif_interface_init(void) +int __init tpmif_interface_init(void) { tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t), 0, 0, NULL, NULL); + return tpmif_cachep ? 
0 : -ENOMEM; } -void __exit tpmif_interface_exit(void) +void tpmif_interface_exit(void) { kmem_cache_destroy(tpmif_cachep); } --- a/drivers/xen/tpmback/tpmback.c +++ b/drivers/xen/tpmback/tpmback.c @@ -923,22 +923,30 @@ spin_lock_init(&tpm_schedule_list_lock); INIT_LIST_HEAD(&tpm_schedule_list); - tpmif_interface_init(); - tpmif_xenbus_init(); + rc = tpmif_interface_init(); + if (!rc) { + rc = tpmif_xenbus_init(); + if (rc) + tpmif_interface_exit(); + } + if (rc) { + misc_deregister(&vtpms_miscdevice); + return rc; + } printk(KERN_ALERT "Successfully initialized TPM backend driver.\n"); return 0; } - module_init(tpmback_init); -void __exit tpmback_exit(void) +static void __exit tpmback_exit(void) { vtpm_release_packets(NULL, 0); tpmif_xenbus_exit(); tpmif_interface_exit(); misc_deregister(&vtpms_miscdevice); } +module_exit(tpmback_exit) MODULE_LICENSE("Dual BSD/GPL"); --- a/drivers/xen/tpmback/xenbus.c +++ b/drivers/xen/tpmback/xenbus.c @@ -270,7 +270,6 @@ static struct xenbus_driver tpmback = { .name = "vtpm", - .owner = THIS_MODULE, .ids = tpmback_ids, .probe = tpmback_probe, .remove = tpmback_remove, @@ -278,9 +277,9 @@ }; -void tpmif_xenbus_init(void) +int tpmif_xenbus_init(void) { - xenbus_register_backend(&tpmback); + return xenbus_register_backend(&tpmback); } void tpmif_xenbus_exit(void) --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -362,7 +362,9 @@ } int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus) + struct xen_bus_type *bus, + struct module *owner, + const char *mod_name) { int ret; @@ -372,7 +374,10 @@ drv->driver.name = drv->name; drv->driver.bus = &bus->bus; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) - drv->driver.owner = drv->owner; + drv->driver.owner = owner; +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21) + drv->driver.mod_name = mod_name; #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) drv->driver.probe = xenbus_dev_probe; @@ -386,13 +391,15 @@ 
return ret; } -int xenbus_register_frontend(struct xenbus_driver *drv) +int __xenbus_register_frontend(struct xenbus_driver *drv, + struct module *owner, const char *mod_name) { int ret; drv->read_otherend_details = read_backend_details; - ret = xenbus_register_driver_common(drv, &xenbus_frontend); + ret = xenbus_register_driver_common(drv, &xenbus_frontend, + owner, mod_name); if (ret) return ret; @@ -401,7 +408,7 @@ return 0; } -EXPORT_SYMBOL_GPL(xenbus_register_frontend); +EXPORT_SYMBOL_GPL(__xenbus_register_frontend); void xenbus_unregister_driver(struct xenbus_driver *drv) { --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -63,7 +63,9 @@ extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); extern int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus); + struct xen_bus_type *bus, + struct module *owner, + const char *mod_name); extern int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -172,13 +172,15 @@ return 0; } -int xenbus_register_backend(struct xenbus_driver *drv) +int __xenbus_register_backend(struct xenbus_driver *drv, + struct module *owner, const char *mod_name) { drv->read_otherend_details = read_frontend_details; - return xenbus_register_driver_common(drv, &xenbus_backend); + return xenbus_register_driver_common(drv, &xenbus_backend, + owner, mod_name); } -EXPORT_SYMBOL_GPL(xenbus_register_backend); +EXPORT_SYMBOL_GPL(__xenbus_register_backend); /* backend/// */ static int xenbus_probe_backend_unit(const char *dir, --- a/include/asm-x86/i8253.h +++ b/include/asm-x86/i8253.h @@ -8,10 +8,14 @@ extern spinlock_t i8253_lock; +#ifdef CONFIG_GENERIC_CLOCKEVENTS + extern struct clock_event_device *global_clock_event; extern void setup_pit_timer(void); +#endif + #define inb_pit inb_p #define outb_pit outb_p 
--- a/include/asm-x86/mach-xen/asm/desc_32.h +++ b/include/asm-x86/mach-xen/asm/desc_32.h @@ -21,7 +21,7 @@ extern struct Xgt_desc_struct idt_descr; DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); - +extern struct Xgt_desc_struct early_gdt_descr; static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h @@ -9,7 +9,6 @@ #include #include -#include struct dma_mapping_ops { int (*mapping_error)(dma_addr_t dma_addr); @@ -66,6 +65,9 @@ #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) extern void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); --- a/include/asm-x86/mach-xen/asm/e820_64.h +++ b/include/asm-x86/mach-xen/asm/e820_64.h @@ -46,6 +46,7 @@ extern void e820_print_map(char *who); extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); +extern unsigned long e820_hole_size(unsigned long start, unsigned long end); extern void e820_setup_gap(struct e820entry *e820, int nr_map); extern void e820_register_active_regions(int nid, @@ -56,6 +57,7 @@ extern struct e820map e820; extern unsigned ebda_addr, ebda_size; +extern unsigned long nodemap_addr, nodemap_size; #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ --- a/include/asm-x86/mach-xen/asm/hw_irq_64.h +++ b/include/asm-x86/mach-xen/asm/hw_irq_64.h @@ -31,10 +31,32 @@ #define IA32_SYSCALL_VECTOR 0x80 +#ifndef CONFIG_XEN + +/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering + * cleanup after irq migration. 
+ */ +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR /* - * Vectors 0x20-0x2f are used for ISA interrupts. + * Vectors 0x30-0x3f are used for ISA interrupts. */ +#define IRQ0_VECTOR FIRST_EXTERNAL_VECTOR + 0x10 +#define IRQ1_VECTOR IRQ0_VECTOR + 1 +#define IRQ2_VECTOR IRQ0_VECTOR + 2 +#define IRQ3_VECTOR IRQ0_VECTOR + 3 +#define IRQ4_VECTOR IRQ0_VECTOR + 4 +#define IRQ5_VECTOR IRQ0_VECTOR + 5 +#define IRQ6_VECTOR IRQ0_VECTOR + 6 +#define IRQ7_VECTOR IRQ0_VECTOR + 7 +#define IRQ8_VECTOR IRQ0_VECTOR + 8 +#define IRQ9_VECTOR IRQ0_VECTOR + 9 +#define IRQ10_VECTOR IRQ0_VECTOR + 10 +#define IRQ11_VECTOR IRQ0_VECTOR + 11 +#define IRQ12_VECTOR IRQ0_VECTOR + 12 +#define IRQ13_VECTOR IRQ0_VECTOR + 13 +#define IRQ14_VECTOR IRQ0_VECTOR + 14 +#define IRQ15_VECTOR IRQ0_VECTOR + 15 /* * Special IRQ vectors used by the SMP architecture, 0xf0-0xff @@ -43,7 +65,6 @@ * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. * TLB, reschedule and local APIC vectors are performance-critical. */ -#ifndef CONFIG_XEN #define SPURIOUS_APIC_VECTOR 0xff #define ERROR_APIC_VECTOR 0xfe #define RESCHEDULE_VECTOR 0xfd @@ -57,7 +78,6 @@ #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ #define NUM_INVALIDATE_TLB_VECTORS 8 -#endif /* * Local APIC timer IRQ vector is on a different priority level, @@ -68,12 +88,13 @@ /* * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x31 to spread out vectors evenly between priority + * we start at 0x41 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ -#define FIRST_DEVICE_VECTOR 0x31 +#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) #define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#endif #ifndef __ASSEMBLY__ typedef int vector_irq_t[NR_VECTORS]; @@ -93,7 +114,7 @@ extern int i8259A_irq_pending(unsigned int irq); extern void make_8259A_irq(unsigned int irq); extern void init_8259A(int aeoi); -extern void FASTCALL(send_IPI_self(int vector)); +extern void send_IPI_self(int vector); extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); extern void disable_IO_APIC(void); --- a/include/asm-x86/mach-xen/asm/hypervisor.h +++ b/include/asm-x86/mach-xen/asm/hypervisor.h @@ -171,7 +171,7 @@ return rc; } -static inline void /*__noreturn*/ +static inline void __noreturn HYPERVISOR_shutdown( unsigned int reason) { --- a/include/asm-x86/mach-xen/asm/io_32.h +++ b/include/asm-x86/mach-xen/asm/io_32.h @@ -232,12 +232,6 @@ #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN))) /* - * Again, i386 does not require mem IO specific function. - */ - -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d)) - -/* * Cache management * * This needed for two cases --- a/include/asm-x86/mach-xen/asm/io_64.h +++ b/include/asm-x86/mach-xen/asm/io_64.h @@ -101,7 +101,7 @@ #define IO_SPACE_LIMIT 0xffff -#if defined(__KERNEL__) && __x86_64__ +#if defined(__KERNEL__) && defined(__x86_64__) #include @@ -267,12 +267,6 @@ */ #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN))) -/* - * Again, x86-64 does not require mem IO specific function. - */ - -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d)) - /* Nothing to do */ #define dma_cache_inv(_start,_size) do { } while (0) --- a/include/asm-x86/mach-xen/asm/mmu_context_32.h +++ b/include/asm-x86/mach-xen/asm/mmu_context_32.h @@ -27,13 +27,13 @@ static inline void __prepare_arch_switch(void) { /* - * Save away %fs. 
No need to save %gs, as it was saved on the + * Save away %gs. No need to save %fs, as it was saved on the * stack on entry. No need to save %es and %ds, as those are * always kernel segments while inside the kernel. */ - asm volatile ( "mov %%fs,%0" - : "=m" (current->thread.fs)); - asm volatile ( "movl %0,%%fs" + asm volatile ( "mov %%gs,%0" + : "=m" (current->thread.gs)); + asm volatile ( "movl %0,%%gs" : : "r" (0) ); } @@ -95,7 +95,7 @@ } #define deactivate_mm(tsk, mm) \ - asm("movl %0,%%fs": :"r" (0)); + asm("movl %0,%%gs": :"r" (0)); static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { --- a/include/asm-x86/mach-xen/asm/pgalloc_32.h +++ b/include/asm-x86/mach-xen/asm/pgalloc_32.h @@ -6,12 +6,23 @@ #include /* for struct page */ #include /* for phys_to_virt and page_to_pseudophys */ -#define pmd_populate_kernel(mm, pmd, pte) \ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) +#define paravirt_alloc_pt(pfn) do { } while (0) +#define paravirt_alloc_pd(pfn) do { } while (0) +#define paravirt_alloc_pd(pfn) do { } while (0) +#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) +#define paravirt_release_pt(pfn) do { } while (0) +#define paravirt_release_pd(pfn) do { } while (0) + +#define pmd_populate_kernel(mm, pmd, pte) \ +do { \ + paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \ + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ +} while (0) #define pmd_populate(mm, pmd, pte) \ do { \ unsigned long pfn = page_to_pfn(pte); \ + paravirt_alloc_pt(pfn); \ if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) { \ if (!PageHighMem(pte)) \ BUG_ON(HYPERVISOR_update_va_mapping( \ @@ -42,7 +53,11 @@ extern void pte_free(struct page *pte); -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) \ +do { \ + paravirt_release_pt(page_to_pfn(pte)); \ + tlb_remove_page((tlb),(pte)); \ +} while (0) #ifdef CONFIG_X86_PAE /* --- a/include/asm-x86/mach-xen/asm/pgtable_32.h +++ 
b/include/asm-x86/mach-xen/asm/pgtable_32.h @@ -275,6 +275,7 @@ */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) +#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) /* * We only update the dirty/accessed state if we set @@ -490,12 +491,24 @@ #endif #if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \ - pte_index(address)) -#define pte_offset_map_nested(dir, address) \ - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + \ - pte_index(address)) +#define pte_offset_map(dir, address) \ +({ \ + pte_t *__ptep; \ + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ + __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \ + paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ + __ptep = __ptep + pte_index(address); \ + __ptep; \ +}) +#define pte_offset_map_nested(dir, address) \ +({ \ + pte_t *__ptep; \ + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ + __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \ + paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ + __ptep = __ptep + pte_index(address); \ + __ptep; \ +}) #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) #else --- a/include/asm-x86/mach-xen/asm/pgtable_64.h +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h @@ -416,15 +416,6 @@ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) #define mk_pte_huge(entry) (__pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE) -/* physical address -> PTE */ -static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) -{ - unsigned long pteval; - pteval = physpage | pgprot_val(pgprot); - pteval &= __supported_pte_mask; - return __pte(pteval); -} - /* Change flags of a PTE */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { --- a/include/asm-x86/mach-xen/asm/processor_32.h +++ b/include/asm-x86/mach-xen/asm/processor_32.h @@ -431,7 +431,7 @@ 
.vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ - .gs = __KERNEL_PDA, \ + .fs = __KERNEL_PDA, \ } /* @@ -449,8 +449,8 @@ } #define start_thread(regs, new_eip, new_esp) do { \ - __asm__("movl %0,%%fs": :"r" (0)); \ - regs->xgs = 0; \ + __asm__("movl %0,%%gs": :"r" (0)); \ + regs->xfs = 0; \ set_fs(USER_DS); \ regs->xds = __USER_DS; \ regs->xes = __USER_DS; \ --- a/include/asm-x86/mach-xen/asm/segment_32.h +++ b/include/asm-x86/mach-xen/asm/segment_32.h @@ -83,14 +83,8 @@ * The GDT has 32 entries */ #define GDT_ENTRIES 32 - #define GDT_SIZE (GDT_ENTRIES * 8) -/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ -#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) -/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ -#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) - /* Simple and small GDT entries for booting only */ #define GDT_ENTRY_BOOT_CS 2 @@ -132,4 +126,21 @@ #define SEGMENT_GDT 0x0 #define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) + +/* + * Matching rules for certain types of segments. 
+ */ + +/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */ +#define SEGMENT_IS_KERNEL_CODE(x) (((x) & ~3) == GDT_ENTRY_KERNEL_CS * 8 \ + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3)) + +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ +#define SEGMENT_IS_FLAT_CODE(x) (((x) & ~0x13) == GDT_ENTRY_KERNEL_CS * 8 \ + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3) \ + || ((x) & ~3) == (FLAT_USER_CS & ~3)) + +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ +#define SEGMENT_IS_PNP_CODE(x) (((x) & ~0x0b) == GDT_ENTRY_PNPBIOS_BASE * 8) + #endif --- a/include/asm-x86/mach-xen/asm/smp_32.h +++ b/include/asm-x86/mach-xen/asm/smp_32.h @@ -52,6 +52,11 @@ extern void cpu_uninit(void); #endif +#ifndef CONFIG_PARAVIRT +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ +do { } while (0) +#endif + /* * This function is needed by all SMP systems. It must _always_ be valid * from the initial startup. We map APIC_BASE very early in page_setup(), --- a/include/asm-x86/mach-xen/asm/smp_64.h +++ b/include/asm-x86/mach-xen/asm/smp_64.h @@ -7,6 +7,7 @@ #include #include #include +#include extern int disable_apic; #ifdef CONFIG_X86_LOCAL_APIC @@ -73,7 +74,7 @@ extern void __cpu_die(unsigned int cpu); extern void prefill_possible_map(void); extern unsigned num_processors; -extern unsigned disabled_cpus; +extern unsigned __cpuinitdata disabled_cpus; #define NO_PROC_ID 0xFF /* No processor magic marker */ --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -93,8 +93,7 @@ /* A xenbus driver. 
*/ struct xenbus_driver { - char *name; - struct module *owner; + const char *name; const struct xenbus_device_id *ids; int (*probe)(struct xenbus_device *dev, const struct xenbus_device_id *id); @@ -115,8 +114,25 @@ return container_of(drv, struct xenbus_driver, driver); } -int xenbus_register_frontend(struct xenbus_driver *drv); -int xenbus_register_backend(struct xenbus_driver *drv); +int __must_check __xenbus_register_frontend(struct xenbus_driver *drv, + struct module *owner, + const char *mod_name); + +static inline int __must_check +xenbus_register_frontend(struct xenbus_driver *drv) +{ + return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME); +} + +int __must_check __xenbus_register_backend(struct xenbus_driver *drv, + struct module *owner, + const char *mod_name); +static inline int __must_check +xenbus_register_backend(struct xenbus_driver *drv) +{ + return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME); +} + void xenbus_unregister_driver(struct xenbus_driver *drv); struct xenbus_transaction --- a/lib/swiotlb-xen.c +++ b/lib/swiotlb-xen.c @@ -138,8 +138,8 @@ * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the PCI DMA API. */ -void -swiotlb_init_with_default_size (size_t default_size) +void __init +swiotlb_init_with_default_size(size_t default_size) { unsigned long i, bytes; int rc; @@ -227,7 +227,7 @@ dma_bits); } -void +void __init swiotlb_init(void) { long ram_end; @@ -463,7 +463,7 @@ * When the mapping is small enough return a static buffer to limit * the damage, or panic when the transfer is too big. */ - printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at " - "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?"); + printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at " + "device %s\n", size, dev ? 
dev->bus_id : "?"); if (size > io_tlb_overflow && do_panic) { @@ -608,7 +608,7 @@ sg[0].dma_length = 0; return 0; } - sg->dma_address = (dma_addr_t)virt_to_bus(map); + sg->dma_address = virt_to_bus(map); } else sg->dma_address = dev_addr; sg->dma_length = sg->length; @@ -630,8 +630,7 @@ for (i = 0; i < nelems; i++, sg++) if (in_swiotlb_aperture(sg->dma_address)) - unmap_single(hwdev, - (void *)bus_to_virt(sg->dma_address), + unmap_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir); else gnttab_dma_unmap_page(sg->dma_address); @@ -654,8 +653,7 @@ for (i = 0; i < nelems; i++, sg++) if (in_swiotlb_aperture(sg->dma_address)) - sync_single(hwdev, - (void *)bus_to_virt(sg->dma_address), + sync_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir); } @@ -669,8 +667,7 @@ for (i = 0; i < nelems; i++, sg++) if (in_swiotlb_aperture(sg->dma_address)) - sync_single(hwdev, - (void *)bus_to_virt(sg->dma_address), + sync_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir); }