|
;; $Id: bcopy32.inc,v 1.1 2007-09-01 22:44:04 niro Exp $ |
|
1 |
;; ----------------------------------------------------------------------- |
;; ----------------------------------------------------------------------- |
2 |
;; |
;; |
3 |
;; Copyright 1994-2005 H. Peter Anvin - All Rights Reserved |
;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved |
4 |
|
;; Copyright 2009 Intel Corporation; author: H. Peter Anvin |
5 |
;; |
;; |
6 |
;; This program is free software; you can redistribute it and/or modify |
;; This program is free software; you can redistribute it and/or modify |
7 |
;; it under the terms of the GNU General Public License as published by |
;; it under the terms of the GNU General Public License as published by |
13 |
|
|
14 |
;; |
;; |
15 |
;; bcopy32.inc |
;; bcopy32.inc |
16 |
;; |
;; |
17 |
;; 32-bit bcopy routine for real mode |
;; 32-bit bcopy routine for real mode |
18 |
;; |
;; |
19 |
|
|
27 |
; segments, but this stuff is painful enough as it is without having to rely |
; segments, but this stuff is painful enough as it is without having to rely |
28 |
; on everything happening "as it ought to." |
; on everything happening "as it ought to." |
29 |
; |
; |
|
; NOTE: this code is relocated into low memory, just after the .earlybss |
|
|
; segment, in order to support to "bcopy over self" operation. |
|
|
; |
|
30 |
|
|
31 |
section .bcopy32 |
bits 16 |
32 |
align 8 |
section .text |
|
__bcopy_start: |
|
|
|
|
|
; This is in the .text segment since it needs to be |
|
|
; contiguous with the rest of the bcopy stuff |
|
|
|
|
|
bcopy_gdt: dw bcopy_gdt_size-1 ; Null descriptor - contains GDT |
|
|
dd bcopy_gdt ; pointer for LGDT instruction |
|
|
dw 0 |
|
|
dd 0000ffffh ; Code segment, use16, readable, |
|
|
dd 00009b00h ; present, dpl 0, cover 64K |
|
|
dd 0000ffffh ; Data segment, use16, read/write, |
|
|
dd 008f9300h ; present, dpl 0, cover all 4G |
|
|
dd 0000ffffh ; Data segment, use16, read/write, |
|
|
dd 00009300h ; present, dpl 0, cover 64K |
|
|
; The rest are used for COM32 only |
|
|
dd 0000ffffh ; Code segment, use32, readable, |
|
|
dd 00cf9b00h ; present, dpl 0, cover all 4G |
|
|
dd 0000ffffh ; Data segment, use32, read/write, |
|
|
dd 00cf9300h ; present, dpl 0, cover all 4G |
|
|
bcopy_gdt_size: equ $-bcopy_gdt |
|
33 |
|
|
34 |
; |
; |
35 |
; bcopy: |
; bcopy: |
36 |
; 32-bit copy, overlap safe |
; 32-bit copy, overlap safe |
37 |
; |
; |
38 |
; Inputs: |
; Inputs: |
39 |
; ESI - source pointer |
; ESI - source pointer (-1 means do bzero rather than bcopy) |
40 |
; EDI - target pointer |
; EDI - target pointer |
41 |
; ECX - byte count |
; ECX - byte count |
42 |
; DF - zero |
; DF - zero |
43 |
; |
; |
44 |
; Outputs: |
; Outputs: |
45 |
; ESI - first byte after source |
; ESI - first byte after source (garbage if ESI == -1 on entry) |
46 |
; EDI - first byte after target |
; EDI - first byte after target |
|
; ECX - zero |
|
47 |
; |
; |
48 |
bcopy: push eax |
bcopy: jecxz .ret |
49 |
push esi |
pushad |
50 |
push edi |
push word pm_bcopy |
51 |
push ecx |
call simple_pm_call |
52 |
pushf ; Saves, among others, the IF flag |
popad |
53 |
|
add edi,ecx |
54 |
|
add esi,ecx |
55 |
|
.ret: ret |
56 |
|
|
57 |
|
; |
58 |
|
; shuffle_and_boot_raw: |
59 |
|
; The new version of shuffle and boot. |
60 |
|
; Inputs: |
61 |
|
; ESI -> Pointer to list of (dst, src, len) pairs(*) |
62 |
|
; EDI -> Pointer to safe area for list + shuffler |
63 |
|
; (must not overlap this code nor the RM stack) |
64 |
|
; ECX -> Byte count of list area (for initial copy) |
65 |
|
; |
66 |
|
; If src == -1: then the memory pointed to by (dst, len) is bzeroed; |
67 |
|
; this is handled inside the bcopy routine. |
68 |
|
; |
69 |
|
; If len == 0: this marks the end of the list; dst indicates |
70 |
|
; the entry point and src the mode (0 = pm, 1 = rm) |
71 |
|
; |
72 |
|
shuffle_and_boot_raw: |
73 |
|
push word pm_shuffle |
74 |
|
call simple_pm_call |
75 |
|
; Never returns... |
76 |
|
jmp kaboom |
77 |
|
|
78 |
|
; |
79 |
|
; This routine is used to invoke a simple routine in 32-bit protected |
80 |
|
; mode (with 32-bit zero-based CS, DS, ES, and SS, with ESP pointing to the |
81 |
|
; real-mode stack even if the real-mode stack was in a nonzero SS.) |
82 |
|
; |
83 |
|
; No interrupt thunking services are provided; interrupts are disabled |
84 |
|
; for the duration of the routine. Don't run for too long at a time |
85 |
|
; unless you really mean it. |
86 |
|
; |
87 |
|
; Inputs: |
88 |
|
; On stack - pm entrypoint (IP only) |
89 |
|
; EAX, EBP preserved until real-mode exit |
90 |
|
; EBX, ECX, EDX, ESI and EDI passed to the called routine |
91 |
|
; |
92 |
|
; Outputs: |
93 |
|
; EAX, EBP restored from real-mode entry |
94 |
|
; All other registers as returned from called function |
95 |
|
; PM entrypoint cleaned off stack |
96 |
|
; |
97 |
|
simple_pm_call: |
98 |
|
push eax |
99 |
|
push ebp |
100 |
|
movzx ebp,sp ; BP is used as frame pointer |
101 |
|
pushfd ; Saves, among others, the IF flag |
102 |
push ds |
push ds |
103 |
push es |
push es |
104 |
|
push fs |
105 |
|
push gs |
106 |
|
|
107 |
cli |
cli |
108 |
call enable_a20 |
call enable_a20 |
109 |
|
|
110 |
|
mov byte [cs:bcopy_gdt.TSS+5],89h ; Mark TSS unbusy |
111 |
|
|
112 |
|
; Convert the stack segment to a base |
113 |
|
xor eax,eax |
114 |
|
mov ax,ss |
115 |
|
shl eax,4 |
116 |
|
add ebp,eax ; EBP is now an absolute frame ptr |
117 |
|
|
118 |
|
; Save the old segmented stack pointer |
119 |
|
mov [cs:.rm_esp],esp |
120 |
|
mov [cs:.rm_ss],ss |
121 |
|
|
122 |
o32 lgdt [cs:bcopy_gdt] |
o32 lgdt [cs:bcopy_gdt] |
123 |
mov eax,cr0 |
mov eax,cr0 |
124 |
or al,1 |
or al,1 |
125 |
mov cr0,eax ; Enter protected mode |
mov cr0,eax ; Enter protected mode |
126 |
jmp 08h:.in_pm |
jmp PM_CS32:.in_pm |
127 |
|
|
128 |
.in_pm: mov ax,10h ; Data segment selector |
bits 32 |
129 |
mov es,ax |
.in_pm: |
130 |
mov ds,ax |
mov ax,PM_DS32 |
131 |
|
mov ss,eax |
132 |
; Don't mess with ss, fs, and gs. They are never changed |
lea esp,[ebp-8*4-2*4] ; Flat mode stack |
133 |
; and should be able to make it back out of protected mode. |
mov es,eax |
134 |
; This works because (and only because) we don't take |
mov ds,eax |
135 |
; interrupt in protected mode. |
|
136 |
|
; Set fs, gs, tr, and ldtr in case we're on a virtual |
137 |
cmp esi,edi ; If source > destination, we might |
; machine running on Intel VT hardware -- it can't |
138 |
ja .reverse ; have to copy backwards |
; deal with a partial transition, for no good reason. |
139 |
|
|
140 |
.forward: |
mov al,PM_DS16 ; Real-mode-like segment |
141 |
mov al,cl ; Save low bits |
mov fs,eax |
142 |
and al,3 |
mov gs,eax |
143 |
shr ecx,2 ; Convert to dwords |
mov al,PM_TSS ; Intel VT really doesn't want |
144 |
a32 rep movsd ; Do our business |
ltr ax ; an invalid TR and LDTR, so give |
145 |
; At this point ecx == 0 |
xor eax,eax ; it something that it can use... |
146 |
|
lldt ax ; (sigh) |
|
mov cl,al ; Copy any fractional dword |
|
|
a32 rep movsb |
|
|
jmp .exit |
|
|
|
|
|
.reverse: |
|
|
std ; Reverse copy |
|
|
lea esi,[esi+ecx-1] ; Point to final byte |
|
|
lea edi,[edi+ecx-1] |
|
|
mov eax,ecx |
|
|
and ecx,3 |
|
|
shr eax,2 |
|
|
a32 rep movsb |
|
|
|
|
|
; Change ESI/EDI to point to the last dword, instead |
|
|
; of the last byte. |
|
|
sub esi,3 |
|
|
sub edi,3 |
|
|
mov ecx,eax |
|
|
a32 rep movsd |
|
147 |
|
|
148 |
cld |
movzx eax,word [ebp+2*4+2] |
149 |
|
call eax ; Call actual routine |
150 |
|
|
151 |
|
jmp PM_CS16:.exit |
152 |
|
bits 16 |
153 |
.exit: |
.exit: |
154 |
mov ax,18h ; "Real-mode-like" data segment |
mov ax,PM_DS16 ; "Real-mode-like" data segment |
155 |
mov es,ax |
mov es,eax |
156 |
mov ds,ax |
mov ds,eax |
157 |
|
mov ss,eax |
158 |
|
|
159 |
mov eax,cr0 |
mov eax,cr0 |
160 |
and al,~1 |
and al,~1 |
162 |
jmp 0:.in_rm |
jmp 0:.in_rm |
163 |
|
|
164 |
.in_rm: ; Back in real mode |
.in_rm: ; Back in real mode |
165 |
|
lss esp,[cs:.rm_esp] ; Restore the stack |
166 |
|
pop gs |
167 |
|
pop fs |
168 |
pop es |
pop es |
169 |
pop ds |
pop ds |
|
call disable_a20 |
|
170 |
|
|
171 |
popf ; Re-enables interrupts |
popfd ; Re-enables interrupts |
172 |
pop eax |
pop ebp |
|
pop edi |
|
|
pop esi |
|
|
add edi,eax |
|
|
add esi,eax |
|
173 |
pop eax |
pop eax |
174 |
ret |
ret 2 ; Drops the pm entry |
175 |
|
|
176 |
|
section .bss |
177 |
|
alignb 4 |
178 |
|
.rm_esp resd 1 |
179 |
|
.rm_ss resw 1 |
180 |
|
|
181 |
|
|
182 |
|
section .text |
183 |
; |
; |
184 |
; Routines to enable and disable (yuck) A20. These routines are gathered |
; Routines to enable and disable (yuck) A20. These routines are gathered |
185 |
; from tips from a couple of sources, including the Linux kernel and |
; from tips from a couple of sources, including the Linux kernel and |
187 |
; is indicated by Donnie Barnes of RedHat, the problematic system being an |
; is indicated by Donnie Barnes of RedHat, the problematic system being an |
188 |
; IBM ThinkPad 760EL. |
; IBM ThinkPad 760EL. |
189 |
; |
; |
|
; We typically toggle A20 twice for every 64K transferred. |
|
|
; |
|
|
%define io_delay call _io_delay |
|
|
%define IO_DELAY_PORT 80h ; Invalid port (we hope!) |
|
|
%define disable_wait 32 ; How long to wait for a disable |
|
|
|
|
|
; Note the skip of 2 here |
|
|
%define A20_DUNNO 0 ; A20 type unknown |
|
|
%define A20_NONE 2 ; A20 always on? |
|
|
%define A20_BIOS 4 ; A20 BIOS enable |
|
|
%define A20_KBC 6 ; A20 through KBC |
|
|
%define A20_FAST 8 ; A20 through port 92h |
|
190 |
|
|
191 |
slow_out: out dx, al ; Fall through |
section .data |
192 |
|
alignz 2 |
193 |
_io_delay: out IO_DELAY_PORT,al |
A20Ptr dw a20_dunno |
194 |
out IO_DELAY_PORT,al |
|
195 |
ret |
section .bss |
196 |
|
alignb 4 |
197 |
|
A20Test resd 1 ; Counter for testing A20 status |
198 |
|
A20Tries resb 1 ; Times until giving up on A20 |
199 |
|
|
200 |
|
section .text |
201 |
enable_a20: |
enable_a20: |
202 |
pushad |
pushad |
203 |
mov byte [cs:A20Tries],255 ; Times to try to make this work |
mov byte [cs:A20Tries],255 ; Times to try to make this work |
204 |
|
|
205 |
try_enable_a20: |
try_enable_a20: |
|
; |
|
|
; Flush the caches |
|
|
; |
|
|
%if DO_WBINVD |
|
|
call try_wbinvd |
|
|
%endif |
|
206 |
|
|
207 |
; |
; |
208 |
; If the A20 type is known, jump straight to type |
; First, see if we are on a system with no A20 gate, or the A20 gate |
209 |
|
; is already enabled for us... |
210 |
; |
; |
|
mov bp,[cs:A20Type] |
|
|
jmp word [cs:bp+A20List] |
|
|
|
|
|
; |
|
|
; First, see if we are on a system with no A20 gate |
|
|
; |
|
|
a20_dunno: |
|
211 |
a20_none: |
a20_none: |
|
mov byte [cs:A20Type], A20_NONE |
|
212 |
call a20_test |
call a20_test |
213 |
jnz a20_done |
jnz a20_done |
214 |
|
; Otherwise, see if we had something memorized... |
215 |
|
jmp word [cs:A20Ptr] |
216 |
|
|
217 |
; |
; |
218 |
; Next, try the BIOS (INT 15h AX=2401h) |
; Next, try the BIOS (INT 15h AX=2401h) |
219 |
; |
; |
220 |
|
a20_dunno: |
221 |
a20_bios: |
a20_bios: |
222 |
mov byte [cs:A20Type], A20_BIOS |
mov word [cs:A20Ptr], a20_bios |
223 |
mov ax,2401h |
mov ax,2401h |
224 |
pushf ; Some BIOSes muck with IF |
pushf ; Some BIOSes muck with IF |
225 |
int 15h |
int 15h |
236 |
call empty_8042 |
call empty_8042 |
237 |
jnz a20_done ; A20 live, no need to use KBC |
jnz a20_done ; A20 live, no need to use KBC |
238 |
|
|
239 |
mov byte [cs:A20Type], A20_KBC ; Starting KBC command sequence |
mov word [cs:A20Ptr], a20_kbc ; Starting KBC command sequence |
240 |
|
|
241 |
mov al,0D1h ; Command write |
mov al,0D1h ; Write output port |
242 |
out 064h, al |
out 064h, al |
243 |
call empty_8042_uncond |
call empty_8042_uncond |
244 |
|
|
246 |
out 060h, al |
out 060h, al |
247 |
call empty_8042_uncond |
call empty_8042_uncond |
248 |
|
|
249 |
|
; Apparently the UHCI spec assumes that A20 toggle |
250 |
|
; ends with a null command (assumed to be for sychronization?) |
251 |
|
; Put it here to see if it helps anything... |
252 |
|
mov al,0FFh ; Null command |
253 |
|
out 064h, al |
254 |
|
call empty_8042_uncond |
255 |
|
|
256 |
; Verify that A20 actually is enabled. Do that by |
; Verify that A20 actually is enabled. Do that by |
257 |
; observing a word in low memory and the same word in |
; observing a word in low memory and the same word in |
258 |
; the HMA until they are no longer coherent. Note that |
; the HMA until they are no longer coherent. Note that |
271 |
; Running out of options here. Final attempt: enable the "fast A20 gate" |
; Running out of options here. Final attempt: enable the "fast A20 gate" |
272 |
; |
; |
273 |
a20_fast: |
a20_fast: |
274 |
mov byte [cs:A20Type], A20_FAST ; Haven't used the KBC yet |
mov word [cs:A20Ptr], a20_fast |
275 |
in al, 092h |
in al, 092h |
276 |
or al,02h |
or al,02h |
277 |
and al,~01h ; Don't accidentally reset the machine! |
and al,~01h ; Don't accidentally reset the machine! |
290 |
; Oh bugger. A20 is not responding. Try frobbing it again; eventually give up |
; Oh bugger. A20 is not responding. Try frobbing it again; eventually give up |
291 |
; and report failure to the user. |
; and report failure to the user. |
292 |
; |
; |
|
|
|
|
|
|
293 |
dec byte [cs:A20Tries] |
dec byte [cs:A20Tries] |
294 |
jnz try_enable_a20 |
jnz a20_dunno ; Did we get the wrong type? |
295 |
|
|
296 |
mov si, err_a20 |
mov si, err_a20 |
297 |
jmp abort_load |
jmp abort_load |
298 |
|
|
299 |
|
section .data |
300 |
|
err_a20 db CR, LF, 'A20 gate not responding!', CR, LF, 0 |
301 |
|
section .text |
302 |
|
|
303 |
; |
; |
304 |
; A20 unmasked, proceed... |
; A20 unmasked, proceed... |
305 |
; |
; |
311 |
; This routine tests if A20 is enabled (ZF = 0). This routine |
; This routine tests if A20 is enabled (ZF = 0). This routine |
312 |
; must not destroy any register contents. |
; must not destroy any register contents. |
313 |
; |
; |
314 |
|
; The no-write early out avoids the io_delay in the (presumably common) |
315 |
|
; case of A20 already enabled (e.g. from a previous call.) |
316 |
|
; |
317 |
a20_test: |
a20_test: |
318 |
push es |
push es |
319 |
push cx |
push cx |
320 |
push ax |
push eax |
321 |
mov cx,0FFFFh ; HMA = segment 0FFFFh |
mov cx,0FFFFh ; HMA = segment 0FFFFh |
322 |
mov es,cx |
mov es,cx |
323 |
mov cx,32 ; Loop count |
mov eax,[cs:A20Test] |
324 |
mov ax,[cs:A20Test] |
mov cx,32 ; Loop count |
325 |
.a20_wait: inc ax |
jmp .test ; First iteration = early out |
326 |
mov [cs:A20Test],ax |
.wait: add eax,0x430aea41 ; A large prime number |
327 |
io_delay ; Serialize, and fix delay |
mov [cs:A20Test],eax |
328 |
cmp ax,[es:A20Test+10h] |
io_delay ; Serialize, and fix delay |
329 |
loopz .a20_wait |
.test: cmp eax,[es:A20Test+10h] |
330 |
.a20_done: pop ax |
loopz .wait |
331 |
|
.done: pop eax |
332 |
pop cx |
pop cx |
333 |
pop es |
pop es |
334 |
ret |
ret |
335 |
|
|
|
disable_a20: |
|
|
pushad |
|
|
; |
|
|
; Flush the caches |
|
|
; |
|
|
%if DO_WBINVD |
|
|
call try_wbinvd |
|
|
%endif |
|
|
|
|
|
mov bp,[cs:A20Type] |
|
|
jmp word [cs:bp+A20DList] |
|
|
|
|
|
a20d_bios: |
|
|
mov ax,2400h |
|
|
pushf ; Some BIOSes muck with IF |
|
|
int 15h |
|
|
popf |
|
|
jmp short a20d_snooze |
|
|
|
|
|
; |
|
|
; Disable the "fast A20 gate" |
|
|
; |
|
|
a20d_fast: |
|
|
in al, 092h |
|
|
and al,~03h |
|
|
out 092h, al |
|
|
jmp short a20d_snooze |
|
|
|
|
|
; |
|
|
; Disable the keyboard controller A20 gate |
|
|
; |
|
|
a20d_kbc: |
|
|
call empty_8042_uncond |
|
|
mov al,0D1h |
|
|
out 064h, al ; Command write |
|
|
call empty_8042_uncond |
|
|
mov al,0DDh ; A20 off |
|
|
out 060h, al |
|
|
call empty_8042_uncond |
|
|
; Wait a bit for it to take effect |
|
|
a20d_snooze: |
|
|
push cx |
|
|
mov cx, disable_wait |
|
|
.delayloop: call a20_test |
|
|
jz .disabled |
|
|
loop .delayloop |
|
|
.disabled: pop cx |
|
|
a20d_dunno: |
|
|
a20d_none: |
|
|
popad |
|
|
ret |
|
|
|
|
336 |
; |
; |
337 |
; Routine to empty the 8042 KBC controller. If dl != 0 |
; Routine to empty the 8042 KBC controller. If dl != 0 |
338 |
; then we will test A20 in the loop and exit if A20 is |
; then we will test A20 in the loop and exit if A20 is |
356 |
test al,2 |
test al,2 |
357 |
jnz empty_8042 |
jnz empty_8042 |
358 |
io_delay |
io_delay |
359 |
.done: ret |
.done: ret |
|
|
|
|
; |
|
|
; Execute a WBINVD instruction if possible on this CPU |
|
|
; |
|
|
%if DO_WBINVD |
|
|
try_wbinvd: |
|
|
wbinvd |
|
|
ret |
|
|
%endif |
|
360 |
|
|
361 |
; |
; |
362 |
; bcopy_over_self: |
; The 32-bit copy and shuffle code is "special", so it is in its own file |
|
; |
|
|
; This routine is used to shuffle memory around, followed by |
|
|
; invoking an entry point somewhere in low memory. This routine |
|
|
; can clobber any memory above 7C00h, we therefore have to move |
|
|
; necessary code into the trackbuf area before doing the copy, |
|
|
; and do adjustments to anything except BSS area references. |
|
|
; |
|
|
; NOTE: Since PXELINUX relocates itself, put all these |
|
|
; references in the ".earlybss" segment. |
|
|
; |
|
|
; After performing the copy, this routine resets the stack and |
|
|
; jumps to the specified entrypoint. |
|
|
; |
|
|
; IMPORTANT: This routine does not canonicalize the stack or the |
|
|
; SS register. That is the responsibility of the caller. |
|
363 |
; |
; |
364 |
; Inputs: |
%include "bcopyxx.inc" |
|
; DS:BX -> Pointer to list of (dst, src, len) pairs |
|
|
; AX -> Number of list entries |
|
|
; [CS:EntryPoint] -> CS:IP to jump to |
|
|
; On stack - initial state (fd, ad, ds, es, fs, gs) |
|
|
; |
|
|
shuffle_and_boot: |
|
|
and ax,ax |
|
|
jz .done |
|
|
.loop: |
|
|
mov edi,[bx] |
|
|
mov esi,[bx+4] |
|
|
mov ecx,[bx+8] |
|
|
call bcopy |
|
|
add bx,12 |
|
|
dec ax |
|
|
jnz .loop |
|
|
|
|
|
.done: |
|
|
pop gs |
|
|
pop fs |
|
|
pop es |
|
|
pop ds |
|
|
popad |
|
|
popfd |
|
|
jmp far [cs:EntryPoint] |
|
|
|
|
|
align 2 |
|
|
A20List dw a20_dunno, a20_none, a20_bios, a20_kbc, a20_fast |
|
|
A20DList dw a20d_dunno, a20d_none, a20d_bios, a20d_kbc, a20d_fast |
|
|
a20_adjust_cnt equ ($-A20List)/2 |
|
|
|
|
|
A20Type dw A20_NONE ; A20 type |
|
|
|
|
|
; Total size of .bcopy32 section |
|
|
alignb 4, db 0 ; Even number of dwords |
|
|
__bcopy_size equ $-__bcopy_start |
|
|
|
|
|
section .earlybss |
|
|
alignb 2 |
|
|
EntryPoint resd 1 ; CS:IP for shuffle_and_boot |
|
|
SavedSSSP resd 1 ; Saved real mode SS:SP |
|
|
A20Test resw 1 ; Counter for testing status of A20 |
|
|
A20Tries resb 1 ; Times until giving up on A20 |
|