Magellan Linux

Diff of /trunk/mkinitrd-magellan/isolinux/bcopy32.inc

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1132 by niro, Sat Sep 1 22:45:15 2007 UTC revision 1133 by niro, Thu Aug 19 09:50:43 2010 UTC
# Line 1  Line 1 
 ;; $Id: bcopy32.inc,v 1.1 2007-09-01 22:44:04 niro Exp $  
1  ;; -----------------------------------------------------------------------  ;; -----------------------------------------------------------------------
2  ;;    ;;
3  ;;   Copyright 1994-2005 H. Peter Anvin - All Rights Reserved  ;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
4    ;;   Copyright 2009 Intel Corporation; author: H. Peter Anvin
5  ;;  ;;
6  ;;   This program is free software; you can redistribute it and/or modify  ;;   This program is free software; you can redistribute it and/or modify
7  ;;   it under the terms of the GNU General Public License as published by  ;;   it under the terms of the GNU General Public License as published by
# Line 13  Line 13 
13    
14  ;;  ;;
15  ;; bcopy32.inc  ;; bcopy32.inc
16  ;;  ;;
17  ;; 32-bit bcopy routine for real mode  ;; 32-bit bcopy routine for real mode
18  ;;  ;;
19    
# Line 27  Line 27 
27  ; segments, but this stuff is painful enough as it is without having to rely  ; segments, but this stuff is painful enough as it is without having to rely
28  ; on everything happening "as it ought to."  ; on everything happening "as it ought to."
29  ;  ;
 ; NOTE: this code is relocated into low memory, just after the .earlybss  
 ; segment, in order to support to "bcopy over self" operation.  
 ;  
30    
31   section .bcopy32   bits 16
32   align 8   section .text
 __bcopy_start:  
   
  ; This is in the .text segment since it needs to be  
  ; contiguous with the rest of the bcopy stuff  
   
 bcopy_gdt: dw bcopy_gdt_size-1 ; Null descriptor - contains GDT  
  dd bcopy_gdt ; pointer for LGDT instruction  
  dw 0  
  dd 0000ffffh ; Code segment, use16, readable,  
  dd 00009b00h ; present, dpl 0, cover 64K  
  dd 0000ffffh ; Data segment, use16, read/write,  
  dd 008f9300h ; present, dpl 0, cover all 4G  
  dd 0000ffffh ; Data segment, use16, read/write,  
  dd 00009300h ; present, dpl 0, cover 64K  
  ; The rest are used for COM32 only  
  dd 0000ffffh ; Code segment, use32, readable,  
  dd 00cf9b00h ; present, dpl 0, cover all 4G  
  dd 0000ffffh ; Data segment, use32, read/write,  
  dd 00cf9300h ; present, dpl 0, cover all 4G  
 bcopy_gdt_size: equ $-bcopy_gdt  
33    
34  ;  ;
35  ; bcopy:  ; bcopy:
36  ; 32-bit copy, overlap safe  ; 32-bit copy, overlap safe
37  ;  ;
38  ; Inputs:  ; Inputs:
39  ; ESI - source pointer  ; ESI - source pointer (-1 means do bzero rather than bcopy)
40  ; EDI - target pointer  ; EDI - target pointer
41  ; ECX - byte count  ; ECX - byte count
42  ; DF - zero  ; DF - zero
43  ;  ;
44  ; Outputs:  ; Outputs:
45  ; ESI - first byte after source  ; ESI - first byte after source (garbage if ESI == -1 on entry)
46  ; EDI - first byte after target  ; EDI - first byte after target
 ; ECX - zero  
47  ;  ;
48  bcopy: push eax  bcopy: jecxz .ret
49   push esi   pushad
50   push edi   push word pm_bcopy
51   push ecx   call simple_pm_call
52   pushf ; Saves, among others, the IF flag   popad
53     add edi,ecx
54     add esi,ecx
55    .ret: ret
56    
57    ;
58    ; shuffle_and_boot_raw:
59    ; The new version of shuffle and boot.
60    ; Inputs:
61    ; ESI -> Pointer to list of (dst, src, len) pairs(*)
62    ; EDI -> Pointer to safe area for list + shuffler
63    ;   (must not overlap this code nor the RM stack)
64    ; ECX -> Byte count of list area (for initial copy)
65    ;
66    ;     If src == -1: then the memory pointed to by (dst, len) is bzeroed;
67    ;    this is handled inside the bcopy routine.
68    ;
69    ;     If len == 0:  this marks the end of the list; dst indicates
70    ;    the entry point and src the mode (0 = pm, 1 = rm)
71    ;
72    shuffle_and_boot_raw:
73     push word pm_shuffle
74     call simple_pm_call
75     ; Never returns...
76     jmp kaboom
77    
78    ;
79    ; This routine is used to invoke a simple routine in 32-bit protected
80    ; mode (with 32-bit zero-based CS, DS, ES, and SS, with ESP pointing to the
81    ; real-mode stack even if the real-mode stack was in a nonzero SS.)
82    ;
83    ; No interrupt thunking services are provided; interrupts are disabled
84    ; for the duration of the routine.  Don't run for too long at a time
85    ; unless you really mean it.
86    ;
87    ; Inputs:
88    ; On stack - pm entrypoint (IP only)
89    ; EAX, EBP preserved until real-mode exit
90    ; EBX, ECX, EDX, ESI and EDI passed to the called routine
91    ;
92    ; Outputs:
93    ; EAX, EBP restored from real-mode entry
94    ; All other registers as returned from called function
95    ; PM entrypoint cleaned off stack
96    ;
97    simple_pm_call:
98     push eax
99     push ebp
100     movzx ebp,sp ; BP is used as frame pointer
101     pushfd ; Saves, among others, the IF flag
102   push ds   push ds
103   push es   push es
104     push fs
105     push gs
106    
107   cli   cli
108   call enable_a20   call enable_a20
109    
110     mov byte [cs:bcopy_gdt.TSS+5],89h ; Mark TSS unbusy
111    
112     ; Convert the stack segment to a base
113     xor eax,eax
114     mov ax,ss
115     shl eax,4
116     add ebp,eax ; EBP is now an absolute frame ptr
117    
118     ; Save the old segmented stack pointer
119     mov [cs:.rm_esp],esp
120     mov [cs:.rm_ss],ss
121    
122   o32 lgdt [cs:bcopy_gdt]   o32 lgdt [cs:bcopy_gdt]
123   mov eax,cr0   mov eax,cr0
124   or al,1   or al,1
125   mov cr0,eax ; Enter protected mode   mov cr0,eax ; Enter protected mode
126   jmp 08h:.in_pm   jmp PM_CS32:.in_pm
127    
128  .in_pm: mov ax,10h ; Data segment selector   bits 32
129   mov es,ax  .in_pm:
130   mov ds,ax   mov ax,PM_DS32
131     mov ss,eax
132   ; Don't mess with ss, fs, and gs.  They are never changed   lea esp,[ebp-8*4-2*4] ; Flat mode stack
133   ; and should be able to make it back out of protected mode.   mov es,eax
134   ; This works because (and only because) we don't take   mov ds,eax
135   ; interrupt in protected mode.  
136     ; Set fs, gs, tr, and ldtr in case we're on a virtual
137   cmp esi,edi ; If source > destination, we might   ; machine running on Intel VT hardware -- it can't
138   ja .reverse ; have to copy backwards   ; deal with a partial transition, for no good reason.
139    
140  .forward:   mov al,PM_DS16 ; Real-mode-like segment
141   mov al,cl ; Save low bits   mov fs,eax
142   and al,3   mov gs,eax
143   shr ecx,2 ; Convert to dwords   mov al,PM_TSS ; Intel VT really doesn't want
144   a32 rep movsd ; Do our business   ltr ax ; an invalid TR and LDTR, so give
145   ; At this point ecx == 0   xor eax,eax ; it something that it can use...
146     lldt ax ; (sigh)
  mov cl,al ; Copy any fractional dword  
  a32 rep movsb  
  jmp .exit  
   
 .reverse:  
  std ; Reverse copy  
  lea esi,[esi+ecx-1] ; Point to final byte  
  lea edi,[edi+ecx-1]  
  mov eax,ecx  
  and ecx,3  
  shr eax,2  
  a32 rep movsb  
   
  ; Change ESI/EDI to point to the last dword, instead  
  ; of the last byte.  
  sub esi,3  
  sub edi,3  
  mov ecx,eax  
  a32 rep movsd  
147    
148   cld   movzx eax,word [ebp+2*4+2]
149     call eax ; Call actual routine
150    
151     jmp PM_CS16:.exit
152     bits 16
153  .exit:  .exit:
154   mov ax,18h ; "Real-mode-like" data segment   mov ax,PM_DS16 ; "Real-mode-like" data segment
155   mov es,ax   mov es,eax
156   mov ds,ax   mov ds,eax
157     mov ss,eax
158    
159   mov eax,cr0   mov eax,cr0
160   and al,~1   and al,~1
# Line 138  bcopy: push eax Line 162  bcopy: push eax
162   jmp 0:.in_rm   jmp 0:.in_rm
163    
164  .in_rm: ; Back in real mode  .in_rm: ; Back in real mode
165     lss esp,[cs:.rm_esp] ; Restore the stack
166     pop gs
167     pop fs
168   pop es   pop es
169   pop ds   pop ds
  call disable_a20  
170    
171   popf ; Re-enables interrupts   popfd ; Re-enables interrupts
172   pop eax   pop ebp
  pop edi  
  pop esi  
  add edi,eax  
  add esi,eax  
173   pop eax   pop eax
174   ret   ret 2 ; Drops the pm entry
175    
176     section .bss
177     alignb 4
178    .rm_esp resd 1
179    .rm_ss resw 1
180    
181    
182     section .text
183  ;  ;
184  ; Routines to enable and disable (yuck) A20.  These routines are gathered  ; Routines to enable and disable (yuck) A20.  These routines are gathered
185  ; from tips from a couple of sources, including the Linux kernel and  ; from tips from a couple of sources, including the Linux kernel and
# Line 158  bcopy: push eax Line 187  bcopy: push eax
187  ; is indicated by Donnie Barnes of RedHat, the problematic system being an  ; is indicated by Donnie Barnes of RedHat, the problematic system being an
188  ; IBM ThinkPad 760EL.  ; IBM ThinkPad 760EL.
189  ;  ;
 ; We typically toggle A20 twice for every 64K transferred.  
 ;  
 %define io_delay call _io_delay  
 %define IO_DELAY_PORT 80h ; Invalid port (we hope!)  
 %define disable_wait 32 ; How long to wait for a disable  
   
 ; Note the skip of 2 here  
 %define A20_DUNNO 0 ; A20 type unknown  
 %define A20_NONE 2 ; A20 always on?  
 %define A20_BIOS 4 ; A20 BIOS enable  
 %define A20_KBC 6 ; A20 through KBC  
 %define A20_FAST 8 ; A20 through port 92h  
190    
191  slow_out: out dx, al ; Fall through   section .data
192     alignz 2
193  _io_delay: out IO_DELAY_PORT,al  A20Ptr dw a20_dunno
194   out IO_DELAY_PORT,al  
195   ret   section .bss
196     alignb 4
197    A20Test resd 1 ; Counter for testing A20 status
198    A20Tries resb 1 ; Times until giving up on A20
199    
200     section .text
201  enable_a20:  enable_a20:
202   pushad   pushad
203   mov byte [cs:A20Tries],255 ; Times to try to make this work   mov byte [cs:A20Tries],255 ; Times to try to make this work
204    
205  try_enable_a20:  try_enable_a20:
 ;  
 ; Flush the caches  
 ;  
 %if DO_WBINVD  
  call try_wbinvd  
 %endif  
206    
207  ;  ;
208  ; If the A20 type is known, jump straight to type  ; First, see if we are on a system with no A20 gate, or the A20 gate
209    ; is already enabled for us...
210  ;  ;
  mov bp,[cs:A20Type]  
  jmp word [cs:bp+A20List]  
   
 ;  
 ; First, see if we are on a system with no A20 gate  
 ;  
 a20_dunno:  
211  a20_none:  a20_none:
  mov byte [cs:A20Type], A20_NONE  
212   call a20_test   call a20_test
213   jnz a20_done   jnz a20_done
214     ; Otherwise, see if we had something memorized...
215     jmp word [cs:A20Ptr]
216    
217  ;  ;
218  ; Next, try the BIOS (INT 15h AX=2401h)  ; Next, try the BIOS (INT 15h AX=2401h)
219  ;  ;
220    a20_dunno:
221  a20_bios:  a20_bios:
222   mov byte [cs:A20Type], A20_BIOS   mov word [cs:A20Ptr], a20_bios
223   mov ax,2401h   mov ax,2401h
224   pushf ; Some BIOSes muck with IF   pushf ; Some BIOSes muck with IF
225   int 15h   int 15h
# Line 225  a20_kbc: Line 236  a20_kbc:
236   call empty_8042   call empty_8042
237   jnz a20_done ; A20 live, no need to use KBC   jnz a20_done ; A20 live, no need to use KBC
238    
239   mov byte [cs:A20Type], A20_KBC ; Starting KBC command sequence   mov word [cs:A20Ptr], a20_kbc ; Starting KBC command sequence
240    
241   mov al,0D1h ; Command write   mov al,0D1h ; Write output port
242   out 064h, al   out 064h, al
243   call empty_8042_uncond   call empty_8042_uncond
244    
# Line 235  a20_kbc: Line 246  a20_kbc:
246   out 060h, al   out 060h, al
247   call empty_8042_uncond   call empty_8042_uncond
248    
249     ; Apparently the UHCI spec assumes that A20 toggle
250     ; ends with a null command (assumed to be for sychronization?)
251     ; Put it here to see if it helps anything...
252     mov al,0FFh ; Null command
253     out 064h, al
254     call empty_8042_uncond
255    
256   ; Verify that A20 actually is enabled.  Do that by   ; Verify that A20 actually is enabled.  Do that by
257   ; observing a word in low memory and the same word in   ; observing a word in low memory and the same word in
258   ; the HMA until they are no longer coherent.  Note that   ; the HMA until they are no longer coherent.  Note that
# Line 253  a20_kbc: Line 271  a20_kbc:
271  ; Running out of options here.  Final attempt: enable the "fast A20 gate"  ; Running out of options here.  Final attempt: enable the "fast A20 gate"
272  ;  ;
273  a20_fast:  a20_fast:
274   mov byte [cs:A20Type], A20_FAST ; Haven't used the KBC yet   mov word [cs:A20Ptr], a20_fast
275   in al, 092h   in al, 092h
276   or al,02h   or al,02h
277   and al,~01h ; Don't accidentally reset the machine!   and al,~01h ; Don't accidentally reset the machine!
# Line 272  a20_fast: Line 290  a20_fast:
290  ; Oh bugger.  A20 is not responding.  Try frobbing it again; eventually give up  ; Oh bugger.  A20 is not responding.  Try frobbing it again; eventually give up
291  ; and report failure to the user.  ; and report failure to the user.
292  ;  ;
   
   
293   dec byte [cs:A20Tries]   dec byte [cs:A20Tries]
294   jnz try_enable_a20   jnz a20_dunno ; Did we get the wrong type?
295    
296   mov si, err_a20   mov si, err_a20
297   jmp abort_load   jmp abort_load
298    
299     section .data
300    err_a20 db CR, LF, 'A20 gate not responding!', CR, LF, 0
301     section .text
302    
303  ;  ;
304  ; A20 unmasked, proceed...  ; A20 unmasked, proceed...
305  ;  ;
# Line 290  a20_done: popad Line 311  a20_done: popad
311  ; This routine tests if A20 is enabled (ZF = 0).  This routine  ; This routine tests if A20 is enabled (ZF = 0).  This routine
312  ; must not destroy any register contents.  ; must not destroy any register contents.
313  ;  ;
314    ; The no-write early out avoids the io_delay in the (presumably common)
315    ; case of A20 already enabled (e.g. from a previous call.)
316    ;
317  a20_test:  a20_test:
318   push es   push es
319   push cx   push cx
320   push ax   push eax
321   mov cx,0FFFFh ; HMA = segment 0FFFFh   mov cx,0FFFFh ; HMA = segment 0FFFFh
322   mov es,cx   mov es,cx
323   mov cx,32 ; Loop count   mov eax,[cs:A20Test]
324   mov ax,[cs:A20Test]   mov cx,32 ; Loop count
325  .a20_wait: inc ax   jmp .test ; First iteration = early out
326   mov [cs:A20Test],ax  .wait: add eax,0x430aea41 ; A large prime number
327   io_delay ; Serialize, and fix delay   mov [cs:A20Test],eax
328   cmp ax,[es:A20Test+10h]   io_delay ; Serialize, and fix delay
329   loopz .a20_wait  .test: cmp eax,[es:A20Test+10h]
330  .a20_done: pop ax   loopz .wait
331    .done: pop eax
332   pop cx   pop cx
333   pop es   pop es
334   ret   ret
335    
 disable_a20:  
  pushad  
 ;  
 ; Flush the caches  
 ;  
 %if DO_WBINVD  
  call try_wbinvd  
 %endif  
   
  mov bp,[cs:A20Type]  
  jmp word [cs:bp+A20DList]  
   
 a20d_bios:  
  mov ax,2400h  
  pushf ; Some BIOSes muck with IF  
  int 15h  
  popf  
  jmp short a20d_snooze  
   
 ;  
 ; Disable the "fast A20 gate"  
 ;  
 a20d_fast:  
  in al, 092h  
  and al,~03h  
  out 092h, al  
  jmp short a20d_snooze  
   
 ;  
 ; Disable the keyboard controller A20 gate  
 ;  
 a20d_kbc:  
  call empty_8042_uncond  
  mov al,0D1h  
  out 064h, al ; Command write  
  call empty_8042_uncond  
  mov al,0DDh ; A20 off  
  out 060h, al  
  call empty_8042_uncond  
  ; Wait a bit for it to take effect  
 a20d_snooze:  
  push cx  
  mov cx, disable_wait  
 .delayloop: call a20_test  
  jz .disabled  
  loop .delayloop  
 .disabled: pop cx  
 a20d_dunno:  
 a20d_none:  
  popad  
  ret  
   
336  ;  ;
337  ; Routine to empty the 8042 KBC controller.  If dl != 0  ; Routine to empty the 8042 KBC controller.  If dl != 0
338  ; then we will test A20 in the loop and exit if A20 is  ; then we will test A20 in the loop and exit if A20 is
# Line 383  empty_8042: Line 356  empty_8042:
356   test al,2   test al,2
357   jnz empty_8042   jnz empty_8042
358   io_delay   io_delay
359  .done: ret  .done: ret
   
 ;  
 ; Execute a WBINVD instruction if possible on this CPU  
 ;  
 %if DO_WBINVD  
 try_wbinvd:  
  wbinvd  
  ret  
 %endif  
360    
361  ;  ;
362  ; bcopy_over_self:  ; The 32-bit copy and shuffle code is "special", so it is in its own file
 ;  
 ; This routine is used to shuffle memory around, followed by  
 ; invoking an entry point somewhere in low memory.  This routine  
 ; can clobber any memory above 7C00h, we therefore have to move  
 ; necessary code into the trackbuf area before doing the copy,  
 ; and do adjustments to anything except BSS area references.  
 ;  
 ; NOTE: Since PXELINUX relocates itself, put all these  
 ; references in the ".earlybss" segment.  
 ;  
 ; After performing the copy, this routine resets the stack and  
 ; jumps to the specified entrypoint.  
 ;  
 ; IMPORTANT: This routine does not canonicalize the stack or the  
 ; SS register.  That is the responsibility of the caller.  
363  ;  ;
364  ; Inputs:  %include "bcopyxx.inc"
 ; DS:BX -> Pointer to list of (dst, src, len) pairs  
 ; AX -> Number of list entries  
 ; [CS:EntryPoint] -> CS:IP to jump to  
 ; On stack - initial state (fd, ad, ds, es, fs, gs)  
 ;  
 shuffle_and_boot:  
  and ax,ax  
  jz .done  
 .loop:  
  mov edi,[bx]  
  mov esi,[bx+4]  
  mov ecx,[bx+8]  
  call bcopy  
  add bx,12  
  dec ax  
  jnz .loop  
   
 .done:  
  pop gs  
  pop fs  
  pop es  
  pop ds  
  popad  
  popfd  
  jmp far [cs:EntryPoint]  
   
  align 2  
 A20List dw a20_dunno, a20_none, a20_bios, a20_kbc, a20_fast  
 A20DList dw a20d_dunno, a20d_none, a20d_bios, a20d_kbc, a20d_fast  
 a20_adjust_cnt equ ($-A20List)/2  
   
 A20Type dw A20_NONE ; A20 type  
   
  ; Total size of .bcopy32 section  
  alignb 4, db 0 ; Even number of dwords  
 __bcopy_size equ $-__bcopy_start  
   
  section .earlybss  
  alignb 2  
 EntryPoint resd 1 ; CS:IP for shuffle_and_boot  
 SavedSSSP resd 1 ; Saved real mode SS:SP  
 A20Test resw 1 ; Counter for testing status of A20  
 A20Tries resb 1 ; Times until giving up on A20  

Legend:
Removed from v.1132  
changed lines
  Added in v.1133