mkinitrd-magellan/isolinux/bcopyxx.inc

;; -----------------------------------------------------------------------
;;
;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;;   Copyright 2009 Intel Corporation; author: H. Peter Anvin
;;
;;   This program is free software; you can redistribute it and/or modify
;;   it under the terms of the GNU General Public License as published by
;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;;   Boston MA 02111-1307, USA; either version 2 of the License, or
;;   (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------

;;
;; bcopyxx.inc
;;


;
; 32-bit bcopy routine
;
; This is the actual 32-bit portion of the bcopy and shuffle and boot
; routines.  ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
; sole exception being the actual relocation code at the beginning of
; pm_shuffle.
;
; It also really needs to live all in a single segment, for the
; address calculations to actually work.
;

		; Everything from bcopyxx_start up to bcopyxx_end is copied as
		; one unit by pm_shuffle, so it must stay contiguous in this
		; section.  pm_shuffle relocates it to a 16-byte-aligned
		; address, so alignment relative to bcopyxx_start is kept.
		bits 32
		section .bcopyxx
		align 16
bcopyxx_start	equ $
;
; pm_bcopy:
;
;	This is the protected-mode core of the "bcopy" routine.
;	Try to do aligned transfers; if the src and dst are relatively
;	misaligned, align the dst.
;
;	Inputs:
;	  ESI	source address, or -1 to zero-fill the destination instead
;	  EDI	destination address
;	  ECX	byte count; guaranteed to not be zero on entry
;
;	If ESI < EDI the copy runs from the top down, so a source that
;	overlaps a higher destination is copied intact.
;
;	The direction flag must be clear on entry (the forward and zero
;	paths never execute CLD); it is clear again on return.
;
;	Clobbers ESI, EDI, ECX and the arithmetic flags.
;	EAX, EBX and EDX are preserved.
;

pm_bcopy:
		push ebx
		push edx
		push eax

		cmp esi,-1
		je .bzero

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment.  The low two bits of -EDI are the number
		; of bytes needed to reach the next dword boundary: bit 0
		; calls for a byte move, bit 1 for a word move.  (Testing the
		; bits of EDI itself gets the word step wrong whenever the
		; byte step has just advanced an odd destination.)
		mov edx,edi
		neg edx
		shr edx,1
		jnc .faa1
		movsb
		dec ecx
.faa1:
		mov al,cl		; Low bits, in case we go to .f_tiny
		cmp ecx,2
		jb .f_tiny		; 0 or 1 byte left

		shr edx,1
		jnc .faa2
		movsw
		sub ecx,2
.faa2:

		; Bulk transfer
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		rep movsd		; Do our business
		; At this point ecx == 0

		; Trailing word and/or byte, as recorded in AL
		test al,2
		jz .fab2
		movsw
.fab2:
.f_tiny:
		test al,1
		jz .fab1
		movsb
.fab1:
.done:
		pop eax
		pop edx
		pop ebx
		ret

.reverse:
		std			; Reverse copy

		; EDX <- one past the last destination byte.  Its low two
		; bits are the number of bytes to peel off the top so that
		; the remaining region ends on a dword boundary.
		lea edx,[edi+ecx]

		lea esi,[esi+ecx-1]	; Point to final byte
		lea edi,[edi+ecx-1]

		; Initial alignment
		shr edx,1
		jnc .raa1
		movsb
		dec ecx
.raa1:

		dec esi			; Point to final word
		dec edi
		mov al,cl		; Low bits, in case we go to .r_tiny
		cmp ecx,2
		jb .r_tiny		; 0 or 1 byte left
		shr edx,1
		jnc .raa2
		movsw
		sub ecx,2
.raa2:

		; Bulk copy
		sub esi,2		; Point to final dword
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		rep movsd

		; Final alignment
.r_final:
		add esi,2		; Back to word granularity
		add edi,2
		test al,2
		jz .rab2
		movsw
.rab2:
.r_tiny:
		inc esi			; Back to byte granularity
		inc edi
		test al,1
		jz .rab1
		movsb
.rab1:
		cld			; Leave the direction flag clear
		jmp short .done

.bzero:
		xor eax,eax		; Fill pattern; EAX is restored at .done

		; Initial alignment, same scheme as .forward.  BL carries the
		; saved low bits here because AL is part of the fill pattern.
		mov edx,edi
		neg edx
		shr edx,1
		jnc .zaa1
		stosb
		dec ecx
.zaa1:

		mov bl,cl		; Low bits, in case we go to .z_tiny
		cmp ecx,2
		jb .z_tiny		; 0 or 1 byte left
		shr edx,1
		jnc .zaa2
		stosw
		sub ecx,2
.zaa2:

		; Bulk
		mov bl,cl		; Save low bits
		shr ecx,2
		rep stosd

		test bl,2
		jz .zab2
		stosw
.zab2:
.z_tiny:
		test bl,1
		jz .zab1
		stosb
.zab1:
		jmp short .done

;
; pm_shuffle:
;
; This routine is used to shuffle memory around, followed by
; invoking an entry point somewhere in low memory.  This routine
; can clobber any memory outside the bcopy special area.
;
; IMPORTANT: This routine does not set up any registers.
; It is the responsibility of the caller to generate an appropriate entry
; stub; *especially* when going to real mode.
;
; Inputs:
;	ESI		-> Pointer to list of (dst, src, len) triples(*)
;	EDI		-> Pointer to safe area for list + shuffler
;			   (must not overlap this code nor the RM stack)
;	ECX		-> Byte count of list area (for initial copy)
;
;     If src == -1: then the memory pointed to by (dst, len) is bzeroed;
;		    this is handled inside the bcopy routine.
;
;     If len == 0:  this marks the end of the list; dst indicates
;		    the entry point and src the mode.  As implemented
;		    below, src == 0 selects the real-mode entry path and
;		    any nonzero src a protected-mode jump to dst.
;
;     (*) dst, src, and len are four bytes each
;
pm_shuffle:
		; Step 1: copy the descriptor list (ESI, ECX bytes) to the
		; safe area at EDI.  pm_bcopy preserves EBX and EDX.
		mov ebx,edi		; EBX <- descriptor list
		lea edx,[edi+ecx+15]	; EDX <- where to relocate our code to
		and edx,~15		; Align 16 to benefit the GDT
		call pm_bcopy

		; Step 2: copy this whole code/data unit right behind the
		; list.  bcopyxx_start is used as an absolute address here;
		; this is the one part that is not position-independent.
		mov edi,edx
		mov esi,bcopyxx_start
		mov ecx,bcopyxx_dwords
		lea eax,[edx+.safe-bcopyxx_start]	; Resume point
		; Relocate this code
		rep movsd
		jmp eax			; Jump to safe location
.safe:
		; From here on we execute from the relocated copy; EDX still
		; holds its base address.
		; Give ourselves a safe stack
		; (top of stack = bcopyxx_stack bytes past the relocated end)
		lea esp,[edx+bcopyxx_stack+bcopyxx_end-bcopyxx_start]
		add edx,bcopy_gdt-bcopyxx_start	; EDX <- relocated GDT
		mov [edx+2],edx		; GDT self-pointer
		lgdt [edx]		; Switch to local GDT

		; Now for the actual shuffling...
		; Each list entry is three dwords: dst, src, len.
.loop:
		mov edi,[ebx]
		mov esi,[ebx+4]
		mov ecx,[ebx+8]
		add ebx,12
		jecxz .done		; len == 0 terminates the list
		call pm_bcopy
		jmp .loop
.done:
		; Terminating entry: EDI = entry point, ESI = mode, ECX = 0.
		push ecx		; == 0, for cleaning the flags register
		; NOTE(review): src == 0 takes the real-mode path below and a
		; nonzero src jumps to EDI in protected mode.  The routine's
		; header comment states the opposite polarity; confirm against
		; the callers which one is intended.
		and esi,esi
		jz pm_shuffle_real_mode
		popfd			; Clean the flags
		jmp edi			; Protected mode entry

		; We have a real-mode entry point, so we need to return
		; to real mode.  Note: EDX already points to the GDT.
pm_shuffle_real_mode:
		; Patch the base address of the 16-bit code and data
		; descriptors to the entry point: base bits 15:0 live at
		; offset 2, bits 23:16 at offset 4, bits 31:24 at offset 7.
		mov eax,edi
		mov [edx+PM_CS16+2],ax
		mov [edx+PM_DS16+2],ax
		shr eax,16
		mov [edx+PM_CS16+4],al
		mov [edx+PM_CS16+7],ah
		mov [edx+PM_DS16+4],al
		mov [edx+PM_DS16+7],ah
		; EAX <- CR0 with bit 0 (PE) cleared.  It is not written back
		; to CR0 in this routine; presumably the caller-supplied entry
		; stub does that.
		mov eax,cr0
		and al,~1
		popfd			; Clean the flags
		; No flag-changing instructions below...
		; Only the low 16 bits of EDX are used by the segment loads.
		mov dx,PM_DS16
		mov ds,edx
		mov es,edx
		mov fs,edx
		mov gs,edx
		mov ss,edx
		; Far jump to offset 0 of the 16-bit code segment, whose base
		; was set to the entry point above.
		jmp PM_CS16:0

		align	16
; GDT descriptor entry
;
; "desc NAME" marks the start of a descriptor: it defines the label
; bcopy_gdt.NAME and the constant PM_NAME, the byte offset of that
; descriptor from bcopy_gdt (used as the selector value by pm_shuffle).
%macro desc 1
bcopy_gdt.%1:
PM_%1		equ bcopy_gdt.%1-bcopy_gdt
%endmacro

; The first 8-byte slot doubles as the LGDT operand: a 16-bit limit
; followed by a 32-bit base.  pm_shuffle overwrites the base with the
; run-time address of the relocated table before executing LGDT.
bcopy_gdt:
		dw bcopy_gdt_size-1	; Null descriptor - contains GDT
		dd bcopy_gdt		; pointer for LGDT instruction
		dw 0

		; TSS segment to keep Intel VT happy.  Intel VT is
		; unhappy about anything that doesn't smell like a
		; full-blown 32-bit OS.
	desc TSS
		dw 104-1, DummyTSS	; 08h 32-bit task state segment
		dd 00008900h		; present, dpl 0, 104 bytes @DummyTSS

	; The bases of CS16 and DS16 are zero here; pm_shuffle_real_mode
	; patches them to the entry point before using them.
	desc CS16
		dd 0000ffffh		; 10h Code segment, use16, readable,
		dd 00009b00h		; present, dpl 0, cover 64K
	desc DS16
		dd 0000ffffh		; 18h Data segment, use16, read/write,
		dd 00009300h		; present, dpl 0, cover 64K
	desc CS32
		dd 0000ffffh		; 20h Code segment, use32, readable,
		dd 00cf9b00h		; present, dpl 0, cover all 4G
	desc DS32
		dd 0000ffffh		; 28h Data segment, use32, read/write,
		dd 00cf9300h		; present, dpl 0, cover all 4G

bcopy_gdt_size:	equ $-bcopy_gdt

		; alignz is not defined in this file; presumably it pads
		; with zero bytes up to a 4-byte boundary.  The relocation in
		; pm_shuffle copies bcopyxx_dwords whole dwords, so a length
		; that is not a multiple of 4 would lose the tail bytes.
		alignz 4
bcopyxx_end	equ $			; *Must* be dword-aligned!
bcopyxx_len	equ $-bcopyxx_start
bcopyxx_dwords	equ bcopyxx_len >> 2

		; pm_shuffle sets ESP to the relocated bcopyxx_end plus
		; bcopyxx_stack.
bcopyxx_stack	equ 128			; We want this much stack
		; The +15 is for alignment
		; (pm_shuffle rounds the relocation target up to 16 bytes)
bcopyxx_safe	equ bcopyxx_len + bcopyxx_stack + 15

;
; Space for a dummy task state segment.  It should never be actually
; accessed, but just in case it is, point to a chunk of memory that
; has a chance to not be used for anything real...
;
; Used as the low 16 bits of the TSS descriptor's base in bcopy_gdt.
DummyTSS	equ 0x580

		; Switch back to 16-bit code in .text, presumably for
		; whatever the including file places after this one.
		bits 16
		section .text
1	niro	1133	;; -----------------------------------------------------------------------
2			;;
3			;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
4			;; Copyright 2009 Intel Corporation; author: H. Peter Anvin
5			;;
6			;; This program is free software; you can redistribute it and/or modify
7			;; it under the terms of the GNU General Public License as published by
8			;; the Free Software Foundation, Inc., 53 Temple Place Ste 330,
9			;; Boston MA 02111-1307, USA; either version 2 of the License, or
10			;; (at your option) any later version; incorporated herein by reference.
11			;;
12			;; -----------------------------------------------------------------------
13
14			;;
15			;; bcopy32xx.inc
16			;;
17
18
19			;
20			; 32-bit bcopy routine
21			;
22			; This is the actual 32-bit portion of the bcopy and shuffle and boot
23			; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
24			; sole exception being the actual relocation code at the beginning of
25			; pm_shuffle_boot.
26			;
27			; It also really needs to live all in a single segment, for the
28			; address calculcations to actually work.
29			;
30
31			bits 32
32			section .bcopyxx
33			align 16
34			bcopyxx_start equ $
35			;
36			; pm_bcopy:
37			;
38			; This is the protected-mode core of the "bcopy" routine.
39			; Try to do aligned transfers; if the src and dst are relatively
40			; misaligned, align the dst.
41			;
42			; ECX is guaranteed to not be zero on entry.
43			;
44			; Clobbers ESI, EDI, ECX.
45			;
46
47			pm_bcopy:
48			push ebx
49			push edx
50			push eax
51
52			cmp esi,-1
53			je .bzero
54
55			cmp esi,edi ; If source < destination, we might
56			jb .reverse ; have to copy backwards
57
58			.forward:
59			; Initial alignment
60			mov edx,edi
61			shr edx,1
62			jnc .faa1
63			movsb
64			dec ecx
65			.faa1:
66			mov al,cl
67			cmp ecx,2
68			jb .f_tiny
69
70			shr edx,1
71			jnc .faa2
72			movsw
73			sub ecx,2
74			.faa2:
75
76			; Bulk transfer
77			mov al,cl ; Save low bits
78			shr ecx,2 ; Convert to dwords
79			rep movsd ; Do our business
80			; At this point ecx == 0
81
82			test al,2
83			jz .fab2
84			movsw
85			.fab2:
86			.f_tiny:
87			test al,1
88			jz .fab1
89			movsb
90			.fab1:
91			.done:
92			pop eax
93			pop edx
94			pop ebx
95			ret
96
97			.reverse:
98			std ; Reverse copy
99
100			lea esi,[esi+ecx-1] ; Point to final byte
101			lea edi,[edi+ecx-1]
102
103			; Initial alignment
104			mov edx,edi
105			shr edx,1
106			jc .raa1
107			movsb
108			dec ecx
109			.raa1:
110
111			dec esi
112			dec edi
113			mov al,cl
114			cmp ecx,2
115			jb .r_tiny
116			shr edx,1
117			jc .raa2
118			movsw
119			sub ecx,2
120			.raa2:
121
122			; Bulk copy
123			sub esi,2
124			sub edi,2
125			mov al,cl ; Save low bits
126			shr ecx,2
127			rep movsd
128
129			; Final alignment
130			.r_final:
131			add esi,2
132			add edi,2
133			test al,2
134			jz .rab2
135			movsw
136			.rab2:
137			.r_tiny:
138			inc esi
139			inc edi
140			test al,1
141			jz .rab1
142			movsb
143			.rab1:
144			cld
145			jmp short .done
146
147			.bzero:
148			xor eax,eax
149
150			; Initial alignment
151			mov edx,edi
152			shr edx,1
153			jnc .zaa1
154			stosb
155			dec ecx
156			.zaa1:
157
158			mov bl,cl
159			cmp ecx,2
160			jb .z_tiny
161			shr edx,1
162			jnc .zaa2
163			stosw
164			sub ecx,2
165			.zaa2:
166
167			; Bulk
168			mov bl,cl ; Save low bits
169			shr ecx,2
170			rep stosd
171
172			test bl,2
173			jz .zab2
174			stosw
175			.zab2:
176			.z_tiny:
177			test bl,1
178			jz .zab1
179			stosb
180			.zab1:
181			jmp short .done
182
183			;
184			; shuffle_and_boot:
185			;
186			; This routine is used to shuffle memory around, followed by
187			; invoking an entry point somewhere in low memory. This routine
188			; can clobber any memory outside the bcopy special area.
189			;
190			; IMPORTANT: This routine does not set up any registers.
191			; It is the responsibility of the caller to generate an appropriate entry
192			; stub; especially when going to real mode.
193			;
194			; Inputs:
195			; ESI -> Pointer to list of (dst, src, len) pairs(*)
196			; EDI -> Pointer to safe area for list + shuffler
197			; (must not overlap this code nor the RM stack)
198			; ECX -> Byte count of list area (for initial copy)
199			;
200			; If src == -1: then the memory pointed to by (dst, len) is bzeroed;
201			; this is handled inside the bcopy routine.
202			;
203			; If len == 0: this marks the end of the list; dst indicates
204			; the entry point and src the mode (0 = pm, 1 = rm)
205			;
206			pm_shuffle:
207			mov ebx,edi ; EBX <- descriptor list
208			lea edx,[edi+ecx+15] ; EDX <- where to relocate our code to
209			and edx,~15 ; Align 16 to benefit the GDT
210			call pm_bcopy
211			mov edi,edx
212			mov esi,bcopyxx_start
213			mov ecx,bcopyxx_dwords
214			lea eax,[edx+.safe-bcopyxx_start] ; Resume point
215			; Relocate this code
216			rep movsd
217			jmp eax ; Jump to safe location
218			.safe:
219			; Give ourselves a safe stack
220			lea esp,[edx+bcopyxx_stack+bcopyxx_end-bcopyxx_start]
221			add edx,bcopy_gdt-bcopyxx_start
222			mov [edx+2],edx ; GDT self-pointer
223			lgdt [edx] ; Switch to local GDT
224
225			; Now for the actual shuffling...
226			.loop:
227			mov edi,[ebx]
228			mov esi,[ebx+4]
229			mov ecx,[ebx+8]
230			add ebx,12
231			jecxz .done
232			call pm_bcopy
233			jmp .loop
234			.done:
235			push ecx ; == 0, for cleaning the flags register
236			and esi,esi
237			jz pm_shuffle_real_mode
238			popfd ; Clean the flags
239			jmp edi ; Protected mode entry
240
241			; We have a real-mode entry point, so we need to return
242			; to real mode. Note: EDX already points to the GDT.
243			pm_shuffle_real_mode:
244			mov eax,edi
245			mov [edx+PM_CS16+2],ax
246			mov [edx+PM_DS16+2],ax
247			shr eax,16
248			mov [edx+PM_CS16+4],al
249			mov [edx+PM_CS16+7],ah
250			mov [edx+PM_DS16+4],al
251			mov [edx+PM_DS16+7],ah
252			mov eax,cr0
253			and al,~1
254			popfd ; Clean the flags
255			; No flag-changing instructions below...
256			mov dx,PM_DS16
257			mov ds,edx
258			mov es,edx
259			mov fs,edx
260			mov gs,edx
261			mov ss,edx
262			jmp PM_CS16:0
263
264			align 16
265			; GDT descriptor entry
266			%macro desc 1
267			bcopy_gdt.%1:
268			PM_%1 equ bcopy_gdt.%1-bcopy_gdt
269			%endmacro
270
271			bcopy_gdt:
272			dw bcopy_gdt_size-1 ; Null descriptor - contains GDT
273			dd bcopy_gdt ; pointer for LGDT instruction
274			dw 0
275
276			; TSS segment to keep Intel VT happy. Intel VT is
277			; unhappy about anything that doesn't smell like a
278			; full-blown 32-bit OS.
279			desc TSS
280			dw 104-1, DummyTSS ; 08h 32-bit task state segment
281			dd 00008900h ; present, dpl 0, 104 bytes @DummyTSS
282
283			desc CS16
284			dd 0000ffffh ; 10h Code segment, use16, readable,
285			dd 00009b00h ; present, dpl 0, cover 64K
286			desc DS16
287			dd 0000ffffh ; 18h Data segment, use16, read/write,
288			dd 00009300h ; present, dpl 0, cover 64K
289			desc CS32
290			dd 0000ffffh ; 20h Code segment, use32, readable,
291			dd 00cf9b00h ; present, dpl 0, cover all 4G
292			desc DS32
293			dd 0000ffffh ; 28h Data segment, use32, read/write,
294			dd 00cf9300h ; present, dpl 0, cover all 4G
295
296			bcopy_gdt_size: equ $-bcopy_gdt
297
298			alignz 4
299			bcopyxx_end equ $ ; Must be dword-aligned!
300			bcopyxx_len equ $-bcopyxx_start
301			bcopyxx_dwords equ bcopyxx_len >> 2
302
303			bcopyxx_stack equ 128 ; We want this much stack
304			; The +15 is for alignment
305			bcopyxx_safe equ bcopyxx_len + bcopyxx_stack + 15
306
307			;
308			; Space for a dummy task state segment. It should never be actually
309			; accessed, but just in case it is, point to a chunk of memory that
310			; has a chance to not be used for anything real...
311			;
312			DummyTSS equ 0x580
313
314			bits 16
315			section .text