mkinitrd-magellan/isolinux/bcopyxx.inc

;; -----------------------------------------------------------------------
;;
;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;;   Copyright 2009 Intel Corporation; author: H. Peter Anvin
;;
;;   This program is free software; you can redistribute it and/or modify
;;   it under the terms of the GNU General Public License as published by
;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;;   Boston MA 02111-1307, USA; either version 2 of the License, or
;;   (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------

;;
;; bcopy32xx.inc
;;


;
; 32-bit bcopy routine
;
; This is the actual 32-bit portion of the bcopy and shuffle and boot
; routines.  ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
; sole exception being the actual relocation code at the beginning of
; pm_shuffle_boot.
;
; It also really needs to live all in a single segment, for the
; address calculations to actually work.
;

		bits 32			; 32-bit code until the "bits 16" after bcopyxx_end
		section .bcopyxx
		align 16
bcopyxx_start	equ $			; First byte of the block pm_shuffle relocates
;
; pm_bcopy:
;
;	This is the protected-mode core of the "bcopy" routine.
;	Copies ECX bytes from ESI to EDI, or zero-fills ECX bytes at EDI
;	when ESI == -1.  Try to do aligned transfers; if the src and dst
;	are relatively misaligned, align the dst.
;
;	Inputs:
;		ESI	source address, or -1 to zero-fill the destination
;		EDI	destination address
;		ECX	byte count; guaranteed to not be zero on entry
;
;	If ESI < EDI the copy runs backwards (highest address first), so
;	a destination that overlaps the source from above does not
;	overwrite data that has not been copied yet.
;
;	The forward and zero-fill paths use the string instructions
;	without touching DF, so DF must be clear on entry; the reverse
;	path sets DF and clears it again before returning.
;
;	Clobbers ESI, EDI, ECX.  EAX, EBX and EDX are saved and restored.
;

pm_bcopy:
		push ebx
		push edx
		push eax

		cmp esi,-1
		je .bzero

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment: one byte if the dst is odd
		mov edx,edi
		shr edx,1		; CF <- bit 0 of dst
		jnc .faa1
		movsb
		dec ecx
.faa1:
		mov al,cl		; Low bits, in case we go to .f_tiny
		cmp ecx,2
		jb .f_tiny

		; One word if the dst is not yet dword-aligned.  This has
		; to look at the *current* EDI: the movsb above may have
		; moved it, which changes bit 1 for an odd start address.
		mov edx,edi
		shr edx,2		; CF <- bit 1 of current dst
		jnc .faa2
		movsw
		sub ecx,2
.faa2:

		; Bulk transfer
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		rep movsd		; Do our business
		; At this point ecx == 0

		test al,2		; Trailing word?
		jz .fab2
		movsw
.fab2:
.f_tiny:
		test al,1		; Trailing byte?
		jz .fab1
		movsb
.fab1:
.done:
		pop eax
		pop edx
		pop ebx
		ret

.reverse:
		std			; Reverse copy

		lea esi,[esi+ecx-1]	; Point to final byte
		lea edi,[edi+ecx-1]

		; Initial alignment: one byte if the final dst byte sits
		; at an even address (i.e. the end of the buffer is odd)
		mov edx,edi
		shr edx,1		; CF <- bit 0 of final dst byte address
		jc .raa1
		movsb
		dec ecx
.raa1:

		dec esi			; Point to the final word
		dec edi
		mov al,cl		; Low bits, in case we go to .r_tiny
		cmp ecx,2
		jb .r_tiny

		; EDI is even here and EDI+1 is the last byte still to be
		; copied.  If bit 1 of the *current* EDI is clear, that
		; byte is not the top byte of a dword, so copy one word
		; to get there.
		mov edx,edi
		shr edx,2		; CF <- bit 1 of current dst
		jc .raa2
		movsw
		sub ecx,2
.raa2:

		; Bulk copy
		sub esi,2		; Point to the final dword
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		rep movsd

		; Final alignment
.r_final:
		add esi,2		; Back to word granularity
		add edi,2
		test al,2		; Leading word?
		jz .rab2
		movsw
.rab2:
.r_tiny:
		inc esi			; Back to byte granularity
		inc edi
		test al,1		; Leading byte?
		jz .rab1
		movsb
.rab1:
		cld			; Leave DF clear for the caller
		jmp short .done

.bzero:
		xor eax,eax		; Fill value; BL holds the low bits here

		; Initial alignment: one byte if the dst is odd
		mov edx,edi
		shr edx,1		; CF <- bit 0 of dst
		jnc .zaa1
		stosb
		dec ecx
.zaa1:

		mov bl,cl		; Low bits, in case we go to .z_tiny
		cmp ecx,2
		jb .z_tiny

		; One word if the dst is not yet dword-aligned; as in
		; .forward this must test the current EDI, not the
		; address from before the stosb.
		mov edx,edi
		shr edx,2		; CF <- bit 1 of current dst
		jnc .zaa2
		stosw
		sub ecx,2
.zaa2:

		; Bulk
		mov bl,cl		; Save low bits
		shr ecx,2
		rep stosd

		test bl,2		; Trailing word?
		jz .zab2
		stosw
.zab2:
.z_tiny:
		test bl,1		; Trailing byte?
		jz .zab1
		stosb
.zab1:
		jmp short .done

;
; pm_shuffle (shuffle and boot):
;
; This routine is used to shuffle memory around, followed by
; invoking an entry point somewhere in low memory.  This routine
; can clobber any memory outside the bcopy special area.
;
; IMPORTANT: This routine does not set up any registers.
; It is the responsibility of the caller to generate an appropriate entry
; stub; *especially* when going to real mode.
;
; Inputs:
;	ESI		-> Pointer to list of (dst, src, len) triplets,
;			   three dwords (12 bytes) per entry
;	EDI		-> Pointer to safe area for list + shuffler
;			   (must not overlap this code nor the RM stack)
;	ECX		-> Byte count of list area (for initial copy)
;
;     If src == -1: then the memory pointed to by (dst, len) is bzeroed;
;		    this is handled inside the bcopy routine.
;
;     If len == 0:  this marks the end of the list; dst indicates
;		    the entry point and src the mode.  As coded at
;		    .done below, src == 0 takes the real-mode exit
;		    (pm_shuffle_real_mode) and any nonzero src jumps
;		    to dst in protected mode.
;		    NOTE(review): this comment used to read
;		    "(0 = pm, 1 = rm)", the opposite of what the code
;		    does -- confirm against the callers.
;
; This routine does not return; it ends by jumping to the entry point.
;
pm_shuffle:
		mov ebx,edi		; EBX <- descriptor list
		lea edx,[edi+ecx+15]	; EDX <- where to relocate our code to
		and edx,~15		; Align 16 to benefit the GDT
		; Copy the list into the safe area.  pm_bcopy clobbers
		; ESI/EDI/ECX but preserves EBX and EDX.
		call pm_bcopy
		mov edi,edx
		mov esi,bcopyxx_start
		mov ecx,bcopyxx_dwords
		lea eax,[edx+.safe-bcopyxx_start]	; Resume point
		; Relocate this code
		rep movsd
		jmp eax			; Jump to safe location
.safe:
		; From here on we execute from the relocated copy at EDX.
		; Give ourselves a safe stack: ESP starts bcopyxx_stack
		; bytes past the end of the relocated code and grows down
		; towards it.
		lea esp,[edx+bcopyxx_stack+bcopyxx_end-bcopyxx_start]
		add edx,bcopy_gdt-bcopyxx_start	; EDX <- relocated GDT
		mov [edx+2],edx		; GDT self-pointer
		lgdt [edx]		; Switch to local GDT

		; Now for the actual shuffling...
.loop:
		mov edi,[ebx]		; dst
		mov esi,[ebx+4]		; src (-1 = bzero)
		mov ecx,[ebx+8]		; len (0 = end of list)
		add ebx,12		; Advance to the next descriptor
		jecxz .done
		call pm_bcopy
		jmp .loop
.done:
		; Terminator entry: EDI = entry point, ESI = mode, ECX = 0
		push ecx		; == 0, for cleaning the flags register
		and esi,esi
		jz pm_shuffle_real_mode	; src == 0 -> real-mode exit path
		popfd			; Clean the flags
		jmp edi			; Protected mode entry

		; We have a real-mode entry point, so we need to return
		; to real mode.  Note: EDX already points to the GDT.
		;
		; On entry: EDI = entry point, EDX = relocated bcopy_gdt,
		; and a zero dword is on the stack (pushed at pm_shuffle's
		; .done) for the popfd below.
		;
		; The 16-bit code and data descriptors are rebased to the
		; entry point, so the far jump at the end lands on it at
		; offset 0.  CR0 is read and PE masked off in EAX, but it
		; is NOT written back here; presumably the caller-supplied
		; entry stub finishes the switch with EAX -- confirm.
pm_shuffle_real_mode:
		mov eax,edi
		mov [edx+PM_CS16+2],ax	; Descriptor base bits 15:0
		mov [edx+PM_DS16+2],ax
		shr eax,16
		mov [edx+PM_CS16+4],al	; Descriptor base bits 23:16
		mov [edx+PM_CS16+7],ah	; Descriptor base bits 31:24
		mov [edx+PM_DS16+4],al
		mov [edx+PM_DS16+7],ah
		mov eax,cr0
		and al,~1		; Clear PE (bit 0) in the EAX copy only
		popfd			; Clean the flags
		; No flag-changing instructions below...
		mov dx,PM_DS16		; 16-bit data selector in the low word of EDX
		mov ds,edx
		mov es,edx
		mov fs,edx
		mov gs,edx
		mov ss,edx
		jmp PM_CS16:0		; Far jump: offset 0 of the rebased CS16

		align	16
; GDT descriptor entry
;
; "desc NAME" marks the start of a descriptor: it defines the label
; bcopy_gdt.NAME at the current position and the constant PM_NAME as
; that label's byte offset from bcopy_gdt, which the code uses both as
; the selector value and as an index for patching the descriptor.
%macro desc 1
bcopy_gdt.%1:
PM_%1		equ bcopy_gdt.%1-bcopy_gdt
%endmacro

; The GDT.  pm_shuffle copies it along with the code, overwrites the
; pointer at offset 2 with the relocated address, and loads it with LGDT.
bcopy_gdt:
		dw bcopy_gdt_size-1	; Null descriptor - contains GDT
		dd bcopy_gdt		; pointer for LGDT instruction
		dw 0

		; TSS segment to keep Intel VT happy.  Intel VT is
		; unhappy about anything that doesn't smell like a
		; full-blown 32-bit OS.
	desc TSS
		dw 104-1, DummyTSS	; 08h 32-bit task state segment
		dd 00008900h		; present, dpl 0, 104 bytes @DummyTSS

		; The base fields of CS16 and DS16 are zero here and are
		; patched with the entry point by pm_shuffle_real_mode.
	desc CS16
		dd 0000ffffh		; 10h Code segment, use16, readable,
		dd 00009b00h		; present, dpl 0, cover 64K
	desc DS16
		dd 0000ffffh		; 18h Data segment, use16, read/write,
		dd 00009300h		; present, dpl 0, cover 64K
	desc CS32
		dd 0000ffffh		; 20h Code segment, use32, readable,
		dd 00cf9b00h		; present, dpl 0, cover all 4G
	desc DS32
		dd 0000ffffh		; 28h Data segment, use32, read/write,
		dd 00cf9300h		; present, dpl 0, cover all 4G

bcopy_gdt_size:	equ $-bcopy_gdt		; Size in bytes; the limit word above is size-1

		; alignz is not defined in this file; presumably it pads
		; to a 4-byte boundary (confirm against the macro definition).
		alignz 4
bcopyxx_end	equ $			; *Must* be dword-aligned!
bcopyxx_len	equ $-bcopyxx_start	; Bytes in the relocatable block
		; Count for the "rep movsd" in pm_shuffle.  The shift drops
		; any remainder, hence the dword-alignment requirement above.
bcopyxx_dwords	equ bcopyxx_len >> 2

bcopyxx_stack	equ 128			; We want this much stack
		; The +15 is for alignment
		; (pm_shuffle rounds the relocation address up to a multiple
		; of 16, which can skip up to 15 bytes).  Not referenced in
		; this file; presumably the space callers must reserve after
		; the descriptor list -- confirm against the callers.
bcopyxx_safe	equ bcopyxx_len + bcopyxx_stack + 15

;
; Space for a dummy task state segment.  It should never be actually
; accessed, but just in case it is, point to a chunk of memory that
; has a chance to not be used for anything real...
;
DummyTSS	equ 0x580		; Base address used in the TSS descriptor

		; Back to 16-bit code in the .text section for whatever
		; follows this include.
		bits 16
		section .text
1	;; -----------------------------------------------------------------------
2	;;
3	;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
4	;; Copyright 2009 Intel Corporation; author: H. Peter Anvin
5	;;
6	;; This program is free software; you can redistribute it and/or modify
7	;; it under the terms of the GNU General Public License as published by
8	;; the Free Software Foundation, Inc., 53 Temple Place Ste 330,
9	;; Boston MA 02111-1307, USA; either version 2 of the License, or
10	;; (at your option) any later version; incorporated herein by reference.
11	;;
12	;; -----------------------------------------------------------------------
13
14	;;
15	;; bcopy32xx.inc
16	;;
17
18
19	;
20	; 32-bit bcopy routine
21	;
22	; This is the actual 32-bit portion of the bcopy and shuffle and boot
23	; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
24	; sole exception being the actual relocation code at the beginning of
25	; pm_shuffle_boot.
26	;
27	; It also really needs to live all in a single segment, for the
28	; address calculations to actually work.
29	;
30
31	bits 32
32	section .bcopyxx
33	align 16
34	bcopyxx_start equ $
35	;
36	; pm_bcopy:
37	;
38	; This is the protected-mode core of the "bcopy" routine.
39	; Try to do aligned transfers; if the src and dst are relatively
40	; misaligned, align the dst.
41	;
42	; ECX is guaranteed to not be zero on entry.
43	;
44	; Clobbers ESI, EDI, ECX.
45	;
46
47	pm_bcopy:
48	push ebx
49	push edx
50	push eax
51
52	cmp esi,-1
53	je .bzero
54
55	cmp esi,edi ; If source < destination, we might
56	jb .reverse ; have to copy backwards
57
58	.forward:
59	; Initial alignment
60	mov edx,edi
61	shr edx,1
62	jnc .faa1
63	movsb
64	dec ecx
65	.faa1:
66	mov al,cl
67	cmp ecx,2
68	jb .f_tiny
69
70	shr edx,1
71	jnc .faa2
72	movsw
73	sub ecx,2
74	.faa2:
75
76	; Bulk transfer
77	mov al,cl ; Save low bits
78	shr ecx,2 ; Convert to dwords
79	rep movsd ; Do our business
80	; At this point ecx == 0
81
82	test al,2
83	jz .fab2
84	movsw
85	.fab2:
86	.f_tiny:
87	test al,1
88	jz .fab1
89	movsb
90	.fab1:
91	.done:
92	pop eax
93	pop edx
94	pop ebx
95	ret
96
97	.reverse:
98	std ; Reverse copy
99
100	lea esi,[esi+ecx-1] ; Point to final byte
101	lea edi,[edi+ecx-1]
102
103	; Initial alignment
104	mov edx,edi
105	shr edx,1
106	jc .raa1
107	movsb
108	dec ecx
109	.raa1:
110
111	dec esi
112	dec edi
113	mov al,cl
114	cmp ecx,2
115	jb .r_tiny
116	shr edx,1
117	jc .raa2
118	movsw
119	sub ecx,2
120	.raa2:
121
122	; Bulk copy
123	sub esi,2
124	sub edi,2
125	mov al,cl ; Save low bits
126	shr ecx,2
127	rep movsd
128
129	; Final alignment
130	.r_final:
131	add esi,2
132	add edi,2
133	test al,2
134	jz .rab2
135	movsw
136	.rab2:
137	.r_tiny:
138	inc esi
139	inc edi
140	test al,1
141	jz .rab1
142	movsb
143	.rab1:
144	cld
145	jmp short .done
146
147	.bzero:
148	xor eax,eax
149
150	; Initial alignment
151	mov edx,edi
152	shr edx,1
153	jnc .zaa1
154	stosb
155	dec ecx
156	.zaa1:
157
158	mov bl,cl
159	cmp ecx,2
160	jb .z_tiny
161	shr edx,1
162	jnc .zaa2
163	stosw
164	sub ecx,2
165	.zaa2:
166
167	; Bulk
168	mov bl,cl ; Save low bits
169	shr ecx,2
170	rep stosd
171
172	test bl,2
173	jz .zab2
174	stosw
175	.zab2:
176	.z_tiny:
177	test bl,1
178	jz .zab1
179	stosb
180	.zab1:
181	jmp short .done
182
183	;
184	; shuffle_and_boot:
185	;
186	; This routine is used to shuffle memory around, followed by
187	; invoking an entry point somewhere in low memory. This routine
188	; can clobber any memory outside the bcopy special area.
189	;
190	; IMPORTANT: This routine does not set up any registers.
191	; It is the responsibility of the caller to generate an appropriate entry
192	; stub; especially when going to real mode.
193	;
194	; Inputs:
195	; ESI -> Pointer to list of (dst, src, len) pairs(*)
196	; EDI -> Pointer to safe area for list + shuffler
197	; (must not overlap this code nor the RM stack)
198	; ECX -> Byte count of list area (for initial copy)
199	;
200	; If src == -1: then the memory pointed to by (dst, len) is bzeroed;
201	; this is handled inside the bcopy routine.
202	;
203	; If len == 0: this marks the end of the list; dst indicates
204	; the entry point and src the mode (as coded at .done: 0 = rm, nonzero = pm)
205	;
206	pm_shuffle:
207	mov ebx,edi ; EBX <- descriptor list
208	lea edx,[edi+ecx+15] ; EDX <- where to relocate our code to
209	and edx,~15 ; Align 16 to benefit the GDT
210	call pm_bcopy
211	mov edi,edx
212	mov esi,bcopyxx_start
213	mov ecx,bcopyxx_dwords
214	lea eax,[edx+.safe-bcopyxx_start] ; Resume point
215	; Relocate this code
216	rep movsd
217	jmp eax ; Jump to safe location
218	.safe:
219	; Give ourselves a safe stack
220	lea esp,[edx+bcopyxx_stack+bcopyxx_end-bcopyxx_start]
221	add edx,bcopy_gdt-bcopyxx_start
222	mov [edx+2],edx ; GDT self-pointer
223	lgdt [edx] ; Switch to local GDT
224
225	; Now for the actual shuffling...
226	.loop:
227	mov edi,[ebx]
228	mov esi,[ebx+4]
229	mov ecx,[ebx+8]
230	add ebx,12
231	jecxz .done
232	call pm_bcopy
233	jmp .loop
234	.done:
235	push ecx ; == 0, for cleaning the flags register
236	and esi,esi
237	jz pm_shuffle_real_mode
238	popfd ; Clean the flags
239	jmp edi ; Protected mode entry
240
241	; We have a real-mode entry point, so we need to return
242	; to real mode. Note: EDX already points to the GDT.
243	pm_shuffle_real_mode:
244	mov eax,edi
245	mov [edx+PM_CS16+2],ax
246	mov [edx+PM_DS16+2],ax
247	shr eax,16
248	mov [edx+PM_CS16+4],al
249	mov [edx+PM_CS16+7],ah
250	mov [edx+PM_DS16+4],al
251	mov [edx+PM_DS16+7],ah
252	mov eax,cr0
253	and al,~1
254	popfd ; Clean the flags
255	; No flag-changing instructions below...
256	mov dx,PM_DS16
257	mov ds,edx
258	mov es,edx
259	mov fs,edx
260	mov gs,edx
261	mov ss,edx
262	jmp PM_CS16:0
263
264	align 16
265	; GDT descriptor entry
266	%macro desc 1
267	bcopy_gdt.%1:
268	PM_%1 equ bcopy_gdt.%1-bcopy_gdt
269	%endmacro
270
271	bcopy_gdt:
272	dw bcopy_gdt_size-1 ; Null descriptor - contains GDT
273	dd bcopy_gdt ; pointer for LGDT instruction
274	dw 0
275
276	; TSS segment to keep Intel VT happy. Intel VT is
277	; unhappy about anything that doesn't smell like a
278	; full-blown 32-bit OS.
279	desc TSS
280	dw 104-1, DummyTSS ; 08h 32-bit task state segment
281	dd 00008900h ; present, dpl 0, 104 bytes @DummyTSS
282
283	desc CS16
284	dd 0000ffffh ; 10h Code segment, use16, readable,
285	dd 00009b00h ; present, dpl 0, cover 64K
286	desc DS16
287	dd 0000ffffh ; 18h Data segment, use16, read/write,
288	dd 00009300h ; present, dpl 0, cover 64K
289	desc CS32
290	dd 0000ffffh ; 20h Code segment, use32, readable,
291	dd 00cf9b00h ; present, dpl 0, cover all 4G
292	desc DS32
293	dd 0000ffffh ; 28h Data segment, use32, read/write,
294	dd 00cf9300h ; present, dpl 0, cover all 4G
295
296	bcopy_gdt_size: equ $-bcopy_gdt
297
298	alignz 4
299	bcopyxx_end equ $ ; Must be dword-aligned!
300	bcopyxx_len equ $-bcopyxx_start
301	bcopyxx_dwords equ bcopyxx_len >> 2
302
303	bcopyxx_stack equ 128 ; We want this much stack
304	; The +15 is for alignment
305	bcopyxx_safe equ bcopyxx_len + bcopyxx_stack + 15
306
307	;
308	; Space for a dummy task state segment. It should never be actually
309	; accessed, but just in case it is, point to a chunk of memory that
310	; has a chance to not be used for anything real...
311	;
312	DummyTSS equ 0x580
313
314	bits 16
315	section .text