author    | YamaArashi <shadow962@live.com> | 2016-04-27 00:00:40 -0700
committer | YamaArashi <shadow962@live.com> | 2016-04-27 00:00:40 -0700
commit    | 9dc75fe3b4be91d6066c8e870eacec954117cc08 (patch)
tree      | 0ff095d5dd600f23a98952682edf5226f40f0391 /libgcc/lib1thumb.asm
parent    | 9e5f6a79618cb7df0b7d3816472b530c3b7d9c1a (diff)
reorganize files
Diffstat (limited to 'libgcc/lib1thumb.asm')
-rwxr-xr-x | libgcc/lib1thumb.asm | 736
1 files changed, 736 insertions, 0 deletions
diff --git a/libgcc/lib1thumb.asm b/libgcc/lib1thumb.asm
new file mode 100755
index 0000000..e0ff746
--- /dev/null
+++ b/libgcc/lib1thumb.asm
@@ -0,0 +1,736 @@
+@ libgcc1 routines for ARM cpu.
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+
+/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file.  (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.  */
+
+        .code 16
+
+#ifdef __elf__
+#define __PLT__ (PLT)
+#define TYPE(x) .type SYM(x),function
+#define SIZE(x) .size SYM(x), . - SYM(x)
+#else
+#define __PLT__
+#define TYPE(x)
+#define SIZE(x)
+#endif
+
+#define RET mov pc, lr
+
+#define SYM(x) x
+
+work .req r4 @ XXXX is this safe ?
+
+#ifdef L_udivsi3
+
+dividend .req r0
+divisor .req r1
+result .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+        .text
+        .globl SYM (__udivsi3)
+        TYPE (__udivsi3)
+        .align 0
+        .thumb_func
+SYM (__udivsi3):
+        cmp divisor, #0
+        beq Ldiv0
+        mov curbit, #1
+        mov result, #0
+
+        push { work }
+        cmp dividend, divisor
+        bcc Lgot_result
+
+        @ Load the constant 0x10000000 into our work register
+        mov work, #1
+        lsl work, #28
+Loop1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp divisor, work
+        bcs Lbignum
+        cmp divisor, dividend
+        bcs Lbignum
+        lsl divisor, #4
+        lsl curbit, #4
+        b Loop1
+
+Lbignum:
+        @ Set work to 0x80000000
+        lsl work, #3
+Loop2:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        cmp divisor, work
+        bcs Loop3
+        cmp divisor, dividend
+        bcs Loop3
+        lsl divisor, #1
+        lsl curbit, #1
+        b Loop2
+
+Loop3:
+        @ Test for possible subtractions, and note which bits
+        @ are done in the result.  On the final pass, this may subtract
+        @ too much from the dividend, but the result will be ok, since the
+        @ "bit" will have been shifted out at the bottom.
+        cmp dividend, divisor
+        bcc Over1
+        sub dividend, dividend, divisor
+        orr result, result, curbit
+Over1:
+        lsr work, divisor, #1
+        cmp dividend, work
+        bcc Over2
+        sub dividend, dividend, work
+        lsr work, curbit, #1
+        orr result, work
+Over2:
+        lsr work, divisor, #2
+        cmp dividend, work
+        bcc Over3
+        sub dividend, dividend, work
+        lsr work, curbit, #2
+        orr result, work
+Over3:
+        lsr work, divisor, #3
+        cmp dividend, work
+        bcc Over4
+        sub dividend, dividend, work
+        lsr work, curbit, #3
+        orr result, work
+Over4:
+        cmp dividend, #0 @ Early termination?
+        beq Lgot_result
+        lsr curbit, #4 @ No, any more bits to do?
+        beq Lgot_result
+        lsr divisor, #4
+        b Loop3
+Lgot_result:
+        mov r0, result
+        pop { work }
+        RET
+
+Ldiv0:
+        push { lr }
+        bl SYM (__div0) __PLT__
+        mov r0, #0 @ about as wrong as it could be
+        pop { pc }
+
+        SIZE (__udivsi3)
+
+#endif /* L_udivsi3 */
+
+#ifdef L_umodsi3
+
+dividend .req r0
+divisor .req r1
+overdone .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+        .text
+        .globl SYM (__umodsi3)
+        TYPE (__umodsi3)
+        .align 0
+        .thumb_func
+SYM (__umodsi3):
+        cmp divisor, #0
+        beq Ldiv0
+        mov curbit, #1
+        cmp dividend, divisor
+        bcs Over1
+        RET
+
+Over1:
+        @ Load the constant 0x10000000 into our work register
+        push { work }
+        mov work, #1
+        lsl work, #28
+Loop1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp divisor, work
+        bcs Lbignum
+        cmp divisor, dividend
+        bcs Lbignum
+        lsl divisor, #4
+        lsl curbit, #4
+        b Loop1
+
+Lbignum:
+        @ Set work to 0x80000000
+        lsl work, #3
+Loop2:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        cmp divisor, work
+        bcs Loop3
+        cmp divisor, dividend
+        bcs Loop3
+        lsl divisor, #1
+        lsl curbit, #1
+        b Loop2
+
+Loop3:
+        @ Test for possible subtractions.  On the final pass, this may
+        @ subtract too much from the dividend, so keep track of which
+        @ subtractions are done; we can fix them up afterwards...
+        mov overdone, #0
+        cmp dividend, divisor
+        bcc Over2
+        sub dividend, dividend, divisor
+Over2:
+        lsr work, divisor, #1
+        cmp dividend, work
+        bcc Over3
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #1
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over3:
+        lsr work, divisor, #2
+        cmp dividend, work
+        bcc Over4
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #2
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over4:
+        lsr work, divisor, #3
+        cmp dividend, work
+        bcc Over5
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #3
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over5:
+        mov ip, curbit
+        cmp dividend, #0 @ Early termination?
+        beq Over6
+        lsr curbit, #4 @ No, any more bits to do?
+        beq Over6
+        lsr divisor, #4
+        b Loop3
+
+Over6:
+        @ Any subtractions that we should not have done will be recorded in
+        @ the top three bits of "overdone".  Exactly which were not needed
+        @ are governed by the position of the bit, stored in ip.
+        @ If we terminated early, because dividend became zero,
+        @ then none of the below will match, since the bit in ip will not be
+        @ in the bottom nibble.
+
+        mov work, #0xe
+        lsl work, #28
+        and overdone, work
+        bne Over7
+        pop { work }
+        RET @ No fixups needed
+Over7:
+        mov curbit, ip
+        mov work, #3
+        ror curbit, work
+        tst overdone, curbit
+        beq Over8
+        lsr work, divisor, #3
+        add dividend, dividend, work
+Over8:
+        mov curbit, ip
+        mov work, #2
+        ror curbit, work
+        tst overdone, curbit
+        beq Over9
+        lsr work, divisor, #2
+        add dividend, dividend, work
+Over9:
+        mov curbit, ip
+        mov work, #1
+        ror curbit, work
+        tst overdone, curbit
+        beq Over10
+        lsr work, divisor, #1
+        add dividend, dividend, work
+Over10:
+        pop { work }
+        RET
+
+Ldiv0:
+        push { lr }
+        bl SYM (__div0) __PLT__
+        mov r0, #0 @ about as wrong as it could be
+        pop { pc }
+
+        SIZE (__umodsi3)
+
+#endif /* L_umodsi3 */
+
+#ifdef L_divsi3
+
+dividend .req r0
+divisor .req r1
+result .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+        .text
+        .globl SYM (__divsi3)
+        TYPE (__divsi3)
+        .align 0
+        .thumb_func
+SYM (__divsi3):
+        cmp divisor, #0
+        beq Ldiv0
+
+        push { work }
+        mov work, dividend
+        eor work, divisor @ Save the sign of the result.
+        mov ip, work
+        mov curbit, #1
+        mov result, #0
+        cmp divisor, #0
+        bpl Over1
+        neg divisor, divisor @ Loops below use unsigned.
+Over1:
+        cmp dividend, #0
+        bpl Over2
+        neg dividend, dividend
+Over2:
+        cmp dividend, divisor
+        bcc Lgot_result
+
+        mov work, #1
+        lsl work, #28
+Loop1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp divisor, work
+        bcs Lbignum
+        cmp divisor, dividend
+        bcs Lbignum
+        lsl divisor, #4
+        lsl curbit, #4
+        b Loop1
+
+Lbignum:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        lsl work, #3
+Loop2:
+        cmp divisor, work
+        bcs Loop3
+        cmp divisor, dividend
+        bcs Loop3
+        lsl divisor, #1
+        lsl curbit, #1
+        b Loop2
+
+Loop3:
+        @ Test for possible subtractions, and note which bits
+        @ are done in the result.  On the final pass, this may subtract
+        @ too much from the dividend, but the result will be ok, since the
+        @ "bit" will have been shifted out at the bottom.
+        cmp dividend, divisor
+        bcc Over3
+        sub dividend, dividend, divisor
+        orr result, result, curbit
+Over3:
+        lsr work, divisor, #1
+        cmp dividend, work
+        bcc Over4
+        sub dividend, dividend, work
+        lsr work, curbit, #1
+        orr result, work
+Over4:
+        lsr work, divisor, #2
+        cmp dividend, work
+        bcc Over5
+        sub dividend, dividend, work
+        lsr work, curbit, #2
+        orr result, result, work
+Over5:
+        lsr work, divisor, #3
+        cmp dividend, work
+        bcc Over6
+        sub dividend, dividend, work
+        lsr work, curbit, #3
+        orr result, result, work
+Over6:
+        cmp dividend, #0 @ Early termination?
+        beq Lgot_result
+        lsr curbit, #4 @ No, any more bits to do?
+        beq Lgot_result
+        lsr divisor, #4
+        b Loop3
+
+Lgot_result:
+        mov r0, result
+        mov work, ip
+        cmp work, #0
+        bpl Over7
+        neg r0, r0
+Over7:
+        pop { work }
+        RET
+
+Ldiv0:
+        push { lr }
+        bl SYM (__div0) __PLT__
+        mov r0, #0 @ about as wrong as it could be
+        pop { pc }
+
+        SIZE (__divsi3)
+
+#endif /* L_divsi3 */
+
+#ifdef L_modsi3
+
+dividend .req r0
+divisor .req r1
+overdone .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+        .text
+        .globl SYM (__modsi3)
+        TYPE (__modsi3)
+        .align 0
+        .thumb_func
+SYM (__modsi3):
+        mov curbit, #1
+        cmp divisor, #0
+        beq Ldiv0
+        bpl Over1
+        neg divisor, divisor @ Loops below use unsigned.
+Over1:
+        push { work }
+        @ Need to save the sign of the dividend; unfortunately, we need
+        @ ip later on.  Must do this after saving the original value of
+        @ the work register, because we will pop this value off first.
+        push { dividend }
+        cmp dividend, #0
+        bpl Over2
+        neg dividend, dividend
+Over2:
+        cmp dividend, divisor
+        bcc Lgot_result
+        mov work, #1
+        lsl work, #28
+Loop1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp divisor, work
+        bcs Lbignum
+        cmp divisor, dividend
+        bcs Lbignum
+        lsl divisor, #4
+        lsl curbit, #4
+        b Loop1
+
+Lbignum:
+        @ Set work to 0x80000000
+        lsl work, #3
+Loop2:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        cmp divisor, work
+        bcs Loop3
+        cmp divisor, dividend
+        bcs Loop3
+        lsl divisor, #1
+        lsl curbit, #1
+        b Loop2
+
+Loop3:
+        @ Test for possible subtractions.  On the final pass, this may
+        @ subtract too much from the dividend, so keep track of which
+        @ subtractions are done; we can fix them up afterwards...
+        mov overdone, #0
+        cmp dividend, divisor
+        bcc Over3
+        sub dividend, dividend, divisor
+Over3:
+        lsr work, divisor, #1
+        cmp dividend, work
+        bcc Over4
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #1
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over4:
+        lsr work, divisor, #2
+        cmp dividend, work
+        bcc Over5
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #2
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over5:
+        lsr work, divisor, #3
+        cmp dividend, work
+        bcc Over6
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #3
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over6:
+        mov ip, curbit
+        cmp dividend, #0 @ Early termination?
+        beq Over7
+        lsr curbit, #4 @ No, any more bits to do?
+        beq Over7
+        lsr divisor, #4
+        b Loop3
+
+Over7:
+        @ Any subtractions that we should not have done will be recorded in
+        @ the top three bits of "overdone".  Exactly which were not needed
+        @ are governed by the position of the bit, stored in ip.
+        @ If we terminated early, because dividend became zero,
+        @ then none of the below will match, since the bit in ip will not be
+        @ in the bottom nibble.
+        mov work, #0xe
+        lsl work, #28
+        and overdone, work
+        beq Lgot_result
+
+        mov curbit, ip
+        mov work, #3
+        ror curbit, work
+        tst overdone, curbit
+        beq Over8
+        lsr work, divisor, #3
+        add dividend, dividend, work
+Over8:
+        mov curbit, ip
+        mov work, #2
+        ror curbit, work
+        tst overdone, curbit
+        beq Over9
+        lsr work, divisor, #2
+        add dividend, dividend, work
+Over9:
+        mov curbit, ip
+        mov work, #1
+        ror curbit, work
+        tst overdone, curbit
+        beq Lgot_result
+        lsr work, divisor, #1
+        add dividend, dividend, work
+Lgot_result:
+        pop { work }
+        cmp work, #0
+        bpl Over10
+        neg dividend, dividend
+Over10:
+        pop { work }
+        RET
+
+Ldiv0:
+        push { lr }
+        bl SYM (__div0) __PLT__
+        mov r0, #0 @ about as wrong as it could be
+        pop { pc }
+
+        SIZE (__modsi3)
+
+#endif /* L_modsi3 */
+
+#ifdef L_dvmd_tls
+
+        .globl SYM (__div0)
+        TYPE (__div0)
+        .align 0
+        .thumb_func
+SYM (__div0):
+        RET
+
+        SIZE (__div0)
+
+#endif /* L_dvmd_tls */
+
+
+#ifdef L_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code.
+   The address of the function to be called is loaded into a register and
+   then one of these labels is called via a BL instruction.  This puts the
+   return address into the link register with the bottom bit set, and the
+   code here switches to the correct mode before executing the function.  */
+
+        .text
+        .align 0
+
+.macro call_via register
+        .globl SYM (_call_via_\register)
+        TYPE (_call_via_\register)
+        .thumb_func
+SYM (_call_via_\register):
+        bx \register
+        nop
+
+        SIZE (_call_via_\register)
+.endm
+
+        call_via r0
+        call_via r1
+        call_via r2
+        call_via r3
+        call_via r4
+        call_via r5
+        call_via r6
+        call_via r7
+        call_via r8
+        call_via r9
+        call_via sl
+        call_via fp
+        call_via ip
+        call_via sp
+        call_via lr
+
+#endif /* L_call_via_rX */
+
+#ifdef L_interwork_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code,
+   when the target address is in an unknown instruction set.  The address
+   of the function to be called is loaded into a register and then one of
+   these labels is called via a BL instruction.  This puts the return
+   address into the link register with the bottom bit set, and the code
+   here switches to the correct mode before executing the function.
+   Unfortunately the target code cannot be relied upon to return via a BX
+   instruction, so instead we have to store the return address on the
+   stack and allow the called function to return here instead.  Upon
+   return we recover the real return address and use a BX to get back to
+   Thumb mode.  */
+
+        .text
+        .align 0
+
+        .code 32
+        .globl _arm_return
+_arm_return:
+        ldmia r13!, {r12}
+        bx r12
+
+.macro interwork register
+        .code 16
+
+        .globl SYM (_interwork_call_via_\register)
+        TYPE (_interwork_call_via_\register)
+        .thumb_func
+SYM (_interwork_call_via_\register):
+        bx pc
+        nop
+
+        .code 32
+        .globl .Lchange_\register
+.Lchange_\register:
+        tst \register, #1
+        stmeqdb r13!, {lr}
+        adreq lr, _arm_return
+        bx \register
+
+        SIZE (_interwork_call_via_\register)
+.endm
+
+        interwork r0
+        interwork r1
+        interwork r2
+        interwork r3
+        interwork r4
+        interwork r5
+        interwork r6
+        interwork r7
+        interwork r8
+        interwork r9
+        interwork sl
+        interwork fp
+        interwork ip
+        interwork sp
+
+        /* The lr case has to be handled a little differently... */
+        .code 16
+        .globl SYM (_interwork_call_via_lr)
+        TYPE (_interwork_call_via_lr)
+        .thumb_func
+SYM (_interwork_call_via_lr):
+        bx pc
+        nop
+
+        .code 32
+        .globl .Lchange_lr
+.Lchange_lr:
+        tst lr, #1
+        stmeqdb r13!, {lr}
+        mov ip, lr
+        adreq lr, _arm_return
+        bx ip
+
+        SIZE (_interwork_call_via_lr)
+
+#endif /* L_interwork_call_via_rX */
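
A note for readers tracing __udivsi3 above: it is a classic restoring (shift-and-subtract) divider. The divisor is scaled up in step with a quotient bit, four bits at a time until it either nears the top of the register or exceeds the dividend, then one bit at a time; the main loop then makes four unrolled trial subtractions per pass and shifts both values back down between passes. Since only the quotient is returned, the final pass is allowed to over-subtract from the dividend. The C sketch below mirrors that control flow; the name udivsi3_sketch and the zero-divisor early return are mine (the real routine branches to __div0), so treat it as an illustration under those assumptions, not the shipped implementation.

    #include <stdint.h>

    /* Sketch of the shift-and-subtract strategy used by __udivsi3. */
    uint32_t udivsi3_sketch(uint32_t dividend, uint32_t divisor)
    {
        uint32_t curbit = 1, result = 0;
        if (divisor == 0)
            return 0;                        /* the asm calls __div0 here */
        if (dividend < divisor)
            return 0;
        /* Scale the divisor up four bits at a time (matching the 4-way
           unrolled main loop), then singly near the top of the register
           so it cannot be shifted past bit 31. */
        while (divisor < 0x10000000u && divisor < dividend) {
            divisor <<= 4;  curbit <<= 4;
        }
        while (divisor < 0x80000000u && divisor < dividend) {
            divisor <<= 1;  curbit <<= 1;
        }
        for (;;) {
            /* Four unrolled trial subtractions per pass. */
            for (unsigned n = 0; n <= 3; n++)
                if (dividend >= (divisor >> n)) {
                    dividend -= divisor >> n;
                    result |= curbit >> n;   /* ORs in 0 once the bit has
                                                shifted out: harmless */
                }
            if (dividend == 0)               /* early termination */
                break;
            curbit >>= 4;
            if (curbit == 0)                 /* no quotient bits left */
                break;
            divisor >>= 4;
        }
        return result;
    }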
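__umodsi3 uses the same scaling scheme but must leave an exact remainder, and the final pass of the unrolled loop can subtract divisor>>1 through divisor>>3 even when the corresponding quotient bit has already been shifted out of the bottom of curbit. The rotate is the trick: ror of curbit by n lands in the top bits of overdone exactly in that shifted-out case (and in harmless low bits otherwise), so the epilogue can mask with 0xe0000000 and add the phantom subtractions back. A C rendering of that bookkeeping, with the same hypothetical naming caveats as the previous sketch:

    /* Rotate right by n (1 <= n <= 31), like the Thumb ROR instruction. */
    static uint32_t ror32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32u - n));
    }

    /* Sketch of the over-subtraction bookkeeping used by __umodsi3. */
    uint32_t umodsi3_sketch(uint32_t dividend, uint32_t divisor)
    {
        uint32_t curbit = 1, overdone = 0, last = 1;
        if (divisor == 0)
            return 0;                        /* the asm calls __div0 here */
        if (dividend < divisor)
            return dividend;
        while (divisor < 0x10000000u && divisor < dividend) {
            divisor <<= 4;  curbit <<= 4;
        }
        while (divisor < 0x80000000u && divisor < dividend) {
            divisor <<= 1;  curbit <<= 1;
        }
        for (;;) {
            overdone = 0;                    /* only the final pass counts */
            if (dividend >= divisor)
                dividend -= divisor;
            for (unsigned n = 1; n <= 3; n++)
                if (dividend >= (divisor >> n)) {
                    dividend -= divisor >> n;
                    /* Wraps into the top three bits precisely when
                       curbit >> n would have been zero. */
                    overdone |= ror32(curbit, n);
                }
            last = curbit;                   /* the asm parks this in ip */
            if (dividend == 0)
                break;
            curbit >>= 4;
            if (curbit == 0)
                break;
            divisor >>= 4;
        }
        overdone &= 0xe0000000u;
        if (overdone)                        /* undo phantom subtractions */
            for (unsigned n = 1; n <= 3; n++)
                if (overdone & ror32(last, n))
                    dividend += divisor >> n;
        return dividend;
    }

For example, 8 % 3 scales the divisor to 48; the final pass subtracts 1 (3>>1) and records ror32(1, 1) = 0x80000000, and the epilogue adds the 1 back, returning 2.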
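__divsi3 and __modsi3 are thin signed wrappers around the same unsigned cores: the quotient is negative when the operands' signs differ (the eor into ip at the top of __divsi3), while the remainder takes the sign of the dividend alone, which is why __modsi3 pushes the original dividend and tests it on the way out. A sketch reusing the two hypothetical helpers above:

    /* Sign handling as in __divsi3: quotient sign is the EOR of the
       operands' signs. */
    int32_t divsi3_sketch(int32_t a, int32_t b)
    {
        int neg = (a ^ b) < 0;
        uint32_t ua = a < 0 ? 0u - (uint32_t)a : (uint32_t)a;
        uint32_t ub = b < 0 ? 0u - (uint32_t)b : (uint32_t)b;
        uint32_t q = udivsi3_sketch(ua, ub);
        return neg ? (int32_t)(0u - q) : (int32_t)q;
    }

    /* Sign handling as in __modsi3: remainder takes the dividend's sign. */
    int32_t modsi3_sketch(int32_t a, int32_t b)
    {
        int neg = a < 0;
        uint32_t ua = a < 0 ? 0u - (uint32_t)a : (uint32_t)a;
        uint32_t ub = b < 0 ? 0u - (uint32_t)b : (uint32_t)b;
        uint32_t r = umodsi3_sketch(ua, ub);
        return neg ? (int32_t)(0u - r) : (int32_t)r;
    }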