author    YamaArashi <shadow962@live.com>    2016-04-27 00:00:40 -0700
committer YamaArashi <shadow962@live.com>    2016-04-27 00:00:40 -0700
commit    9dc75fe3b4be91d6066c8e870eacec954117cc08 (patch)
tree      0ff095d5dd600f23a98952682edf5226f40f0391 /libgcc/lib1thumb.asm
parent    9e5f6a79618cb7df0b7d3816472b530c3b7d9c1a (diff)
reorganize files
Diffstat (limited to 'libgcc/lib1thumb.asm')
-rwxr-xr-x  libgcc/lib1thumb.asm  736
1 file changed, 736 insertions, 0 deletions
diff --git a/libgcc/lib1thumb.asm b/libgcc/lib1thumb.asm
new file mode 100755
index 0000000..e0ff746
--- /dev/null
+++ b/libgcc/lib1thumb.asm
@@ -0,0 +1,736 @@
+@ libgcc1 routines for ARM cpu.
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+
+/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file. (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* As a special exception, if you link this library with other files,
+ some of which are compiled with GCC, to produce an executable,
+ this library does not by itself cause the resulting executable
+ to be covered by the GNU General Public License.
+ This exception does not however invalidate any other reasons why
+ the executable file might be covered by the GNU General Public License. */
+
+ .code 16
+
+#ifdef __elf__
+#define __PLT__ (PLT)
+#define TYPE(x) .type SYM(x),function
+#define SIZE(x) .size SYM(x), . - SYM(x)
+#else
+#define __PLT__
+#define TYPE(x)
+#define SIZE(x)
+#endif
+
+#define RET mov pc, lr
+
+#define SYM(x) x
+
+work .req r4 @ XXXX is this safe ?
+
+#ifdef L_udivsi3
+
+dividend .req r0
+divisor .req r1
+result .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+ .text
+ .globl SYM (__udivsi3)
+ TYPE (__udivsi3)
+ .align 0
+ .thumb_func
+SYM (__udivsi3):
+ cmp divisor, #0
+ beq Ldiv0
+ mov curbit, #1
+ mov result, #0
+
+ push { work }
+ cmp dividend, divisor
+ bcc Lgot_result
+
+ @ Load the constant 0x10000000 into our work register
+ mov work, #1
+ lsl work, #28
+Loop1:
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+ cmp divisor, work
+ bcs Lbignum
+ cmp divisor, dividend
+ bcs Lbignum
+ lsl divisor, #4
+ lsl curbit, #4
+ b Loop1
+
+Lbignum:
+ @ Set work to 0x80000000
+ lsl work, #3
+Loop2:
+ @ For a very big divisor, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, work
+ bcs Loop3
+ cmp divisor, dividend
+ bcs Loop3
+ lsl divisor, #1
+ lsl curbit, #1
+ b Loop2
+
+Loop3:
+ @ Test for possible subtractions, and note which bits
+ @ are done in the result. On the final pass, this may subtract
+ @ too much from the dividend, but the result will be ok, since the
+ @ "bit" will have been shifted out at the bottom.
+ cmp dividend, divisor
+ bcc Over1
+ sub dividend, dividend, divisor
+ orr result, result, curbit
+Over1:
+ lsr work, divisor, #1
+ cmp dividend, work
+ bcc Over2
+ sub dividend, dividend, work
+ lsr work, curbit, #1
+ orr result, work
+Over2:
+ lsr work, divisor, #2
+ cmp dividend, work
+ bcc Over3
+ sub dividend, dividend, work
+ lsr work, curbit, #2
+ orr result, work
+Over3:
+ lsr work, divisor, #3
+ cmp dividend, work
+ bcc Over4
+ sub dividend, dividend, work
+ lsr work, curbit, #3
+ orr result, work
+Over4:
+ cmp dividend, #0 @ Early termination?
+ beq Lgot_result
+ lsr curbit, #4 @ No, any more bits to do?
+ beq Lgot_result
+ lsr divisor, #4
+ b Loop3
+Lgot_result:
+ mov r0, result
+ pop { work }
+ RET
+
+Ldiv0:
+ push { lr }
+ bl SYM (__div0) __PLT__
+ mov r0, #0 @ about as wrong as it could be
+ pop { pc }
+
+ SIZE (__udivsi3)
+
+#endif /* L_udivsi3 */
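
Each #ifdef L_* block in this file is meant to be assembled on its own: the libgcc build typically assembles the file once per routine with -DL_udivsi3, -DL_umodsi3 and so on, so each routine lands in its own object file. The routine above is a plain shift-and-subtract divider, unrolled to four trial subtractions per pass. For reference, a minimal C sketch of the same scheme (illustrative only; the name soft_udiv is made up here and is not part of the patch):

    unsigned int soft_udiv(unsigned int dividend, unsigned int divisor)
    {
        unsigned int result = 0, curbit = 1;

        if (divisor == 0)
            return 0;                    /* the asm calls __div0 here */

        /* Shift the divisor up towards the dividend, keeping curbit
           aligned with the amount shifted.  */
        while (divisor < dividend && !(divisor & 0x80000000u)) {
            divisor <<= 1;
            curbit  <<= 1;
        }

        /* Trial-subtract back down, setting one quotient bit per
           successful subtraction.  */
        while (curbit != 0) {
            if (dividend >= divisor) {
                dividend -= divisor;
                result   |= curbit;
            }
            divisor >>= 1;
            curbit  >>= 1;
        }
        return result;
    }
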
+
+#ifdef L_umodsi3
+
+dividend .req r0
+divisor .req r1
+overdone .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+ .text
+ .globl SYM (__umodsi3)
+ TYPE (__umodsi3)
+ .align 0
+ .thumb_func
+SYM (__umodsi3):
+ cmp divisor, #0
+ beq Ldiv0
+ mov curbit, #1
+ cmp dividend, divisor
+ bcs Over1
+ RET
+
+Over1:
+ @ Load the constant 0x10000000 into our work register
+ push { work }
+ mov work, #1
+ lsl work, #28
+Loop1:
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+ cmp divisor, work
+ bcs Lbignum
+ cmp divisor, dividend
+ bcs Lbignum
+ lsl divisor, #4
+ lsl curbit, #4
+ b Loop1
+
+Lbignum:
+ @ Set work to 0x80000000
+ lsl work, #3
+Loop2:
+ @ For a very big divisor, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, work
+ bcs Loop3
+ cmp divisor, dividend
+ bcs Loop3
+ lsl divisor, #1
+ lsl curbit, #1
+ b Loop2
+
+Loop3:
+ @ Test for possible subtractions. On the final pass, this may
+ @ subtract too much from the dividend, so keep track of which
+ @ subtractions are done so that we can fix them up afterwards...
+ mov overdone, #0
+ cmp dividend, divisor
+ bcc Over2
+ sub dividend, dividend, divisor
+Over2:
+ lsr work, divisor, #1
+ cmp dividend, work
+ bcc Over3
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #1
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+Over3:
+ lsr work, divisor, #2
+ cmp dividend, work
+ bcc Over4
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #2
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+Over4:
+ lsr work, divisor, #3
+ cmp dividend, work
+ bcc Over5
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #3
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+Over5:
+ mov ip, curbit
+ cmp dividend, #0 @ Early termination?
+ beq Over6
+ lsr curbit, #4 @ No, any more bits to do?
+ beq Over6
+ lsr divisor, #4
+ b Loop3
+
+Over6:
+ @ Any subtractions that we should not have done will be recorded in
+ @ the top three bits of "overdone". Exactly which were not needed
+ @ are governed by the position of the bit, stored in ip.
+ @ If we terminated early, because dividend became zero,
+ @ then none of the below will match, since the bit in ip will not be
+ @ in the bottom nibble.
+
+ mov work, #0xe
+ lsl work, #28
+ and overdone, work
+ bne Over7
+ pop { work }
+ RET @ No fixups needed
+Over7:
+ mov curbit, ip
+ mov work, #3
+ ror curbit, work
+ tst overdone, curbit
+ beq Over8
+ lsr work, divisor, #3
+ add dividend, dividend, work
+Over8:
+ mov curbit, ip
+ mov work, #2
+ ror curbit, work
+ tst overdone, curbit
+ beq Over9
+ lsr work, divisor, #2
+ add dividend, dividend, work
+Over9:
+ mov curbit, ip
+ mov work, #1
+ ror curbit, work
+ tst overdone, curbit
+ beq Over10
+ lsr work, divisor, #1
+ add dividend, dividend, work
+Over10:
+ pop { work }
+ RET
+
+Ldiv0:
+ push { lr }
+ bl SYM (__div0) __PLT__
+ mov r0, #0 @ about as wrong as it could be
+ pop { pc }
+
+ SIZE (__umodsi3)
+
+#endif /* L_umodsi3 */
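
The remainder routine above has the same shape, but because the exact dividend value must survive, the over-subtractions that the four-way unrolled loop can make on its final pass are recorded in the top three bits of "overdone" and added back at the end. A bit-at-a-time C sketch needs no such fixup (illustrative only; soft_umod is a made-up name):

    unsigned int soft_umod(unsigned int dividend, unsigned int divisor)
    {
        unsigned int curbit = 1;

        if (divisor == 0)
            return 0;                    /* the asm calls __div0 here */
        if (dividend < divisor)
            return dividend;

        while (divisor < dividend && !(divisor & 0x80000000u)) {
            divisor <<= 1;
            curbit  <<= 1;
        }
        while (curbit != 0) {
            if (dividend >= divisor)
                dividend -= divisor;
            divisor >>= 1;
            curbit  >>= 1;
        }
        return dividend;
    }
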
+
+#ifdef L_divsi3
+
+dividend .req r0
+divisor .req r1
+result .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+ .text
+ .globl SYM (__divsi3)
+ TYPE (__divsi3)
+ .align 0
+ .thumb_func
+SYM (__divsi3):
+ cmp divisor, #0
+ beq Ldiv0
+
+ push { work }
+ mov work, dividend
+ eor work, divisor @ Save the sign of the result.
+ mov ip, work
+ mov curbit, #1
+ mov result, #0
+ cmp divisor, #0
+ bpl Over1
+ neg divisor, divisor @ Loops below use unsigned.
+Over1:
+ cmp dividend, #0
+ bpl Over2
+ neg dividend, dividend
+Over2:
+ cmp dividend, divisor
+ bcc Lgot_result
+
+ mov work, #1
+ lsl work, #28
+Loop1:
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+ cmp divisor, work
+ bcs Lbignum
+ cmp divisor, dividend
+ bcs Lbignum
+ lsl divisor, #4
+ lsl curbit, #4
+ b Loop1
+
+Lbignum:
+ @ For a very big divisor, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+ lsl work, #3
+Loop2:
+ cmp divisor, work
+ bcs Loop3
+ cmp divisor, dividend
+ bcs Loop3
+ lsl divisor, #1
+ lsl curbit, #1
+ b Loop2
+
+Loop3:
+ @ Test for possible subtractions, and note which bits
+ @ are done in the result. On the final pass, this may subtract
+ @ too much from the dividend, but the result will be ok, since the
+ @ "bit" will have been shifted out at the bottom.
+ cmp dividend, divisor
+ bcc Over3
+ sub dividend, dividend, divisor
+ orr result, result, curbit
+Over3:
+ lsr work, divisor, #1
+ cmp dividend, work
+ bcc Over4
+ sub dividend, dividend, work
+ lsr work, curbit, #1
+ orr result, work
+Over4:
+ lsr work, divisor, #2
+ cmp dividend, work
+ bcc Over5
+ sub dividend, dividend, work
+ lsr work, curbit, #2
+ orr result, result, work
+Over5:
+ lsr work, divisor, #3
+ cmp dividend, work
+ bcc Over6
+ sub dividend, dividend, work
+ lsr work, curbit, #3
+ orr result, result, work
+Over6:
+ cmp dividend, #0 @ Early termination?
+ beq Lgot_result
+ lsr curbit, #4 @ No, any more bits to do?
+ beq Lgot_result
+ lsr divisor, #4
+ b Loop3
+
+Lgot_result:
+ mov r0, result
+ mov work, ip
+ cmp work, #0
+ bpl Over7
+ neg r0, r0
+Over7:
+ pop { work }
+ RET
+
+Ldiv0:
+ push { lr }
+ bl SYM (__div0) __PLT__
+ mov r0, #0 @ about as wrong as it could be
+ pop { pc }
+
+ SIZE (__divsi3)
+
+#endif /* L_divsi3 */
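
__divsi3 above computes the quotient of the magnitudes and then applies the sign saved in ip, which holds the exclusive-or of the two operands (negative exactly when the signs differ). Roughly, in C (illustrative only; soft_div is a made-up name and soft_udiv is the earlier sketch):

    int soft_div(int dividend, int divisor)
    {
        int negate = (dividend ^ divisor) < 0;   /* sign of the quotient */
        unsigned int q;

        if (divisor == 0)
            return 0;                            /* __div0 case */

        q = soft_udiv(dividend < 0 ? -(unsigned int)dividend
                                   : (unsigned int)dividend,
                      divisor < 0 ? -(unsigned int)divisor
                                  : (unsigned int)divisor);
        return negate ? -(int)q : (int)q;
    }
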
+
+#ifdef L_modsi3
+
+dividend .req r0
+divisor .req r1
+overdone .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+ .text
+ .globl SYM (__modsi3)
+ TYPE (__modsi3)
+ .align 0
+ .thumb_func
+SYM (__modsi3):
+ mov curbit, #1
+ cmp divisor, #0
+ beq Ldiv0
+ bpl Over1
+ neg divisor, divisor @ Loops below use unsigned.
+Over1:
+ push { work }
+ @ Need to save the sign of the dividend; unfortunately, we need
+ @ ip later on. Must do this after saving the original value of
+ @ the work register, because we will pop this value off first.
+ push { dividend }
+ cmp dividend, #0
+ bpl Over2
+ neg dividend, dividend
+Over2:
+ cmp dividend, divisor
+ bcc Lgot_result
+ mov work, #1
+ lsl work, #28
+Loop1:
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+ cmp divisor, work
+ bcs Lbignum
+ cmp divisor, dividend
+ bcs Lbignum
+ lsl divisor, #4
+ lsl curbit, #4
+ b Loop1
+
+Lbignum:
+ @ Set work to 0x80000000
+ lsl work, #3
+Loop2:
+ @ For a very big divisor, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, work
+ bcs Loop3
+ cmp divisor, dividend
+ bcs Loop3
+ lsl divisor, #1
+ lsl curbit, #1
+ b Loop2
+
+Loop3:
+ @ Test for possible subtractions. On the final pass, this may
+ @ subtract too much from the dividend, so keep track of which
+ @ subtractions are done so that we can fix them up afterwards...
+ mov overdone, #0
+ cmp dividend, divisor
+ bcc Over3
+ sub dividend, dividend, divisor
+Over3:
+ lsr work, divisor, #1
+ cmp dividend, work
+ bcc Over4
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #1
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+Over4:
+ lsr work, divisor, #2
+ cmp dividend, work
+ bcc Over5
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #2
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+Over5:
+ lsr work, divisor, #3
+ cmp dividend, work
+ bcc Over6
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #3
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+Over6:
+ mov ip, curbit
+ cmp dividend, #0 @ Early termination?
+ beq Over7
+ lsr curbit, #4 @ No, any more bits to do?
+ beq Over7
+ lsr divisor, #4
+ b Loop3
+
+Over7:
+ @ Any subtractions that we should not have done will be recorded in
+ @ the top three bits of "overdone". Exactly which were not needed
+ @ are governed by the position of the bit, stored in ip.
+ @ If we terminated early, because dividend became zero,
+ @ then none of the below will match, since the bit in ip will not be
+ @ in the bottom nibble.
+ mov work, #0xe
+ lsl work, #28
+ and overdone, work
+ beq Lgot_result
+
+ mov curbit, ip
+ mov work, #3
+ ror curbit, work
+ tst overdone, curbit
+ beq Over8
+ lsr work, divisor, #3
+ add dividend, dividend, work
+Over8:
+ mov curbit, ip
+ mov work, #2
+ ror curbit, work
+ tst overdone, curbit
+ beq Over9
+ lsr work, divisor, #2
+ add dividend, dividend, work
+Over9:
+ mov curbit, ip
+ mov work, #1
+ ror curbit, work
+ tst overdone, curbit
+ beq Lgot_result
+ lsr work, divisor, #1
+ add dividend, dividend, work
+Lgot_result:
+ pop { work }
+ cmp work, #0
+ bpl Over10
+ neg dividend, dividend
+Over10:
+ pop { work }
+ RET
+
+Ldiv0:
+ push { lr }
+ bl SYM (__div0) __PLT__
+ mov r0, #0 @ about as wrong as it could be
+ pop { pc }
+
+ SIZE (__modsi3)
+
+#endif /* L_modsi3 */
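
As in C, the remainder from __modsi3 takes the sign of the dividend, which is why only the original dividend (pushed at entry) is examined at Lgot_result; the divisor's sign never matters. A rough C equivalent (illustrative only, reusing the soft_umod sketch above):

    int soft_mod(int dividend, int divisor)
    {
        unsigned int r;

        if (divisor == 0)
            return 0;                            /* __div0 case */

        r = soft_umod(dividend < 0 ? -(unsigned int)dividend
                                   : (unsigned int)dividend,
                      divisor < 0 ? -(unsigned int)divisor
                                  : (unsigned int)divisor);
        return dividend < 0 ? -(int)r : (int)r;
    }
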
+
+#ifdef L_dvmd_tls
+
+ .globl SYM (__div0)
+ TYPE (__div0)
+ .align 0
+ .thumb_func
+SYM (__div0):
+ RET
+
+ SIZE (__div0)
+
+#endif /* L_dvmd_tls */
+
+
+#ifdef L_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code.
+ The address of the function to be called is loaded into a register and then
+ one of these labels is called via a BL instruction. This puts the
+ return address into the link register with the bottom bit set, and the
+ code here switches to the correct mode before executing the function. */
+
+ .text
+ .align 0
+
+.macro call_via register
+ .globl SYM (_call_via_\register)
+ TYPE (_call_via_\register)
+ .thumb_func
+SYM (_call_via_\register):
+ bx \register
+ nop
+
+ SIZE (_call_via_\register)
+.endm
+
+ call_via r0
+ call_via r1
+ call_via r2
+ call_via r3
+ call_via r4
+ call_via r5
+ call_via r6
+ call_via r7
+ call_via r8
+ call_via r9
+ call_via sl
+ call_via fp
+ call_via ip
+ call_via sp
+ call_via lr
+
+#endif /* L_call_via_rX */
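
These veneers exist because Thumb-1 has no register-indirect call that also sets lr: bl only takes an immediate offset, and bx rX does not write the link register. The compiler therefore loads the target address into a register and BLs the matching veneer, which simply BXes to it; bit 0 of the address selects ARM or Thumb at the far end. A C-level illustration of the pattern (hypothetical example; the register is the compiler's choice, and the same idea carries over to the _interwork_call_via_rX veneers below):

    /* An indirect call like this one is emitted, conceptually, as
           <load fp into some register rX>
           bl _call_via_rX        @ the veneer executes "bx rX"
       leaving lr with the Thumb return address.  */
    void call_indirect(void (*fp)(void))
    {
        fp();
    }
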
+
+#ifdef L_interwork_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code,
+ when the target address is in an unknown instruction set. The address
+ of the function to be called is loaded into a register and then one of these
+ labels is called via a BL instruction. This puts the return address
+ into the link register with the bottom bit set, and the code here
+ switches to the correct mode before executing the function. Unfortunately
+ the target code cannot be relied upon to return via a BX instruction, so
+ instead we have to store the return address on the stack and allow the
+ called function to return here instead. Upon return we recover the real
+ return address and use a BX to get back to Thumb mode. */
+
+ .text
+ .align 0
+
+ .code 32
+ .globl _arm_return
+_arm_return:
+ ldmia r13!, {r12}
+ bx r12
+
+.macro interwork register
+ .code 16
+
+ .globl SYM (_interwork_call_via_\register)
+ TYPE (_interwork_call_via_\register)
+ .thumb_func
+SYM (_interwork_call_via_\register):
+ bx pc
+ nop
+
+ .code 32
+ .globl .Lchange_\register
+.Lchange_\register:
+ tst \register, #1
+ stmeqdb r13!, {lr}
+ adreq lr, _arm_return
+ bx \register
+
+ SIZE (_interwork_call_via_\register)
+.endm
+
+ interwork r0
+ interwork r1
+ interwork r2
+ interwork r3
+ interwork r4
+ interwork r5
+ interwork r6
+ interwork r7
+ interwork r8
+ interwork r9
+ interwork sl
+ interwork fp
+ interwork ip
+ interwork sp
+
+ /* The lr case has to be handled a little differently...*/
+ .code 16
+ .globl SYM (_interwork_call_via_lr)
+ TYPE (_interwork_call_via_lr)
+ .thumb_func
+SYM (_interwork_call_via_lr):
+ bx pc
+ nop
+
+ .code 32
+ .globl .Lchange_lr
+.Lchange_lr:
+ tst lr, #1
+ stmeqdb r13!, {lr}
+ mov ip, lr
+ adreq lr, _arm_return
+ bx ip
+
+ SIZE (_interwork_call_via_lr)
+
+#endif /* L_interwork_call_via_rX */