author    | YamaArashi <shadow962@live.com> | 2016-04-27 00:00:40 -0700
committer | YamaArashi <shadow962@live.com> | 2016-04-27 00:00:40 -0700
commit    | 9dc75fe3b4be91d6066c8e870eacec954117cc08 (patch)
tree      | 0ff095d5dd600f23a98952682edf5226f40f0391 /libgcc/lib1thumb.asm
parent    | 9e5f6a79618cb7df0b7d3816472b530c3b7d9c1a (diff)
reorganize files
Diffstat (limited to 'libgcc/lib1thumb.asm')
-rwxr-xr-x | libgcc/lib1thumb.asm | 736
1 files changed, 736 insertions, 0 deletions
diff --git a/libgcc/lib1thumb.asm b/libgcc/lib1thumb.asm
new file mode 100755
index 0000000..e0ff746
--- /dev/null
+++ b/libgcc/lib1thumb.asm
@@ -0,0 +1,736 @@
+@ libgcc1 routines for ARM cpu.
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+
+/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file.  (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.  */
+
+        .code 16
+
+#ifdef __elf__
+#define __PLT__ (PLT)
+#define TYPE(x) .type SYM(x),function
+#define SIZE(x) .size SYM(x), . - SYM(x)
+#else
+#define __PLT__
+#define TYPE(x)
+#define SIZE(x)
+#endif
+
+#define RET mov pc, lr
+
+#define SYM(x) x
+
+work .req r4 @ XXXX is this safe ?
+
+#ifdef L_udivsi3
+
+dividend .req r0
+divisor .req r1
+result .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+        .text
+        .globl SYM (__udivsi3)
+        TYPE (__udivsi3)
+        .align 0
+        .thumb_func
+SYM (__udivsi3):
+        cmp divisor, #0
+        beq Ldiv0
+        mov curbit, #1
+        mov result, #0
+
+        push { work }
+        cmp dividend, divisor
+        bcc Lgot_result
+
+        @ Load the constant 0x10000000 into our work register
+        mov work, #1
+        lsl work, #28
+Loop1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp divisor, work
+        bcs Lbignum
+        cmp divisor, dividend
+        bcs Lbignum
+        lsl divisor, #4
+        lsl curbit, #4
+        b Loop1
+
+Lbignum:
+        @ Set work to 0x80000000
+        lsl work, #3
+Loop2:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        cmp divisor, work
+        bcs Loop3
+        cmp divisor, dividend
+        bcs Loop3
+        lsl divisor, #1
+        lsl curbit, #1
+        b Loop2
+
+Loop3:
+        @ Test for possible subtractions, and note which bits
+        @ are done in the result.  On the final pass, this may subtract
+        @ too much from the dividend, but the result will be ok, since the
+        @ "bit" will have been shifted out at the bottom.
+        cmp dividend, divisor
+        bcc Over1
+        sub dividend, dividend, divisor
+        orr result, result, curbit
+Over1:
+        lsr work, divisor, #1
+        cmp dividend, work
+        bcc Over2
+        sub dividend, dividend, work
+        lsr work, curbit, #1
+        orr result, work
+Over2:
+        lsr work, divisor, #2
+        cmp dividend, work
+        bcc Over3
+        sub dividend, dividend, work
+        lsr work, curbit, #2
+        orr result, work
+Over3:
+        lsr work, divisor, #3
+        cmp dividend, work
+        bcc Over4
+        sub dividend, dividend, work
+        lsr work, curbit, #3
+        orr result, work
+Over4:
+        cmp dividend, #0 @ Early termination?
+        beq Lgot_result
+        lsr curbit, #4 @ No, any more bits to do?
+        beq Lgot_result
+        lsr divisor, #4
+        b Loop3
+Lgot_result:
+        mov r0, result
+        pop { work }
+        RET
+
+Ldiv0:
+        push { lr }
+        bl SYM (__div0) __PLT__
+        mov r0, #0 @ about as wrong as it could be
+        pop { pc }
+
+        SIZE (__udivsi3)
+
+#endif /* L_udivsi3 */
+
+#ifdef L_umodsi3
+
+dividend .req r0
+divisor .req r1
+overdone .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+        .text
+        .globl SYM (__umodsi3)
+        TYPE (__umodsi3)
+        .align 0
+        .thumb_func
+SYM (__umodsi3):
+        cmp divisor, #0
+        beq Ldiv0
+        mov curbit, #1
+        cmp dividend, divisor
+        bcs Over1
+        RET
+
+Over1:
+        @ Load the constant 0x10000000 into our work register
+        push { work }
+        mov work, #1
+        lsl work, #28
+Loop1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp divisor, work
+        bcs Lbignum
+        cmp divisor, dividend
+        bcs Lbignum
+        lsl divisor, #4
+        lsl curbit, #4
+        b Loop1
+
+Lbignum:
+        @ Set work to 0x80000000
+        lsl work, #3
+Loop2:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        cmp divisor, work
+        bcs Loop3
+        cmp divisor, dividend
+        bcs Loop3
+        lsl divisor, #1
+        lsl curbit, #1
+        b Loop2
+
+Loop3:
+        @ Test for possible subtractions.  On the final pass, this may
+        @ subtract too much from the dividend, so keep track of which
+        @ subtractions are done; we can fix them up afterwards...
+        mov overdone, #0
+        cmp dividend, divisor
+        bcc Over2
+        sub dividend, dividend, divisor
+Over2:
+        lsr work, divisor, #1
+        cmp dividend, work
+        bcc Over3
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #1
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over3:
+        lsr work, divisor, #2
+        cmp dividend, work
+        bcc Over4
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #2
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over4:
+        lsr work, divisor, #3
+        cmp dividend, work
+        bcc Over5
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #3
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over5:
+        mov ip, curbit
+        cmp dividend, #0 @ Early termination?
+        beq Over6
+        lsr curbit, #4 @ No, any more bits to do?
+        beq Over6
+        lsr divisor, #4
+        b Loop3
+
+Over6:
+        @ Any subtractions that we should not have done will be recorded in
+        @ the top three bits of "overdone".  Exactly which were not needed
+        @ are governed by the position of the bit, stored in ip.
+        @ If we terminated early, because dividend became zero,
+        @ then none of the below will match, since the bit in ip will not be
+        @ in the bottom nibble.
+
+        mov work, #0xe
+        lsl work, #28
+        and overdone, work
+        bne Over7
+        pop { work }
+        RET @ No fixups needed
+Over7:
+        mov curbit, ip
+        mov work, #3
+        ror curbit, work
+        tst overdone, curbit
+        beq Over8
+        lsr work, divisor, #3
+        add dividend, dividend, work
+Over8:
+        mov curbit, ip
+        mov work, #2
+        ror curbit, work
+        tst overdone, curbit
+        beq Over9
+        lsr work, divisor, #2
+        add dividend, dividend, work
+Over9:
+        mov curbit, ip
+        mov work, #1
+        ror curbit, work
+        tst overdone, curbit
+        beq Over10
+        lsr work, divisor, #1
+        add dividend, dividend, work
+Over10:
+        pop { work }
+        RET
+
+Ldiv0:
+        push { lr }
+        bl SYM (__div0) __PLT__
+        mov r0, #0 @ about as wrong as it could be
+        pop { pc }
+
+        SIZE (__umodsi3)
+
+#endif /* L_umodsi3 */
+
+#ifdef L_divsi3
+
+dividend .req r0
+divisor .req r1
+result .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+        .text
+        .globl SYM (__divsi3)
+        TYPE (__divsi3)
+        .align 0
+        .thumb_func
+SYM (__divsi3):
+        cmp divisor, #0
+        beq Ldiv0
+
+        push { work }
+        mov work, dividend
+        eor work, divisor @ Save the sign of the result.
+        mov ip, work
+        mov curbit, #1
+        mov result, #0
+        cmp divisor, #0
+        bpl Over1
+        neg divisor, divisor @ Loops below use unsigned.
+Over1:
+        cmp dividend, #0
+        bpl Over2
+        neg dividend, dividend
+Over2:
+        cmp dividend, divisor
+        bcc Lgot_result
+
+        mov work, #1
+        lsl work, #28
+Loop1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp divisor, work
+        bcs Lbignum
+        cmp divisor, dividend
+        bcs Lbignum
+        lsl divisor, #4
+        lsl curbit, #4
+        b Loop1
+
+Lbignum:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        lsl work, #3
+Loop2:
+        cmp divisor, work
+        bcs Loop3
+        cmp divisor, dividend
+        bcs Loop3
+        lsl divisor, #1
+        lsl curbit, #1
+        b Loop2
+
+Loop3:
+        @ Test for possible subtractions, and note which bits
+        @ are done in the result.  On the final pass, this may subtract
+        @ too much from the dividend, but the result will be ok, since the
+        @ "bit" will have been shifted out at the bottom.
+        cmp dividend, divisor
+        bcc Over3
+        sub dividend, dividend, divisor
+        orr result, result, curbit
+Over3:
+        lsr work, divisor, #1
+        cmp dividend, work
+        bcc Over4
+        sub dividend, dividend, work
+        lsr work, curbit, #1
+        orr result, work
+Over4:
+        lsr work, divisor, #2
+        cmp dividend, work
+        bcc Over5
+        sub dividend, dividend, work
+        lsr work, curbit, #2
+        orr result, result, work
+Over5:
+        lsr work, divisor, #3
+        cmp dividend, work
+        bcc Over6
+        sub dividend, dividend, work
+        lsr work, curbit, #3
+        orr result, result, work
+Over6:
+        cmp dividend, #0 @ Early termination?
+        beq Lgot_result
+        lsr curbit, #4 @ No, any more bits to do?
+        beq Lgot_result
+        lsr divisor, #4
+        b Loop3
+
+Lgot_result:
+        mov r0, result
+        mov work, ip
+        cmp work, #0
+        bpl Over7
+        neg r0, r0
+Over7:
+        pop { work }
+        RET
+
+Ldiv0:
+        push { lr }
+        bl SYM (__div0) __PLT__
+        mov r0, #0 @ about as wrong as it could be
+        pop { pc }
+
+        SIZE (__divsi3)
+
+#endif /* L_divsi3 */
+
+#ifdef L_modsi3
+
+dividend .req r0
+divisor .req r1
+overdone .req r2
+curbit .req r3
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+
+        .text
+        .globl SYM (__modsi3)
+        TYPE (__modsi3)
+        .align 0
+        .thumb_func
+SYM (__modsi3):
+        mov curbit, #1
+        cmp divisor, #0
+        beq Ldiv0
+        bpl Over1
+        neg divisor, divisor @ Loops below use unsigned.
+Over1:
+        push { work }
+        @ Need to save the sign of the dividend; unfortunately, we need
+        @ ip later on.  Must do this after saving the original value of
+        @ the work register, because we will pop this value off first.
+        push { dividend }
+        cmp dividend, #0
+        bpl Over2
+        neg dividend, dividend
+Over2:
+        cmp dividend, divisor
+        bcc Lgot_result
+        mov work, #1
+        lsl work, #28
+Loop1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp divisor, work
+        bcs Lbignum
+        cmp divisor, dividend
+        bcs Lbignum
+        lsl divisor, #4
+        lsl curbit, #4
+        b Loop1
+
+Lbignum:
+        @ Set work to 0x80000000
+        lsl work, #3
+Loop2:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        cmp divisor, work
+        bcs Loop3
+        cmp divisor, dividend
+        bcs Loop3
+        lsl divisor, #1
+        lsl curbit, #1
+        b Loop2
+
+Loop3:
+        @ Test for possible subtractions.  On the final pass, this may
+        @ subtract too much from the dividend, so keep track of which
+        @ subtractions are done; we can fix them up afterwards...
+        mov overdone, #0
+        cmp dividend, divisor
+        bcc Over3
+        sub dividend, dividend, divisor
+Over3:
+        lsr work, divisor, #1
+        cmp dividend, work
+        bcc Over4
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #1
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over4:
+        lsr work, divisor, #2
+        cmp dividend, work
+        bcc Over5
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #2
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over5:
+        lsr work, divisor, #3
+        cmp dividend, work
+        bcc Over6
+        sub dividend, dividend, work
+        mov ip, curbit
+        mov work, #3
+        ror curbit, work
+        orr overdone, curbit
+        mov curbit, ip
+Over6:
+        mov ip, curbit
+        cmp dividend, #0 @ Early termination?
+        beq Over7
+        lsr curbit, #4 @ No, any more bits to do?
+        beq Over7
+        lsr divisor, #4
+        b Loop3
+
+Over7:
+        @ Any subtractions that we should not have done will be recorded in
+        @ the top three bits of "overdone".  Exactly which were not needed
+        @ are governed by the position of the bit, stored in ip.
+        @ If we terminated early, because dividend became zero,
+        @ then none of the below will match, since the bit in ip will not be
+        @ in the bottom nibble.
+        mov work, #0xe
+        lsl work, #28
+        and overdone, work
+        beq Lgot_result
+
+        mov curbit, ip
+        mov work, #3
+        ror curbit, work
+        tst overdone, curbit
+        beq Over8
+        lsr work, divisor, #3
+        add dividend, dividend, work
+Over8:
+        mov curbit, ip
+        mov work, #2
+        ror curbit, work
+        tst overdone, curbit
+        beq Over9
+        lsr work, divisor, #2
+        add dividend, dividend, work
+Over9:
+        mov curbit, ip
+        mov work, #1
+        ror curbit, work
+        tst overdone, curbit
+        beq Lgot_result
+        lsr work, divisor, #1
+        add dividend, dividend, work
+Lgot_result:
+        pop { work }
+        cmp work, #0
+        bpl Over10
+        neg dividend, dividend
+Over10:
+        pop { work }
+        RET
+
+Ldiv0:
+        push { lr }
+        bl SYM (__div0) __PLT__
+        mov r0, #0 @ about as wrong as it could be
+        pop { pc }
+
+        SIZE (__modsi3)
+
+#endif /* L_modsi3 */
+
+#ifdef L_dvmd_tls
+
+        .globl SYM (__div0)
+        TYPE (__div0)
+        .align 0
+        .thumb_func
+SYM (__div0):
+        RET
+
+        SIZE (__div0)
+
+#endif /* L_dvmd_tls */
+
+
+#ifdef L_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code.
+   The address of the function to be called is loaded into a register and
+   then one of these labels is called via a BL instruction.  This puts the
+   return address into the link register with the bottom bit set, and the
+   code here switches to the correct mode before executing the function.  */
+
+        .text
+        .align 0
+
+.macro call_via register
+        .globl SYM (_call_via_\register)
+        TYPE (_call_via_\register)
+        .thumb_func
+SYM (_call_via_\register):
+        bx \register
+        nop
+
+        SIZE (_call_via_\register)
+.endm
+
+        call_via r0
+        call_via r1
+        call_via r2
+        call_via r3
+        call_via r4
+        call_via r5
+        call_via r6
+        call_via r7
+        call_via r8
+        call_via r9
+        call_via sl
+        call_via fp
+        call_via ip
+        call_via sp
+        call_via lr
+
+#endif /* L_call_via_rX */
+
+#ifdef L_interwork_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code,
+   when the target address is in an unknown instruction set.  The address
+   of the function to be called is loaded into a register and then one of
+   these labels is called via a BL instruction.  This puts the return
+   address into the link register with the bottom bit set, and the code
+   here switches to the correct mode before executing the function.
+   Unfortunately the target code cannot be relied upon to return via a BX
+   instruction, so instead we have to store the return address on the
+   stack and allow the called function to return here instead.  Upon
+   return we recover the real return address and use a BX to get back to
+   Thumb mode.  */
+
+        .text
+        .align 0
+
+        .code 32
+        .globl _arm_return
+_arm_return:
+        ldmia r13!, {r12}
+        bx r12
+
+.macro interwork register
+        .code 16
+
+        .globl SYM (_interwork_call_via_\register)
+        TYPE (_interwork_call_via_\register)
+        .thumb_func
+SYM (_interwork_call_via_\register):
+        bx pc
+        nop
+
+        .code 32
+        .globl .Lchange_\register
+.Lchange_\register:
+        tst \register, #1
+        stmeqdb r13!, {lr}
+        adreq lr, _arm_return
+        bx \register
+
+        SIZE (_interwork_call_via_\register)
+.endm
+
+        interwork r0
+        interwork r1
+        interwork r2
+        interwork r3
+        interwork r4
+        interwork r5
+        interwork r6
+        interwork r7
+        interwork r8
+        interwork r9
+        interwork sl
+        interwork fp
+        interwork ip
+        interwork sp
+
+        /* The lr case has to be handled a little differently... */
+        .code 16
+        .globl SYM (_interwork_call_via_lr)
+        TYPE (_interwork_call_via_lr)
+        .thumb_func
+SYM (_interwork_call_via_lr):
+        bx pc
+        nop
+
+        .code 32
+        .globl .Lchange_lr
+.Lchange_lr:
+        tst lr, #1
+        stmeqdb r13!, {lr}
+        mov ip, lr
+        adreq lr, _arm_return
+        bx ip
+
+        SIZE (_interwork_call_via_lr)
+
+#endif /* L_interwork_call_via_rX */
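
A note for readers tracing __udivsi3 above: it is a classic restoring (shift-and-subtract) divider. The divisor is scaled up in step with a quotient bit, four bits at a time until it either nears the top of the register or exceeds the dividend, then one bit at a time; the main loop then makes four unrolled trial subtractions per pass and shifts both values back down between passes. Since only the quotient is returned, the final pass is allowed to over-subtract from the dividend. The C sketch below mirrors that control flow; the name udivsi3_sketch and the zero-divisor early return are mine (the real routine branches to __div0), so treat it as an illustration under those assumptions, not the shipped implementation.

    #include <stdint.h>

    /* Sketch of the shift-and-subtract strategy used by __udivsi3. */
    uint32_t udivsi3_sketch(uint32_t dividend, uint32_t divisor)
    {
        uint32_t curbit = 1, result = 0;
        if (divisor == 0)
            return 0;                        /* the asm calls __div0 here */
        if (dividend < divisor)
            return 0;
        /* Scale the divisor up four bits at a time (matching the 4-way
           unrolled main loop), then singly near the top of the register
           so it cannot be shifted past bit 31. */
        while (divisor < 0x10000000u && divisor < dividend) {
            divisor <<= 4;  curbit <<= 4;
        }
        while (divisor < 0x80000000u && divisor < dividend) {
            divisor <<= 1;  curbit <<= 1;
        }
        for (;;) {
            /* Four unrolled trial subtractions per pass. */
            for (unsigned n = 0; n <= 3; n++)
                if (dividend >= (divisor >> n)) {
                    dividend -= divisor >> n;
                    result |= curbit >> n;   /* ORs in 0 once the bit has
                                                shifted out: harmless */
                }
            if (dividend == 0)               /* early termination */
                break;
            curbit >>= 4;
            if (curbit == 0)                 /* no quotient bits left */
                break;
            divisor >>= 4;
        }
        return result;
    }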
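__umodsi3 uses the same scaling scheme but must leave an exact remainder, and the final pass of the unrolled loop can subtract divisor>>1 through divisor>>3 even when the corresponding quotient bit has already been shifted out of the bottom of curbit. The rotate is the trick: ror of curbit by n lands in the top bits of overdone exactly in that shifted-out case (and in harmless low bits otherwise), so the epilogue can mask with 0xe0000000 and add the phantom subtractions back. A C rendering of that bookkeeping, with the same hypothetical naming caveats as the previous sketch:

    /* Rotate right by n (1 <= n <= 31), like the Thumb ROR instruction. */
    static uint32_t ror32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32u - n));
    }

    /* Sketch of the over-subtraction bookkeeping used by __umodsi3. */
    uint32_t umodsi3_sketch(uint32_t dividend, uint32_t divisor)
    {
        uint32_t curbit = 1, overdone = 0, last = 1;
        if (divisor == 0)
            return 0;                        /* the asm calls __div0 here */
        if (dividend < divisor)
            return dividend;
        while (divisor < 0x10000000u && divisor < dividend) {
            divisor <<= 4;  curbit <<= 4;
        }
        while (divisor < 0x80000000u && divisor < dividend) {
            divisor <<= 1;  curbit <<= 1;
        }
        for (;;) {
            overdone = 0;                    /* only the final pass counts */
            if (dividend >= divisor)
                dividend -= divisor;
            for (unsigned n = 1; n <= 3; n++)
                if (dividend >= (divisor >> n)) {
                    dividend -= divisor >> n;
                    /* Wraps into the top three bits precisely when
                       curbit >> n would have been zero. */
                    overdone |= ror32(curbit, n);
                }
            last = curbit;                   /* the asm parks this in ip */
            if (dividend == 0)
                break;
            curbit >>= 4;
            if (curbit == 0)
                break;
            divisor >>= 4;
        }
        overdone &= 0xe0000000u;
        if (overdone)                        /* undo phantom subtractions */
            for (unsigned n = 1; n <= 3; n++)
                if (overdone & ror32(last, n))
                    dividend += divisor >> n;
        return dividend;
    }

For example, 8 % 3 scales the divisor to 48; the final pass subtracts 1 (3>>1) and records ror32(1, 1) = 0x80000000, and the epilogue adds the 1 back, returning 2.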
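__divsi3 and __modsi3 are thin signed wrappers around the same unsigned cores: the quotient is negative when the operands' signs differ (the eor into ip at the top of __divsi3), while the remainder takes the sign of the dividend alone, which is why __modsi3 pushes the original dividend and tests it on the way out. A sketch reusing the two hypothetical helpers above:

    /* Sign handling as in __divsi3: quotient sign is the EOR of the
       operands' signs. */
    int32_t divsi3_sketch(int32_t a, int32_t b)
    {
        int neg = (a ^ b) < 0;
        uint32_t ua = a < 0 ? 0u - (uint32_t)a : (uint32_t)a;
        uint32_t ub = b < 0 ? 0u - (uint32_t)b : (uint32_t)b;
        uint32_t q = udivsi3_sketch(ua, ub);
        return neg ? (int32_t)(0u - q) : (int32_t)q;
    }

    /* Sign handling as in __modsi3: remainder takes the dividend's sign. */
    int32_t modsi3_sketch(int32_t a, int32_t b)
    {
        int neg = a < 0;
        uint32_t ua = a < 0 ? 0u - (uint32_t)a : (uint32_t)a;
        uint32_t ub = b < 0 ? 0u - (uint32_t)b : (uint32_t)b;
        uint32_t r = umodsi3_sketch(ua, ub);
        return neg ? (int32_t)(0u - r) : (int32_t)r;
    }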