diff options
Diffstat (limited to 'gcc/config/c4x/libgcc.S')
-rwxr-xr-x | gcc/config/c4x/libgcc.S | 1501 |
1 files changed, 0 insertions, 1501 deletions
diff --git a/gcc/config/c4x/libgcc.S b/gcc/config/c4x/libgcc.S deleted file mode 100755 index fb79cf8..0000000 --- a/gcc/config/c4x/libgcc.S +++ /dev/null @@ -1,1501 +0,0 @@ -/* libgcc1 routines for the Texas Instruments TMS320C[34]x - Copyright (C) 1997,98 Free Software Foundation, Inc. - - Contributed by Michael Hayes (m.hayes@elec.canterbury.cri.nz) - and Herman Ten Brugge (Haj.Ten.Brugge@net.HCC.nl). - - -This file is part of GNU CC. - -GNU CC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file with other programs, and to distribute -those programs without any restriction coming from the use of this -file. (The General Public License restrictions do apply in other -respects; for example, they cover modification of the file, and -distribution when not linked into another program.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -/* As a special exception, if you link this library with files - compiled with GCC to produce an executable, this does not cause - the resulting executable to be covered by the GNU General Public License. - This exception does not however invalidate any other reasons why - the executable file might be covered by the GNU General Public License. */ - - -; These routines are called using the standard TI register argument -; passing model. -; The following registers do not have to be saved: -; r0, r1, r2, r3, ar0, ar1, ar2, ir0, ir1, bk, rs, rc, re, (r9, r10, r11) -; -; Perform floating point divqf3 -; -; This routine performs a reciprocal of the divisor using the method -; described in the C30/C40 user manuals. It then multiplies that -; result by the dividend. -; -; Let r be the reciprocal of the divisor v and let the ith estimate -; of r be denoted by r[i]. An iterative approach can be used to -; improve the estimate of r, given an initial estimate r[0], where -; -; r[i + 1] = r[i] * (2.0 - v * r[i]) -; -; The normalised error e[i] at the ith iteration is -; -; e[i] = (r - r[i]) / r = (1 / v - r[i]) * v = (1 - v * r[i]) -; -; Note that -; -; e[i + 1] = (1 - v * r[i + 1]) = 1 - 2 * v * r[i] + v^2 + (r[i])^2 -; = (1 - v * r[i])^2 = (e[i])^2 - -; r2 dividend, r3 divisor, r0 quotient -; clobbers r1, ar1 -#ifdef L_divqf3 - .text - .global ___divqf3 -___divqf3: - -#ifdef _TMS320C4x - .if .REGPARM == 0 - lda sp,ar0 - ldf *-ar0(2), r3 - .endif - - pop ar1 ; Pop return address - -; r0 = estimate of r, r1 = tmp, r2 = dividend, r3 = divisor - rcpf r3, r0 ; Compute initial estimate r[0] - - mpyf3 r0, r3, r1 ; r1 = r[0] * v - subrf 2.0, r1 ; r1 = 2.0 - r[0] * v - mpyf r1, r0 ; r0 = r[0] * (2.0 - r[0] * v) = r[1] -; End of 1st iteration (16 bits accuracy) - - mpyf3 r0, r3, r1 ; r1 = r[1] * v - subrf 2.0, r1 ; r1 = 2.0 - r[1] * v - - bud ar1 ; Delayed branch - mpyf r1, r0 ; r0 = r[1] * (2.0 - r[1] * v) = r[2] -; End of 2nd iteration (32 bits accuracy) - .if .REGPARM == 0 - mpyf *-ar0(1), r0 ; Multiply by the dividend - .else - mpyf r2, r0 ; Multiply by the dividend - .endif - rnd r0 - ; Branch occurs here -#else - .if .REGPARM == 0 - ldiu sp,ar0 - ldf *-ar0(2), r3 - .endif - - pop ar1 ; Pop return address - -; Initial estimate r[0] = 1.0 * 2^(-e - 1) -; where v = m * 2^e - -; r0 = estimate of r, r1 = tmp, r2 = dividend, r3 = divisor - -; Calculate initial estimate r[0] - pushf r3 - pop r0 - not r0 ; r0 = -e - ; complement exponent = -e -1 - ; complement sign (side effect) - ; complement mantissa (almost 3 bit accurate) - push r0 - popf r0 ; r0 = 1.0 * e^(-e - 1) + inverted mantissa - ldf -1.0, r1 ; undo complement sign bit - xor r1, r0 - - mpyf3 r0, r3, r1 ; r1 = r[0] * v - subrf 2.0, r1 ; r1 = 2.0 - r[0] * v - mpyf r1, r0 ; r0 = r[0] * (2.0 - r[0] * v) = r[1] -; End of 1st iteration - - mpyf3 r0, r3, r1 ; r1 = r[1] * v - subrf 2.0, r1 ; r1 = 2.0 - r[1] * v - mpyf r1, r0 ; r0 = r[1] * (2.0 - r[1] * v) = r[2] -; End of 2nd iteration - - mpyf3 r0, r3, r1 ; r1 = r[2] * v - subrf 2.0, r1 ; r1 = 2.0 - r[2] * v - mpyf r1, r0 ; r0 = r[2] * (2.0 - r[2] * v) = r[3] -; End of 3rd iteration - - or 080h, r0 ; add 1 lsb to result. needed when complemeting - ; 1.0 / 2.0 - rnd r0 - -; Use modified last iteration -; r[4] = (r[3] * (1.0 - (v * r[3]))) + r[3] - mpyf3 r0, r3, r1 ; r1 = r[3] * v - subrf 1.0, r1 ; r1 = 1.0 - r[3] * v - mpyf r0, r1 ; r1 = r[3] * (1.0 - r[3] * v) - - bud ar1 ; Delayed branch - addf r1, r0 ; r0 = r[3] * (1.0 - r[3] * v) + r[3] = r[4] - .if .REGPARM == 0 - mpyf *-ar0(1), r0 ; Multiply by the dividend - .else - mpyf r2, r0 ; Multiply by the dividend - .endif - rnd r0 - ; Branch occurs here -#endif - -#endif -; -; Integer signed division -; -; ar2 dividend, r2 divisor, r0 quotient -; clobbers r1, r3, ar0, ar1, ir0, ir1, rc, rs, re -#ifdef L_divqi3 - .text - .global ___divqi3 - .ref udivqi3n -___divqi3: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldi *-ar0(1), ar2 - ldi *-ar0(2), r2 - .endif - - xor3 ar2, r2, r3 ; Get the sign - absi ar2, r0 - bvd divq32 - ldi r0, ar2 - absi r2, r2 - cmpi ar2, r2 ; Divisor > dividend? - - pop ir1 - bhid zero ; If so, return 0 - -; -; Normalize oeprands. Use difference exponents as shift count -; for divisor, and as repeat count for "subc" -; - float ar2, r1 ; Normalize dividend - pushf r1 ; Get as integer - pop ar0 - lsh -24, ar0 ; Get exponent - - float r2, r1 ; Normalize divisor - pushf r1 ; Get as integer - pop ir0 - lsh -24, ir0 ; Get exponent - - subi ir0, ar0 ; Get difference of exponents - lsh ar0, r2 ; Align divisor with dividend - -; -; Do count + 1 subtracts and shifts -; - rpts ar0 - subc r2, ar2 - -; -; Mask off the lower count+1 bits of ar2 -; - subri 31, ar0 ; Shift count is (32 - (ar0 + 1)) - lsh ar0, ar2 ; Shift left - negi ar0, ar0 - lsh3 ar0, ar2, r0 ; Shift right and put result in r0 - -; -; Check sign and negate result if necessary -; - bud ir1 ; Delayed return - negi r0, r1 ; Negate result - ash -31, r3 ; Check sign - ldinz r1, r0 ; If set, use negative result - ; Branch occurs here - -zero: bud ir1 ; Delayed branch - ldi 0, r0 - nop - nop - ; Branch occurs here -; -; special case where ar2 = abs(ar2) = 0x80000000. We handle this by -; calling unsigned divide and negating the result if necessary. -; -divq32: - push r3 ; Save sign - call udivqi3n - pop r3 - pop ir1 - bd ir1 - negi r0, r1 ; Negate result - ash -31, r3 ; Check sign - ldinz r1, r0 ; If set, use negative result - ; Branch occurs here -#endif -; -; -; ar2 dividend, r2 divisor, r0 quotient, -; clobbers r1, r3, ar0, ar1, ir0, ir1, rc, rs, re -#ifdef L_udivqi3 - .text - .global ___udivqi3 - .global udivqi3n -___udivqi3: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldi *-ar0(1), ar2 - ldi *-ar0(2), r2 - .endif - -udivqi3n: - pop ir1 - - cmpi ar2, r2 ; If divisor > dividend - bhi qzero ; return zero - ldi r2, ar1 ; Store divisor in ar1 - - tstb ar2, ar2 ; Check top bit, jump if set to special handler - bld div_32 ; Delayed branch - -; -; Get divisor exponent -; - float ar1, r1 ; Normalize the divisor - pushf r1 ; Get into int register - pop rc - ; branch occurs here - - bzd qzero ; if (float) divisor zero, return zero - - float ar2, r1 ; Normalize the dividend - pushf r1 ; Get into int register - pop ar0 - lsh -24, ar0 ; Get both the exponents - lsh -24, rc - - subi rc, ar0 ; Get the difference between the exponents - lsh ar0, ar1 ; Normalize the divisor with the dividend - -; -; Do count_1 subtracts and shifts -; - rpts ar0 - subc ar1, ar2 - -; -; mask off the lower count+1 bits -; - subri 31, ar0 ; Shift count (31 - (ar0+1)) - bud ir1 ; Delayed return - lsh3 ar0, ar2, r0 - negi ar0, ar0 - lsh ar0, r0 - ; Branch occurs here - -; -; Handle a full 32-bit dividend -; -div_32: tstb ar1, ar1 - bld qone ; if divisor high bit is one, the result is one - lsh -24, rc - subri 31, rc - lsh rc, ar1 ; Line up the divisor - -; -; Now divisor and dividend are aligned. Do first SUBC by hand, save -; of the forst quotient digit. Then, shift divisor right rather -; than shifting dividend left. This leaves a zero in the top bit of -; the divident -; - ldi 1, ar0 ; Initizialize MSB of quotient - lsh rc, ar0 ; create a mask for MSBs - subi 1, ar0 ; mask is (2 << count) - 1 - - subi3 ar1, ar2, r1 - ldihs r1, ar2 - ldihs 1, r1 - ldilo 0, r1 - lsh rc, r1 - - lsh -1, ar1 - subi 1, rc -; -; do the rest of the shifts and subtracts -; - rpts rc - subc ar1, ar2 - - bud ir1 - and ar0, ar2 - or3 r1, ar2, r0 - nop - -qone: - bud ir1 - ldi 1, r0 - nop - nop - -qzero: - bud ir1 - ldi 0, r0 - nop - nop -#endif - -#ifdef L_umodqi3 - .text - .global ___umodqi3 - .global umodqi3n -___umodqi3: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldi *-ar0(1), ar2 - ldi *-ar0(2), r2 - .endif - -umodqi3n: - pop ir1 ; return address - cmpi ar2, r2 ; divisor > dividend ? - bhi uzero ; if so, return dividend - ldi r2, ar1 ; load divisor -; -; If top bit of dividend is set, handle specially. -; - tstb ar2, ar2 ; check top bit - bld umod_32 ; get divisor exponent, then jump. -; -; Get divisor exponent by converting to float. -; - float ar1, r1 ; normalize divisor - pushf r1 ; push as float - pop rc ; pop as int to get exponent - bzd uzero ; if (float)divisor was zero, return -; -; 31 or less bits in dividend. Get dividend exponent. -; - float ar2, r1 ; normalize dividend - pushf r1 ; push as float - pop ar0 ; pop as int to get exponent -; -; Use difference in exponents as shift count to line up MSBs. -; - lsh -24, rc ; divisor exponent - lsh -24, ar0 ; dividend exponent - subi rc, ar0 ; difference - lsh ar0, ar1 ; shift divisor up -; -; Do COUNT+1 subtract & shifts. -; - rpts ar0 - subc ar1, ar2 -; -; Remainder is in upper 31-COUNT bits. -; - bud ir1 ; delayed branch to return - addi 1, ar0 ; shift count is COUNT+1 - negi ar0, ar0 ; negate for right shift - lsh3 ar0, ar2, r0 ; shift to get result - ; Return occurs here - -; -; The following code handles cases of a full 32-bit dividend. Before -; SUBC can be used, the top bit must be cleared (otherwise SUBC can -; possibly shift a significant 1 out the top of the dividend). This -; is accomplished by first doing a normal subtraction, then proceeding -; with SUBCs. -; -umod_32: -; -; If the top bit of the divisor is set too, the remainder is simply -; the difference between the dividend and divisor. Otherwise, shift -; the divisor up to line up the MSBs. -; - tstb ar1, ar1 ; check divisor - bld uone ; if negative, remainder is diff - - lsh -24, rc ; divisor exponent - subri 31, rc ; shift count = 31 - exp - negi rc, ar0 ; used later as shift count - lsh rc, ar1 ; shift up to line up MSBs -; -; Now MSBs are aligned. Do first SUBC by hand using a plain subtraction. -; Then, shift divisor right rather than shifting dividend left. This leaves -; a 0 in the top bit of the dividend. -; - subi3 ar1, ar2, r1 ; subtract - ldihs r1, ar2 ; if positive, replace dividend - subi 1, rc ; first iteration is done - lsh -1, ar1 ; shift divisor down -; -; Do EXP subtract & shifts. -; - rpts rc - subc ar1, ar2 -; -; Quotient is in EXP+1 LSBs; shift remainder (in MSBs) down. -; - bud ir1 - lsh3 ar0, ar2, r0 ; COUNT contains -(EXP+1) - nop - nop -; -; Return (dividend - divisor). -; -uone: bud ir1 - subi3 r2, ar2, r0 - nop - nop -; -; Return dividend. -; -uzero: bud ir1 - ldi ar2, r0 ; set status from result - nop - nop -#endif - -#ifdef L_modqi3 - .text - .global ___modqi3 - .ref umodqi3n -___modqi3: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldi *-ar0(1), ar2 - ldi *-ar0(2), r2 - .endif - -; -; Determine sign of result. Get absolute value of operands. -; - ldi ar2, ar0 ; sign of result same as dividend - absi ar2, r0 ; make dividend positive - bvd mod_32 ; if still negative, escape - absi r2, r1 ; make divisor positive - ldi r1, ar1 ; save in ar1 - cmpi r0, ar1 ; divisor > dividend ? - - pop ir1 ; return address - bhid return ; if so, return dividend -; -; Normalize operands. Use difference in exponents as shift count -; for divisor, and as repeat count for SUBC. -; - float r1, r1 ; normalize divisor - pushf r1 ; push as float - pop rc ; pop as int - bzd return ; if (float)divisor was zero, return - - float r0, r1 ; normalize dividend - pushf r1 ; push as float - pop r1 ; pop as int - - lsh -24, rc ; get divisor exponent - lsh -24, r1 ; get dividend exponent - subi rc, r1 ; get difference in exponents - lsh r1, ar1 ; align divisor with dividend -; -; Do COUNT+1 subtract & shifts. -; - rpts r1 - subc ar1, r0 -; -; Remainder is in upper bits of R0 -; - addi 1, r1 ; shift count is -(r1+1) - negi r1, r1 - lsh r1, r0 ; shift right -; -; Check sign and negate result if necessary. -; -return: - bud ir1 ; delayed branch to return - negi r0, r1 ; negate result - cmpi 0, ar0 ; check sign - ldin r1, r0 ; if set, use negative result - ; Return occurs here -; -; The following code handles cases of a full 32-bit dividend. This occurs -; when R0 = abs(R0) = 080000000h. Handle this by calling the unsigned mod -; function, then negating the result if necessary. -; -mod_32: - push ar0 ; remember sign - call umodqi3n ; do divide - - brd return ; return - pop ar0 ; restore sign - pop ir1 ; return address - nop -#endif - -#ifdef L_unsfltconst - .section .const - .global ___unsfltconst -___unsfltconst: .float 4294967296.0 -#endif - -#ifdef L_unsfltcompare - .section .const - .global ___unsfltcompare -___unsfltcompare: .float 2147483648.0 -#endif - -; Integer 32-bit signed multiplication -; -; The TMS320C3x MPYI instruction takes two 24-bit signed integers -; and produces a 48-bit signed result which is truncated to 32-bits. -; -; A 32-bit by 32-bit multiplication thus requires a number of steps. -; -; Consider the product of two 32-bit signed integers, -; -; z = x * y -; -; where x = (b << 16) + a, y = (d << 16) + c -; -; This can be expressed as -; -; z = ((b << 16) + a) * ((d << 16) + c) -; -; = ((b * d) << 32) + ((b * c + a * d) << 16) + a * c -; -; Let z = (f << 16) + e where f < (1 << 16). -; -; Since we are only interested in a 32-bit result, we can ignore the -; (b * d) << 32 term, and thus -; -; f = b * c + a * d, e = a * c -; -; We can simplify things if we have some a priori knowledge of the -; operands, for example, if -32768 <= y <= 32767, then y = c and d = 0 and thus -; -; f = b * c, e = a * c -; -; ar2 multiplier, r2 multiplicand, r0 product -; clobbers r1, r2, r3 -#ifdef L_mulqi3 - .text - .global ___mulqi3 -___mulqi3: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldi *-ar0(1), ar2 - ldi *-ar0(2), r2 - .endif - - pop ir1 ; return address - ldi ar2, r0 ; - and 0ffffh, r0 ; a - lsh -16, ar2 ; b - ldi r2, r3 ; - and 0ffffh, r3 ; c - mpyi r3, ar2 ; c * b - lsh -16, r2 ; d - mpyi r0, r2 ; a * d - addi ar2, r2 ; c * b + a * d - bd ir1 ; delayed branch to return - lsh 16, r2 ; (c * b + a * d) << 16 - mpyi r3, r0 ; a * c - addi r2, r0 ; a * c + (c * b + a * d) << 16 -; branch occurs here - -#endif - -; -; Integer 64 by 64 multiply -; long1 and long2 on stack -; result in r0,r1 -; -#ifdef L_mulhi3 - .text - .global ___mulhi3 -#ifdef _TMS320C4x -___mulhi3: - pop ar0 - ldi sp,ar2 - ldi *-ar2(1),r2 - ldi *-ar2(3),r3 - mpyi3 r2,r3,r0 - mpyuhi3 r2,r3,r1 - mpyi *-ar2(2),r2 - bd ar0 - mpyi *-ar2(0),r3 - addi r2,r1 - addi r3,r1 -#else -___mulhi3: - ldi sp,ar2 - ldi -16,rs - ldi *-ar2(2),ar0 - ldi *-ar2(4),ar1 - ldi ar0,r2 - and 0ffffh,r2 - ldi ar1,r3 - and 0ffffh,r3 - lsh rs,ar0 - lsh rs,ar1 - - mpyi r2,r3,r0 - mpyi ar0,ar1,r1 - mpyi r2,ar1,rc - lsh rs,rc,re - addi re,r1 - lsh 16,rc - addi rc,r0 - addc 0,r1 - mpyi r3,ar0,rc - lsh rs,rc,re - addi re,r1 - lsh 16,rc - addi rc,r0 - addc 0,r1 - - ldi *-ar2(1),ar0 - ldi ar0,r2 - and 0ffffh,r2 - lsh rs,ar0 - mpyi r2,r3,rc - addi rc,r1 - mpyi r2,ar1,rc - mpyi r3,ar0,re - addi re,rc - lsh 16,rc - addi rc,r1 - - ldi *-ar2(2),ar0 - ldi *-ar2(3),ar1 - ldi ar0,r2 - and 0ffffh,r2 - ldi ar1,r3 - and 0ffffh,r3 - lsh rs,ar0 - lsh rs,ar1 - mpyi r2,r3,rc - addi rc,r1 - mpyi r2,ar1,rc - mpyi r3,ar0,re - pop ar0 - bd ar0 - addi re,rc - lsh 16,rc - addi rc,r1 -#endif -#endif - -; -; Integer 32 by 32 multiply highpart unsigned -; src1 in ar2 -; src2 in r2 -; result in r0 -; -#ifdef L_umulhi3_high - .text - .global ___umulhi3_high -___umulhi3_high: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldi *-ar0(1), ar2 - ldi *-ar0(2), r2 - .endif - - ldi -16,rs - ldi r2,r3 - and 0ffffh,r2 - ldi ar2,ar1 - and 0ffffh,ar2 - lsh rs,r3 - lsh rs,ar1 - - mpyi ar2,r2,r1 - mpyi ar1,r3,r0 - mpyi ar2,r3,rc - lsh rs,rc,re - addi re,r0 - lsh 16,rc - addi rc,r1 - addc 0,r0 - mpyi r2,ar1,rc - lsh rs,rc,re - addi re,r0 - pop ar0 - bd ar0 - lsh 16,rc - addi rc,r1 - addc 0,r0 -#endif - -; -; Integer 32 by 32 multiply highpart signed -; src1 in ar2 -; src2 in r2 -; result in r0 -; -#ifdef L_smulhi3_high - .text - .global ___smulhi3_high -___smulhi3_high: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldi *-ar0(1), ar2 - ldi *-ar0(2), r2 - .endif - - ldi -16,rs - ldi 0,rc - subi3 ar2,rc,r0 - ldi r2,r3 - ldilt r0,rc - subi3 r2,rc,r0 - ldi ar2,ar1 - tstb ar1,ar1 - ldilt r0,rc - and 0ffffh,r2 - and 0ffffh,ar2 - lsh rs,r3 - lsh rs,ar1 - - mpyi ar2,r2,r1 - mpyi ar1,r3,r0 - addi rc,r0 - mpyi ar2,r3,rc - lsh rs,rc,re - addi re,r0 - lsh 16,rc - addi rc,r1 - addc 0,r0 - mpyi r2,ar1,rc - lsh rs,rc,re - addi re,r0 - pop ar0 - bd ar0 - lsh 16,rc - addi rc,r1 - addc 0,r0 -#endif - -; -; Integer 64 by 64 unsigned divide -; long1 and long2 on stack -; divide in r0,r1 -; modulo in r2,r3 -; routine takes a maximum of 64*9+21=597 cycles = 24 us @ 50Mhz -; -#ifdef L_udivhi3 - .text - .global ___udivhi3 - .global ___udivide - .global ___umodulo - .ref udivqi3n - .ref umodqi3n -___udivhi3: - ldi sp,ar2 - ldi *-ar2(4),ar0 - ldi *-ar2(3),ar1 - ldi *-ar2(2),r0 - ldi *-ar2(1),r1 - -___udivide: - or r1,ar1,r2 - bne udiv0 - ldi ar0,r2 - ldi r0,ar2 - call udivqi3n - ldiu 0,r1 - rets - -___umodulo: - or r1,ar1,r2 - bne udiv0 - ldi ar0,r2 - ldi r0,ar2 - call umodqi3n - ldi r0,r2 - ldiu 0,r3 - rets - -udiv0: - tstb ar1,ar1 - bne udiv1 - tstb ar0,ar0 - bn udiv1 - - ldiu 63,rc -#ifdef _TMS320C4x - rptbd udivend0 - ldiu 0,r2 - addi r0,r0 - rolc r1 -#else - ldiu 0,r2 - addi r0,r0 - rolc r1 - rptb udivend0 -#endif - - rolc r2 - subi3 ar0,r2,r3 - xor 1,st - ldic r3,r2 - rolc r0 -udivend0: - rolc r1 - - ldiu 0,r3 - rets -udiv1: - push r4 - push r5 - ldiu 63,rc - ldiu 0,r2 -#ifdef _TMS320C4x - rptbd udivend1 - ldiu 0,r3 - addi r0,r0 - rolc r1 -#else - ldiu 0,r3 - addi r0,r0 - rolc r1 - rptb udivend1 -#endif - - rolc r2 - rolc r3 - subi3 ar0,r2,r4 - subb3 ar1,r3,r5 - xor 1,st - ldic r4,r2 - ldic r5,r3 - rolc r0 -udivend1: - rolc r1 - - pop r5 - pop r4 - rets -#endif - -; -; Integer 64 by 64 unsigned modulo -; long1 and long2 on stack -; result in r0,r1 -; -#ifdef L_umodhi3 - .text - .global ___umodhi3 - .ref ___modulo -___umodhi3: - ldi sp,ar2 - ldi *-ar2(4),ar0 - ldi *-ar2(3),ar1 - ldi *-ar2(2),r0 - ldi *-ar2(1),r1 - call ___umodulo - pop ar0 - bd ar0 - ldi r2,r0 - ldi r3,r1 - nop -#endif - -; -; Integer 64 by 64 signed divide -; long1 and long2 on stack -; result in r0,r1 -; -#ifdef L_divhi3 - .text - .global ___divhi3 - .ref ___udivide -___divhi3: - ldi 0,ir0 - ldi sp,ar2 - ldi *-ar2(4),r0 - ldi *-ar2(3),r1 - bge div1 - negi ir0 - negi r0 - negb r1 -div1: - ldi r0,ar0 - ldi r1,ar1 - ldi *-ar2(2),r0 - ldi *-ar2(1),r1 - bge div2 - negi ir0 - negi r0 - negb r1 -div2: - call ___udivide - tstb ir0,ir0 - bge div3 - negi r0 - negb r1 -div3: - rets -#endif - -; -; Integer 64 by 64 signed modulo -; long1 and long2 on stack -; result in r0,r1 -; -#ifdef L_modhi3 - .text - .global ___modhi3 - .ref ___umodulo -___modhi3: - ldi 0,ir0 - ldi sp,ar2 - ldi *-ar2(4),r0 - ldi *-ar2(3),r1 - bge mod1 - negi ir0 - negi r0 - negb r1 -mod1: - ldi r0,ar0 - ldi r1,ar1 - ldi *-ar2(2),r0 - ldi *-ar2(1),r1 - bge mod2 - negi ir0 - negi r0 - negb r1 -mod2: - call ___umodulo - ldi r2,r0 - ldi r3,r1 - tstb ir0,ir0 - bge mod3 - negi r0 - negb r1 -mod3: - rets -#endif - -; -; double to signed long long converion -; input in r2 -; result in r0,r1 -; -#ifdef L_fix_truncqfhi2 - .text - .global ___fix_truncqfhi2 - .ref ufix_truncqfhi2n -___fix_truncqfhi2: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldf *-ar0(1), r2 - .endif - - cmpf 0.0,r2 - bge ufix_truncqfhi2n - negf r2 - call ufix_truncqfhi2n - negi r0 - negb r1 - rets -#endif - -; -; double to unsigned long long converion -; input in r2 -; result in r0,r1 -; -#ifdef L_ufix_truncqfhi2 - .text - .global ___ufix_truncqfhi2 - .global ufix_truncqfhi2n -___ufix_truncqfhi2: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldf *-ar0(1), r2 - .endif - -ufix_truncqfhi2n: - cmpf 0.0,r2 - ble ufix1 - pushf r2 - pop r3 - ash -24,r3 - subi 31,r3 - cmpi 32,r3 - bge ufix1 - cmpi -32,r3 - ble ufix1 - ldi 1,r0 - ash 31,r0 - or3 r0,r2,r0 - ldi r0,r1 - lsh3 r3,r0,r0 - subi 32,r3 - cmpi -32,r3 - ldile 0,r1 - lsh3 r3,r1,r1 - rets -ufix1: - ldi 0,r0 - ldi 0,r1 - rets -#endif - -; -; signed long long to double converion -; input on stack -; result in r0 -; -#ifdef L_floathiqf2 - .text - .global ___floathiqf2 - .ref ufloathiqf2n -___floathiqf2: - ldi sp,ar2 - ldi *-ar2(2),r0 - ldi *-ar2(1),r1 - bge ufloathiqf2n - negi r0 - negb r1 - call ufloathiqf2n - negf r0 - rets -#endif - -; -; unsigned long long to double converion -; input on stack -; result in r0 -; -#ifdef L_ufloathiqf2 - .text - .global ___ufloathiqf2 - .global ufloathiqf2n - .ref ___unsfltconst -___ufloathiqf2: - ldi sp,ar2 - ldi *-ar2(2),r0 - ldi *-ar2(1),r1 -ufloathiqf2n: - .if .BIGMODEL -#ifdef _TMS320C4x - ldpk @___unsfltconst -#else - ldp @___unsfltconst -#endif - .endif - ldf @___unsfltconst,r2 - float r0 - bge uflt1 - addf r2,r0 -uflt1: - float r1 - bge uflt2 - addf r2,r1 -uflt2: -#ifdef _TMS320C4x - pop r3 - bd r3 - mpyf r2,r1 - addf r1,r0 - nop -#else - ldf r1,r3 - and 0ffh,r3 - norm r3,r3 - mpyf r2,r3 - pop ar2 - bd ar2 - addf r3,r0 - mpyf r2,r1 - addf r1,r0 -#endif -#endif - -; -; long double to signed long long converion -; input in r2 -; result in r0,r1 -; -#ifdef L_fix_trunchfhi2 - .text - .global ___fix_trunchfhi2 - .ref ufix_trunchfhi2n -___fix_trunchfhi2: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldf *-ar0(2), r2 - ldi *-ar0(1), r2 - .endif - - cmpf 0.0,r2 - bge ufix_trunchfhi2n - negf r2 - call ufix_trunchfhi2n - negi r0 - negb r1 - rets -#endif - -; -; long double to unsigned long long converion -; input in r2 -; result in r0,r1 -; -#ifdef L_ufix_trunchfhi2 - .text - .global ___ufix_trunchfhi2 - .global ufix_trunchfhi2n -___ufix_trunchfhi2: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldf *-ar0(2), r2 - ldi *-ar0(1), r2 - .endif - -ufix_trunchfhi2n: - cmpf 0.0,r2 - ble ufixh1 - pushf r2 - pop r3 - ash -24,r3 - subi 31,r3 - cmpi 32,r3 - bge ufixh1 - cmpi -32,r3 - ble ufixh1 - ldi 1,r0 - ash 31,r0 - or3 r0,r2,r0 - ldi r0,r1 - lsh3 r3,r0,r0 - subi 32,r3 - cmpi -32,r3 - ldile 0,r1 - lsh3 r3,r1,r1 - rets -ufixh1: - ldi 0,r0 - ldi 0,r1 - rets -#endif - -; -; signed long long to long double converion -; input on stack -; result in r0 -; -#ifdef L_floathihf2 - .text - .global ___floathihf2 - .ref ufloathihf2n -___floathihf2: - ldi sp,ar2 - ldi *-ar2(2),r0 - ldi *-ar2(1),r1 - bge ufloathihf2n - negi r0 - negb r1 - call ufloathihf2n - negf r0 - rets -#endif - -; -; unsigned long long to double converion -; input on stack -; result in r0 -; -#ifdef L_ufloathihf2 - .text - .global ___ufloathihf2 - .global ufloathihf2n - .ref ___unsfltconst -___ufloathihf2: - ldi sp,ar2 - ldi *-ar2(2),r0 - ldi *-ar2(1),r1 -ufloathihf2n - .if .BIGMODEL -#ifdef _TMS320C4x - ldpk @___unsfltconst -#else - ldp @___unsfltconst -#endif - .endif - ldf @___unsfltconst,r2 - float r0 - bge uflth1 - addf r2,r0 -uflth1: - float r1 - bge uflth2 - addf r2,r1 -uflth2: -#ifdef _TMS320C4x - pop r3 - bd r3 - mpyf r2,r1 - addf r1,r0 - nop -#else - ldf r1,r3 - and 0ffh,r3 - norm r3,r3 - mpyf r2,r3 - pop ar2 - bd ar2 - addf r3,r0 - mpyf r2,r1 - addf r1,r0 -#endif -#endif - -; -; calculate ffs -; input in ar2 -; result in r0 -; -#ifdef L_ffs - .global ___ffs - .ref ___unsfltconst - .text -___ffs: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldi *-ar0(1), ar2 - .endif - - negi ar2,r0 - and ar2,r0 - float r0,r0 - ldfu 0.0,r1 - .if .BIGMODEL -#ifdef _TMS320C4x - ldpk @___unsfltconst -#else - ldp @___unsfltconst -#endif - .endif - ldflt @___unsfltconst,r1 - addf r1,r0 - pushf r0 - pop r0 - pop ar0 - bd ar0 - ash -24,r0 - ldilt -1,r0 - addi 1,r0 -#endif - -; -; calculate long double * long double -; input in r2, r3 -; output in r0 -; -#ifdef L_mulhf3 - .global ___mulhf3 - .text -___mulhf3: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldf *-ar0(2), r2 - ldi *-ar0(1), r2 - ldf *-ar0(4), r3 - ldi *-ar0(3), r3 - .endif - - pop ar2 ; return ad - ldf r2,r0 ; copy lsb0 - ldf r3,r1 ; copy lsb1 - and 0ffh,r0 ; mask lsb0 - and 0ffh,r1 ; mask lsb1 - norm r0,r0 ; correct lsb0 - norm r1,r1 ; correct lsb1 - mpyf r2,r1 ; arg0*lsb1 - mpyf r3,r0 ; arg1*lsb0 - bd ar2 ; return (delayed) - addf r0,r1 ; arg0*lsb1 + arg1*lsb0 - mpyf r2,r3,r0 ; msb0*msb1 - addf r1,r0 ; msb0*msb1 + arg0*lsb1 + arg1*lsb0 -#endif - -; -; calculate long double / long double -; r2 dividend, r3 divisor, r0 quotient -; -#ifdef L_divhf3 - .global ___divhf3 - .text -___divhf3: - .if .REGPARM == 0 -#ifdef _TMS320C4x - lda sp,ar0 -#else - ldiu sp,ar0 -#endif - ldf *-ar0(2), r2 - ldi *-ar0(1), r2 - ldf *-ar0(4), r3 - ldi *-ar0(3), r3 - .endif - -#ifdef _TMS320C4x - pop ar1 - rcpf r3, r0 - mpyf3 r0, r3, r1 - subrf 2.0, r1 - mpyf r1, r0 - mpyf3 r0, r3, r1 - bud ar1 - subrf 2.0, r1 - mpyf r1, r0 - mpyf r2, r0 -#else - pop ar1 - pushf r3 - pop r0 - not r0 - push r0 - popf r0 - ldf -1.0, r1 - xor r1, r0 - - mpyf3 r0, r3, r1 ; r1 = r[0] * v - subrf 2.0, r1 ; r1 = 2.0 - r[0] * v - mpyf r1, r0 ; r0 = r[0] * (2.0 - r[0] * v) = r[1] -; End of 1st iteration - - mpyf3 r0, r3, r1 ; r1 = r[1] * v - subrf 2.0, r1 ; r1 = 2.0 - r[1] * v - mpyf r1, r0 ; r0 = r[1] * (2.0 - r[1] * v) = r[2] -; End of 2nd iteration - - mpyf3 r0, r3, r1 ; r1 = r[2] * v - subrf 2.0, r1 ; r1 = 2.0 - r[2] * v - mpyf r1, r0 ; r0 = r[2] * (2.0 - r[2] * v) = r[3] -; End of 3rd iteration - - or 080h, r0 - rnd r0 - -; mpyf3 r0, r3, r1 ; r1 = r[3] * v - push r4 - pushf r4 - mpyf r0, r3, r1 - - ldf r0, r4 - and 0ffh, r4 - norm r4, r4 - mpyf r3, r4 - addf r4, r1 - - ldf r3, r4 - and 0ffh, r4 - norm r4, r4 - mpyf r0, r4 - addf r4, r1 - - subrf 2.0, r1 ; r1 = 2.0 - r[3] * v - - mpyf r1, r0, r3 ; r3 = r[3] * (2.0 - r[3] * v) = r[5] - - ldf r1, r4 - and 0ffh, r4 - norm r4, r4 - mpyf r0, r4 - addf r4, r3 - - ldf r0, r4 - and 0ffh, r4 - norm r4, r4 - mpyf r1, r4 - addf r4, r3 - - mpyf r2, r3, r0 ; Multiply by the dividend - - ldf r2, r4 - and 0ffh, r4 - norm r4, r4 - mpyf r3, r4 - addf r4, r0 - - ldf r3, r4 - and 0ffh, r4 - norm r4, r4 - mpyf r2, r4 - bd ar1 - addf r4, r0 - - popf r4 - pop r4 -#endif -#endif |