summaryrefslogtreecommitdiff
path: root/gcc/config/c4x/libgcc.S
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/c4x/libgcc.S')
-rwxr-xr-xgcc/config/c4x/libgcc.S1501
1 files changed, 0 insertions, 1501 deletions
diff --git a/gcc/config/c4x/libgcc.S b/gcc/config/c4x/libgcc.S
deleted file mode 100755
index fb79cf8..0000000
--- a/gcc/config/c4x/libgcc.S
+++ /dev/null
@@ -1,1501 +0,0 @@
-/* libgcc1 routines for the Texas Instruments TMS320C[34]x
- Copyright (C) 1997,98 Free Software Foundation, Inc.
-
- Contributed by Michael Hayes (m.hayes@elec.canterbury.cri.nz)
- and Herman Ten Brugge (Haj.Ten.Brugge@net.HCC.nl).
-
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
-later version.
-
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file with other programs, and to distribute
-those programs without any restriction coming from the use of this
-file. (The General Public License restrictions do apply in other
-respects; for example, they cover modification of the file, and
-distribution when not linked into another program.)
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING. If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
-
-/* As a special exception, if you link this library with files
- compiled with GCC to produce an executable, this does not cause
- the resulting executable to be covered by the GNU General Public License.
- This exception does not however invalidate any other reasons why
- the executable file might be covered by the GNU General Public License. */
-
-
-; These routines are called using the standard TI register argument
-; passing model.
-; The following registers do not have to be saved:
-; r0, r1, r2, r3, ar0, ar1, ar2, ir0, ir1, bk, rs, rc, re, (r9, r10, r11)
-;
-; Perform floating point divqf3
-;
-; This routine performs a reciprocal of the divisor using the method
-; described in the C30/C40 user manuals. It then multiplies that
-; result by the dividend.
-;
-; Let r be the reciprocal of the divisor v and let the ith estimate
-; of r be denoted by r[i]. An iterative approach can be used to
-; improve the estimate of r, given an initial estimate r[0], where
-;
-; r[i + 1] = r[i] * (2.0 - v * r[i])
-;
-; The normalised error e[i] at the ith iteration is
-;
-; e[i] = (r - r[i]) / r = (1 / v - r[i]) * v = (1 - v * r[i])
-;
-; Note that
-;
-; e[i + 1] = (1 - v * r[i + 1]) = 1 - 2 * v * r[i] + v^2 * (r[i])^2
-; = (1 - v * r[i])^2 = (e[i])^2
-
-; r2 dividend, r3 divisor, r0 quotient
-; clobbers r1, ar1
-#ifdef L_divqf3
- .text
- .global ___divqf3
-___divqf3:
-; Two variants follow: the C4x one seeds the iteration with rcpf, the C3x one builds the seed by hand.
-#ifdef _TMS320C4x
- .if .REGPARM == 0
- lda sp,ar0 ; Stack-argument model: operands are addressed through ar0 (so ar0 is clobbered too)
- ldf *-ar0(2), r3 ; r3 = divisor (the dividend is read later from *-ar0(1))
- .endif
-
- pop ar1 ; Pop return address
-
-; r0 = estimate of r, r1 = tmp, r2 = dividend, r3 = divisor
- rcpf r3, r0 ; Compute initial estimate r[0]
-
- mpyf3 r0, r3, r1 ; r1 = r[0] * v
- subrf 2.0, r1 ; r1 = 2.0 - r[0] * v
- mpyf r1, r0 ; r0 = r[0] * (2.0 - r[0] * v) = r[1]
-; End of 1st iteration (16 bits accuracy)
-
- mpyf3 r0, r3, r1 ; r1 = r[1] * v
- subrf 2.0, r1 ; r1 = 2.0 - r[1] * v
-
- bud ar1 ; Delayed branch
- mpyf r1, r0 ; r0 = r[1] * (2.0 - r[1] * v) = r[2]
-; End of 2nd iteration (32 bits accuracy)
- .if .REGPARM == 0
- mpyf *-ar0(1), r0 ; Multiply by the dividend
- .else
- mpyf r2, r0 ; Multiply by the dividend
- .endif
- rnd r0 ; Round the quotient (last delay slot)
- ; Branch occurs here
-#else
- .if .REGPARM == 0
- ldiu sp,ar0 ; Stack-argument model: operands are addressed through ar0 (so ar0 is clobbered too)
- ldf *-ar0(2), r3 ; r3 = divisor (the dividend is read later from *-ar0(1))
- .endif
-
- pop ar1 ; Pop return address
-
-; Initial estimate r[0] = 1.0 * 2^(-e - 1)
-; where v = m * 2^e
-
-; r0 = estimate of r, r1 = tmp, r2 = dividend, r3 = divisor
-
-; Calculate initial estimate r[0]
- pushf r3 ; Move the float bit pattern of v ...
- pop r0 ; ... into r0 as an integer
- not r0 ; r0 = -e
- ; complement exponent = -e -1
- ; complement sign (side effect)
- ; complement mantissa (almost 3 bit accurate)
- push r0 ; Move the complemented pattern ...
- popf r0 ; r0 = 1.0 * 2^(-e - 1) + inverted mantissa
- ldf -1.0, r1 ; undo complement sign bit
- xor r1, r0
-
- mpyf3 r0, r3, r1 ; r1 = r[0] * v
- subrf 2.0, r1 ; r1 = 2.0 - r[0] * v
- mpyf r1, r0 ; r0 = r[0] * (2.0 - r[0] * v) = r[1]
-; End of 1st iteration
-
- mpyf3 r0, r3, r1 ; r1 = r[1] * v
- subrf 2.0, r1 ; r1 = 2.0 - r[1] * v
- mpyf r1, r0 ; r0 = r[1] * (2.0 - r[1] * v) = r[2]
-; End of 2nd iteration
-
- mpyf3 r0, r3, r1 ; r1 = r[2] * v
- subrf 2.0, r1 ; r1 = 2.0 - r[2] * v
- mpyf r1, r0 ; r0 = r[2] * (2.0 - r[2] * v) = r[3]
-; End of 3rd iteration
-
- or 080h, r0 ; add 1 lsb to result. needed when complementing
- ; 1.0 / 2.0
- rnd r0
-
-; Use modified last iteration
-; r[4] = (r[3] * (1.0 - (v * r[3]))) + r[3]
- mpyf3 r0, r3, r1 ; r1 = r[3] * v
- subrf 1.0, r1 ; r1 = 1.0 - r[3] * v
- mpyf r0, r1 ; r1 = r[3] * (1.0 - r[3] * v)
-
- bud ar1 ; Delayed branch
- addf r1, r0 ; r0 = r[3] * (1.0 - r[3] * v) + r[3] = r[4]
- .if .REGPARM == 0
- mpyf *-ar0(1), r0 ; Multiply by the dividend
- .else
- mpyf r2, r0 ; Multiply by the dividend
- .endif
- rnd r0 ; Round the quotient (last delay slot)
- ; Branch occurs here
-#endif
-
-#endif
-;
-; Integer signed division
-;
-; ar2 dividend, r2 divisor, r0 quotient
-; clobbers r1, r3, ar0, ar1, ir0, ir1, rc, rs, re
-#ifdef L_divqi3
- .text
- .global ___divqi3
- .ref udivqi3n
-___divqi3:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldi *-ar0(1), ar2 ; Stack-argument model: ar2 = dividend
- ldi *-ar0(2), r2 ; r2 = divisor
- .endif
-
- xor3 ar2, r2, r3 ; Get the sign
- absi ar2, r0 ; r0 = |dividend|
- bvd divq32 ; Overflow: dividend is 0x80000000 (see divq32); next three instructions still run
- ldi r0, ar2 ; ar2 = |dividend|
- absi r2, r2 ; r2 = |divisor|
- cmpi ar2, r2 ; Divisor > dividend?
-
- pop ir1 ; Return address
- bhid zero ; If so, return 0
-
-;
-; Normalize operands. Use difference exponents as shift count
-; for divisor, and as repeat count for "subc"
-;
- float ar2, r1 ; Normalize dividend
- pushf r1 ; Get as integer
- pop ar0
- lsh -24, ar0 ; Get exponent
-
- float r2, r1 ; Normalize divisor
- pushf r1 ; Get as integer
- pop ir0
- lsh -24, ir0 ; Get exponent
-
- subi ir0, ar0 ; Get difference of exponents
- lsh ar0, r2 ; Align divisor with dividend
-
-;
-; Do count + 1 subtracts and shifts
-;
- rpts ar0
- subc r2, ar2
-
-;
-; Mask off the lower count+1 bits of ar2
-;
- subri 31, ar0 ; Shift count is (32 - (ar0 + 1))
- lsh ar0, ar2 ; Shift left
- negi ar0, ar0
- lsh3 ar0, ar2, r0 ; Shift right and put result in r0
-
-;
-; Check sign and negate result if necessary
-;
- bud ir1 ; Delayed return
- negi r0, r1 ; Negate result
- ash -31, r3 ; Check sign
- ldinz r1, r0 ; If set, use negative result
- ; Branch occurs here
-
-zero: bud ir1 ; Delayed branch
- ldi 0, r0
- nop
- nop
- ; Branch occurs here
-;
-; special case where ar2 = abs(ar2) = 0x80000000. We handle this by
-; calling unsigned divide and negating the result if necessary.
-;
-divq32:
- push r3 ; Save sign
- call udivqi3n ; ar2 = |dividend|, r2 = |divisor| were set in the bvd delay slots
- pop r3
- pop ir1 ; Our own return address (not yet popped on this path)
- bd ir1
- negi r0, r1 ; Negate result
- ash -31, r3 ; Check sign
- ldinz r1, r0 ; If set, use negative result
- ; Branch occurs here
-#endif
-;
-;
-; ar2 dividend, r2 divisor, r0 quotient,
-; clobbers r1, r3, ar0, ar1, ir0, ir1, rc, rs, re
-#ifdef L_udivqi3
- .text
- .global ___udivqi3
- .global udivqi3n
-___udivqi3:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldi *-ar0(1), ar2 ; Stack-argument model: ar2 = dividend
- ldi *-ar0(2), r2 ; r2 = divisor
- .endif
-; udivqi3n: register-argument entry (ar2 = dividend, r2 = divisor), also used by other routines in this file.
-udivqi3n:
- pop ir1 ; Return address; every exit path returns through ir1
-
- cmpi ar2, r2 ; If divisor > dividend
- bhi qzero ; return zero
- ldi r2, ar1 ; Store divisor in ar1
-
- tstb ar2, ar2 ; Check top bit, jump if set to special handler
- bld div_32 ; Delayed branch
-
-;
-; Get divisor exponent
-;
- float ar1, r1 ; Normalize the divisor
- pushf r1 ; Get into int register
- pop rc
- ; branch occurs here
-
- bzd qzero ; if (float) divisor zero, return zero
-
- float ar2, r1 ; Normalize the dividend
- pushf r1 ; Get into int register
- pop ar0
- lsh -24, ar0 ; Get both the exponents
- lsh -24, rc
-
- subi rc, ar0 ; Get the difference between the exponents
- lsh ar0, ar1 ; Normalize the divisor with the dividend
-
-;
-; Do count+1 subtracts and shifts
-;
- rpts ar0
- subc ar1, ar2
-
-;
-; mask off the lower count+1 bits
-;
- subri 31, ar0 ; Shift count (32 - (ar0+1))
- bud ir1 ; Delayed return
- lsh3 ar0, ar2, r0 ; Shift left to drop the remainder bits
- negi ar0, ar0
- lsh ar0, r0 ; Shift back right: r0 = quotient
- ; Branch occurs here
-
-;
-; Handle a full 32-bit dividend
-;
-div_32: tstb ar1, ar1
- bld qone ; if divisor high bit is one, the result is one
- lsh -24, rc
- subri 31, rc
- lsh rc, ar1 ; Line up the divisor
-
-;
-; Now divisor and dividend are aligned. Do first SUBC by hand, save
-; of the first quotient digit. Then, shift divisor right rather
-; than shifting dividend left. This leaves a zero in the top bit of
-; the dividend
-;
- ldi 1, ar0 ; Initialize MSB of quotient
- lsh rc, ar0 ; create a mask for MSBs
- subi 1, ar0 ; mask is (2 << count) - 1
-
- subi3 ar1, ar2, r1
- ldihs r1, ar2
- ldihs 1, r1
- ldilo 0, r1
- lsh rc, r1
-
- lsh -1, ar1
- subi 1, rc
-;
-; do the rest of the shifts and subtracts
-;
- rpts rc
- subc ar1, ar2
-
- bud ir1
- and ar0, ar2 ; Keep only the quotient bits produced by subc
- or3 r1, ar2, r0 ; Merge in the first quotient digit computed by hand
- nop
-
-qone:
- bud ir1
- ldi 1, r0
- nop
- nop
-
-qzero:
- bud ir1
- ldi 0, r0
- nop
- nop
-#endif
-
-#ifdef L_umodqi3
- .text
- .global ___umodqi3
- .global umodqi3n
-___umodqi3:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldi *-ar0(1), ar2 ; Stack-argument model: ar2 = dividend
- ldi *-ar0(2), r2 ; r2 = divisor
- .endif
-; umodqi3n: register-argument entry (ar2 = dividend, r2 = divisor); remainder is returned in r0.
-umodqi3n:
- pop ir1 ; return address
- cmpi ar2, r2 ; divisor > dividend ?
- bhi uzero ; if so, return dividend
- ldi r2, ar1 ; load divisor
-;
-; If top bit of dividend is set, handle specially.
-;
- tstb ar2, ar2 ; check top bit
- bld umod_32 ; get divisor exponent, then jump.
-;
-; Get divisor exponent by converting to float.
-;
- float ar1, r1 ; normalize divisor
- pushf r1 ; push as float
- pop rc ; pop as int to get exponent
- bzd uzero ; if (float)divisor was zero, return
-;
-; 31 or less bits in dividend. Get dividend exponent.
-;
- float ar2, r1 ; normalize dividend
- pushf r1 ; push as float
- pop ar0 ; pop as int to get exponent
-;
-; Use difference in exponents as shift count to line up MSBs.
-;
- lsh -24, rc ; divisor exponent
- lsh -24, ar0 ; dividend exponent
- subi rc, ar0 ; difference
- lsh ar0, ar1 ; shift divisor up
-;
-; Do COUNT+1 subtract & shifts.
-;
- rpts ar0
- subc ar1, ar2
-;
-; Remainder is in upper 31-COUNT bits.
-;
- bud ir1 ; delayed branch to return
- addi 1, ar0 ; shift count is COUNT+1
- negi ar0, ar0 ; negate for right shift
- lsh3 ar0, ar2, r0 ; shift to get result
- ; Return occurs here
-
-;
-; The following code handles cases of a full 32-bit dividend. Before
-; SUBC can be used, the top bit must be cleared (otherwise SUBC can
-; possibly shift a significant 1 out the top of the dividend). This
-; is accomplished by first doing a normal subtraction, then proceeding
-; with SUBCs.
-;
-umod_32:
-;
-; If the top bit of the divisor is set too, the remainder is simply
-; the difference between the dividend and divisor. Otherwise, shift
-; the divisor up to line up the MSBs.
-;
- tstb ar1, ar1 ; check divisor
- bld uone ; if negative, remainder is diff
-
- lsh -24, rc ; divisor exponent
- subri 31, rc ; shift count = 31 - exp
- negi rc, ar0 ; used later as shift count
- lsh rc, ar1 ; shift up to line up MSBs
-;
-; Now MSBs are aligned. Do first SUBC by hand using a plain subtraction.
-; Then, shift divisor right rather than shifting dividend left. This leaves
-; a 0 in the top bit of the dividend.
-;
- subi3 ar1, ar2, r1 ; subtract
- ldihs r1, ar2 ; if positive, replace dividend
- subi 1, rc ; first iteration is done
- lsh -1, ar1 ; shift divisor down
-;
-; Do EXP subtract & shifts.
-;
- rpts rc
- subc ar1, ar2
-;
-; Quotient is in EXP+1 LSBs; shift remainder (in MSBs) down.
-;
- bud ir1
- lsh3 ar0, ar2, r0 ; COUNT contains -(EXP+1)
- nop
- nop
-;
-; Return (dividend - divisor).
-;
-uone: bud ir1
- subi3 r2, ar2, r0 ; r0 = ar2 - r2
- nop
- nop
-;
-; Return dividend.
-;
-uzero: bud ir1
- ldi ar2, r0 ; set status from result
- nop
- nop
-#endif
-
-#ifdef L_modqi3
- .text
- .global ___modqi3
- .ref umodqi3n
-___modqi3:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldi *-ar0(1), ar2 ; Stack-argument model: ar2 = dividend
- ldi *-ar0(2), r2 ; r2 = divisor
- .endif
-
-;
-; Determine sign of result. Get absolute value of operands.
-;
- ldi ar2, ar0 ; sign of result same as dividend
- absi ar2, r0 ; make dividend positive
- bvd mod_32 ; if still negative, escape
- absi r2, r1 ; make divisor positive
- ldi r1, ar1 ; save in ar1
- cmpi r0, ar1 ; divisor > dividend ?
-
- pop ir1 ; return address
- bhid return ; if so, return dividend
-;
-; Normalize operands. Use difference in exponents as shift count
-; for divisor, and as repeat count for SUBC.
-;
- float r1, r1 ; normalize divisor
- pushf r1 ; push as float
- pop rc ; pop as int
- bzd return ; if (float)divisor was zero, return
-
- float r0, r1 ; normalize dividend
- pushf r1 ; push as float
- pop r1 ; pop as int
-
- lsh -24, rc ; get divisor exponent
- lsh -24, r1 ; get dividend exponent
- subi rc, r1 ; get difference in exponents
- lsh r1, ar1 ; align divisor with dividend
-;
-; Do COUNT+1 subtract & shifts.
-;
- rpts r1
- subc ar1, r0
-;
-; Remainder is in upper bits of R0
-;
- addi 1, r1 ; shift count is -(r1+1)
- negi r1, r1
- lsh r1, r0 ; shift right
-;
-; Check sign and negate result if necessary.
-;
-return:
- bud ir1 ; delayed branch to return
- negi r0, r1 ; negate result
- cmpi 0, ar0 ; check sign
- ldin r1, r0 ; if set, use negative result
- ; Return occurs here
-;
-; The following code handles cases of a full 32-bit dividend. This occurs
-; when R0 = abs(R0) = 080000000h. Handle this by calling the unsigned mod
-; function, then negating the result if necessary.
-;
-mod_32:
- push ar0 ; remember sign
- ldi r1, r2 ; umodqi3n reads the divisor from r2: pass |divisor|, not the signed original
- call umodqi3n ; do divide
- brd return ; return
- pop ar0 ; restore sign
- pop ir1 ; return address
- nop
-#endif
-
-#ifdef L_unsfltconst
- .section .const
- .global ___unsfltconst
-___unsfltconst: .float 4294967296.0 ; 2^32
-#endif
-
-#ifdef L_unsfltcompare
- .section .const
- .global ___unsfltcompare
-___unsfltcompare: .float 2147483648.0 ; 2^31
-#endif
-
-; Integer 32-bit signed multiplication
-;
-; The TMS320C3x MPYI instruction takes two 24-bit signed integers
-; and produces a 48-bit signed result which is truncated to 32-bits.
-;
-; A 32-bit by 32-bit multiplication thus requires a number of steps.
-;
-; Consider the product of two 32-bit signed integers,
-;
-; z = x * y
-;
-; where x = (b << 16) + a, y = (d << 16) + c
-;
-; This can be expressed as
-;
-; z = ((b << 16) + a) * ((d << 16) + c)
-;
-; = ((b * d) << 32) + ((b * c + a * d) << 16) + a * c
-;
-; Let z = (f << 16) + e where f < (1 << 16).
-;
-; Since we are only interested in a 32-bit result, we can ignore the
-; (b * d) << 32 term, and thus
-;
-; f = b * c + a * d, e = a * c
-;
-; We can simplify things if we have some a priori knowledge of the
-; operands, for example, if -32768 <= y <= 32767, then y = c and d = 0 and thus
-;
-; f = b * c, e = a * c
-;
-; ar2 multiplier, r2 multiplicand, r0 product
-; clobbers r1, r2, r3
-#ifdef L_mulqi3
- .text
- .global ___mulqi3
-___mulqi3:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldi *-ar0(1), ar2 ; Stack-argument model: ar2 = multiplier
- ldi *-ar0(2), r2 ; r2 = multiplicand
- .endif
-; Registers written below: r0, r2, r3, ar2 and ir1 (return address); r1 is not touched.
- pop ir1 ; return address
- ldi ar2, r0 ;
- and 0ffffh, r0 ; a
- lsh -16, ar2 ; b
- ldi r2, r3 ;
- and 0ffffh, r3 ; c
- mpyi r3, ar2 ; c * b
- lsh -16, r2 ; d
- mpyi r0, r2 ; a * d
- addi ar2, r2 ; c * b + a * d
- bd ir1 ; delayed branch to return
- lsh 16, r2 ; (c * b + a * d) << 16
- mpyi r3, r0 ; a * c
- addi r2, r0 ; a * c + (c * b + a * d) << 16
-; branch occurs here
-
-#endif
-
-;
-; Integer 64 by 64 multiply
-; long1 and long2 on stack
-; result in r0,r1
-;
-#ifdef L_mulhi3
- .text
- .global ___mulhi3
-#ifdef _TMS320C4x
-___mulhi3:
- pop ar0 ; return address (stack offsets below are relative to sp after this pop)
- ldi sp,ar2
- ldi *-ar2(1),r2 ; r2, r3 = the two words multiplied together for the full 64-bit partial product
- ldi *-ar2(3),r3
- mpyi3 r2,r3,r0 ; r0 = result word 0
- mpyuhi3 r2,r3,r1 ; r1 = upper part of r2 * r3 (presumably unsigned high word - confirm against C4x manual)
- mpyi *-ar2(2),r2 ; cross product 1
- bd ar0 ; delayed return
- mpyi *-ar2(0),r3 ; cross product 2
- addi r2,r1 ; fold both cross products into the upper result word
- addi r3,r1
-#else
-___mulhi3:
- ldi sp,ar2 ; return address is still on the stack; it is popped near the end
- ldi -16,rs ; rs = shift count used to extract upper 16-bit halves
- ldi *-ar2(2),ar0
- ldi *-ar2(4),ar1
- ldi ar0,r2
- and 0ffffh,r2 ; r2 = lower 16 bits of *-ar2(2)
- ldi ar1,r3
- and 0ffffh,r3 ; r3 = lower 16 bits of *-ar2(4)
- lsh rs,ar0 ; ar0 = upper 16 bits of *-ar2(2)
- lsh rs,ar1 ; ar1 = upper 16 bits of *-ar2(4)
-; First section: 32x32 product of *-ar2(2) and *-ar2(4) from 16-bit pieces, into r0 (low) and r1 (high).
- mpyi r2,r3,r0
- mpyi ar0,ar1,r1
- mpyi r2,ar1,rc ; first cross term
- lsh rs,rc,re ; its upper half goes to the high word
- addi re,r1
- lsh 16,rc ; its lower half goes to the top of the low word
- addi rc,r0
- addc 0,r1 ; propagate the carry
- mpyi r3,ar0,rc ; second cross term, handled the same way
- lsh rs,rc,re
- addi re,r1
- lsh 16,rc
- addi rc,r0
- addc 0,r1
-; Second section: add the low 32 bits of *-ar2(1) times *-ar2(4) into r1 (r3/ar1 still hold the halves of *-ar2(4)).
- ldi *-ar2(1),ar0
- ldi ar0,r2
- and 0ffffh,r2
- lsh rs,ar0
- mpyi r2,r3,rc
- addi rc,r1
- mpyi r2,ar1,rc
- mpyi r3,ar0,re
- addi re,rc
- lsh 16,rc
- addi rc,r1
-; Third section: add the low 32 bits of *-ar2(2) times *-ar2(3) into r1.
- ldi *-ar2(2),ar0
- ldi *-ar2(3),ar1
- ldi ar0,r2
- and 0ffffh,r2
- ldi ar1,r3
- and 0ffffh,r3
- lsh rs,ar0
- lsh rs,ar1
- mpyi r2,r3,rc
- addi rc,r1
- mpyi r2,ar1,rc
- mpyi r3,ar0,re
- pop ar0 ; return address
- bd ar0 ; delayed return; the three instructions below still execute
- addi re,rc
- lsh 16,rc
- addi rc,r1
-#endif
-#endif
-
-;
-; Integer 32 by 32 multiply highpart unsigned
-; src1 in ar2
-; src2 in r2
-; result in r0
-;
-#ifdef L_umulhi3_high
- .text
- .global ___umulhi3_high
-___umulhi3_high:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldi *-ar0(1), ar2 ; Stack-argument model: ar2 = src1
- ldi *-ar0(2), r2 ; r2 = src2
- .endif
-; Split both operands into 16-bit halves: r2/ar2 keep the lower halves, r3/ar1 get the upper halves.
- ldi -16,rs ; rs = shift count for extracting upper halves
- ldi r2,r3
- and 0ffffh,r2
- ldi ar2,ar1
- and 0ffffh,ar2
- lsh rs,r3
- lsh rs,ar1
-; r1 accumulates the low product word (only needed for its carries), r0 the high word that is returned.
- mpyi ar2,r2,r1 ; low * low
- mpyi ar1,r3,r0 ; high * high
- mpyi ar2,r3,rc ; first cross term
- lsh rs,rc,re ; upper half of the cross term goes to the high word
- addi re,r0
- lsh 16,rc ; lower half goes to the top of the low word
- addi rc,r1
- addc 0,r0 ; propagate the carry into the high word
- mpyi r2,ar1,rc ; second cross term
- lsh rs,rc,re
- addi re,r0
- pop ar0 ; return address
- bd ar0 ; delayed return; the three instructions below still execute
- lsh 16,rc
- addi rc,r1
- addc 0,r0
-#endif
-
-;
-; Integer 32 by 32 multiply highpart signed
-; src1 in ar2
-; src2 in r2
-; result in r0
-;
-#ifdef L_smulhi3_high
- .text
- .global ___smulhi3_high
-___smulhi3_high:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldi *-ar0(1), ar2 ; Stack-argument model: ar2 = src1
- ldi *-ar0(2), r2 ; r2 = src2
- .endif
-; rc is built up as a correction term that is later added to the high word of the unsigned product.
- ldi -16,rs ; rs = shift count for extracting upper halves
- ldi 0,rc
- subi3 ar2,rc,r0 ; r0 = rc - ar2 = -src1
- ldi r2,r3 ; copy src2 (status now reflects src2)
- ldilt r0,rc ; src2 negative: correction = -src1
- subi3 r2,rc,r0 ; r0 = rc - src2
- ldi ar2,ar1
- tstb ar1,ar1 ; test sign of src1
- ldilt r0,rc ; src1 negative: correction also subtracts src2
- and 0ffffh,r2
- and 0ffffh,ar2
- lsh rs,r3
- lsh rs,ar1
-; From here on the sequence is the unsigned high-part multiply plus the correction in rc.
- mpyi ar2,r2,r1 ; low * low
- mpyi ar1,r3,r0 ; high * high
- addi rc,r0 ; apply the sign correction
- mpyi ar2,r3,rc ; first cross term
- lsh rs,rc,re
- addi re,r0
- lsh 16,rc
- addi rc,r1
- addc 0,r0
- mpyi r2,ar1,rc ; second cross term
- lsh rs,rc,re
- addi re,r0
- pop ar0 ; return address
- bd ar0 ; delayed return; the three instructions below still execute
- lsh 16,rc
- addi rc,r1
- addc 0,r0
-#endif
-
-;
-; Integer 64 by 64 unsigned divide
-; long1 and long2 on stack
-; divide in r0,r1
-; modulo in r2,r3
-; routine takes a maximum of 64*9+21=597 cycles = 24 us @ 50Mhz
-;
-#ifdef L_udivhi3
- .text
- .global ___udivhi3
- .global ___udivide
- .global ___umodulo
- .ref udivqi3n
- .ref umodqi3n
-___udivhi3:
- ldi sp,ar2
- ldi *-ar2(4),ar0 ; ar0:ar1 = divisor (low word in ar0, high word in ar1)
- ldi *-ar2(3),ar1
- ldi *-ar2(2),r0 ; r0:r1 = dividend (low word in r0, high word in r1)
- ldi *-ar2(1),r1
-; ___udivide: register entry with ar0:ar1 = divisor and r0:r1 = dividend; quotient is returned in r0:r1.
-___udivide:
- or r1,ar1,r2 ; both high words zero?
- bne udiv0 ; no: use the 64-bit loops below
- ldi ar0,r2 ; yes: 32-bit divide, r2 = divisor
- ldi r0,ar2 ; ar2 = dividend
- call udivqi3n
- ldiu 0,r1 ; high word of the quotient is zero
- rets
-; ___umodulo: same register interface as ___udivide; remainder is returned in r2:r3.
-___umodulo:
- or r1,ar1,r2 ; both high words zero?
- bne udiv0 ; no: use the 64-bit loops below
- ldi ar0,r2 ; yes: 32-bit modulo, r2 = divisor
- ldi r0,ar2 ; ar2 = dividend
- call umodqi3n
- ldi r0,r2 ; move the remainder to r2:r3
- ldiu 0,r3
- rets
-; udiv0/udiv1: bit-serial 64-bit divide producing quotient in r0:r1 and remainder in r2(:r3).
-udiv0:
- tstb ar1,ar1
- bne udiv1 ; divisor has a non-zero high word: full 64-bit remainder needed
- tstb ar0,ar0
- bn udiv1 ; divisor low word has its top bit set: also use the full version
-; Here the divisor fits in 31 bits, so a single-word remainder (r2) is enough.
- ldiu 63,rc ; loop count register: 64 iterations
-#ifdef _TMS320C4x
- rptbd udivend0
- ldiu 0,r2
- addi r0,r0
- rolc r1
-#else
- ldiu 0,r2
- addi r0,r0
- rolc r1
- rptb udivend0
-#endif
-; Loop body: shift the next dividend bit into r2, trial-subtract the divisor, shift the quotient bit into r0:r1.
- rolc r2
- subi3 ar0,r2,r3 ; r3 = r2 - divisor
- xor 1,st ; invert the carry bit so it becomes the quotient bit
- ldic r3,r2 ; keep the difference when the subtraction did not borrow
- rolc r0
-udivend0:
- rolc r1
-
- ldiu 0,r3 ; high word of the remainder is zero
- rets
-udiv1:
- push r4 ; r4 and r5 are used as temporaries and restored before returning
- push r5
- ldiu 63,rc ; loop count register: 64 iterations
- ldiu 0,r2
-#ifdef _TMS320C4x
- rptbd udivend1
- ldiu 0,r3
- addi r0,r0
- rolc r1
-#else
- ldiu 0,r3
- addi r0,r0
- rolc r1
- rptb udivend1
-#endif
-; Loop body: same scheme as above with a two-word remainder r2:r3 and two-word trial subtraction.
- rolc r2
- rolc r3
- subi3 ar0,r2,r4
- subb3 ar1,r3,r5
- xor 1,st ; invert the carry bit so it becomes the quotient bit
- ldic r4,r2
- ldic r5,r3
- rolc r0
-udivend1:
- rolc r1
-
- pop r5
- pop r4
- rets
-#endif
-
-;
-; Integer 64 by 64 unsigned modulo
-; long1 and long2 on stack
-; result in r0,r1
-;
-#ifdef L_umodhi3
- .text
- .global ___umodhi3
- .ref ___umodulo
-___umodhi3:
- ldi sp,ar2
- ldi *-ar2(4),ar0 ; ar0:ar1 = divisor (low, high)
- ldi *-ar2(3),ar1
- ldi *-ar2(2),r0 ; r0:r1 = dividend (low, high)
- ldi *-ar2(1),r1
- call ___umodulo ; remainder comes back in r2:r3
- pop ar0 ; return address
- bd ar0 ; delayed return; the three instructions below still execute
- ldi r2,r0 ; move the remainder into the result registers r0:r1
- ldi r3,r1
- nop
-#endif
-
-;
-; Integer 64 by 64 signed divide
-; long1 and long2 on stack
-; result in r0,r1
-;
-#ifdef L_divhi3
- .text
- .global ___divhi3
- .ref ___udivide
-___divhi3:
- ldi 0,ir0 ; ir0 = sign flag: 0 = positive quotient, -1 = negative quotient
- ldi sp,ar2
- ldi *-ar2(4),r0 ; divisor, low word
- ldi *-ar2(3),r1 ; divisor, high word (sets the status tested by bge)
- bge div1
- not ir0 ; toggle the sign flag (negi of 0 would leave it at 0)
- negi r0 ; 64-bit negate: low word ...
- negb r1 ; ... then high word with borrow
-div1:
- ldi r0,ar0 ; ar0:ar1 = |divisor| for ___udivide
- ldi r1,ar1
- ldi *-ar2(2),r0 ; dividend, low word
- ldi *-ar2(1),r1 ; dividend, high word (sets the status tested by bge)
- bge div2
- not ir0 ; toggle the sign flag again
- negi r0
- negb r1
-div2:
- call ___udivide ; unsigned quotient in r0:r1
- tstb ir0,ir0 ; flag is -1 only when exactly one operand was negative
- bge div3
- negi r0 ; negate the 64-bit quotient
- negb r1
-div3:
- rets
-#endif
-
-;
-; Integer 64 by 64 signed modulo
-; long1 and long2 on stack
-; result in r0,r1
-;
-#ifdef L_modhi3
- .text
- .global ___modhi3
- .ref ___umodulo
-___modhi3:
- ldi 0,ir0 ; ir0 = sign flag: 0 = positive remainder, -1 = negative remainder
- ldi sp,ar2
- ldi *-ar2(4),r0 ; divisor, low word
- ldi *-ar2(3),r1 ; divisor, high word (sets the status tested by bge)
- bge mod1
-; The divisor's sign is deliberately not recorded: the remainder takes the sign of the dividend only.
- negi r0 ; 64-bit negate: low word ...
- negb r1 ; ... then high word with borrow
-mod1:
- ldi r0,ar0 ; ar0:ar1 = |divisor| for ___umodulo
- ldi r1,ar1
- ldi *-ar2(2),r0 ; dividend, low word
- ldi *-ar2(1),r1 ; dividend, high word (sets the status tested by bge)
- bge mod2
- not ir0 ; negative dividend: remember to negate the remainder (negi of 0 would stay 0)
- negi r0
- negb r1
-mod2:
- call ___umodulo ; unsigned remainder in r2:r3
- ldi r2,r0 ; move it into the result registers r0:r1
- ldi r3,r1
- tstb ir0,ir0
- bge mod3
- negi r0 ; negate the 64-bit remainder
- negb r1
-mod3:
- rets
-#endif
-
-;
-; double to signed long long conversion
-; input in r2
-; result in r0,r1
-;
-#ifdef L_fix_truncqfhi2
- .text
- .global ___fix_truncqfhi2
- .ref ufix_truncqfhi2n
-___fix_truncqfhi2:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldf *-ar0(1), r2 ; Stack-argument model: r2 = input value
- .endif
-; Non-negative input is handed straight to the unsigned routine; negative input is converted as |x| and negated.
- cmpf 0.0,r2
- bge ufix_truncqfhi2n ; plain branch: the unsigned routine returns directly to our caller
- negf r2
- call ufix_truncqfhi2n
- negi r0 ; negate the 64-bit result: low word ...
- negb r1 ; ... then high word with borrow
- rets
-#endif
-
-;
-; double to unsigned long long conversion
-; input in r2
-; result in r0,r1
-;
-#ifdef L_ufix_truncqfhi2
- .text
- .global ___ufix_truncqfhi2
- .global ufix_truncqfhi2n
-___ufix_truncqfhi2:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldf *-ar0(1), r2 ; Stack-argument model: r2 = input value
- .endif
-; ufix_truncqfhi2n: register entry (input in r2); returns 0 for inputs <= 0 or outside the handled exponent range.
-ufix_truncqfhi2n:
- cmpf 0.0,r2
- ble ufix1 ; zero or negative: result is 0
- pushf r2 ; move the float bit pattern ...
- pop r3 ; ... into r3 as an integer
- ash -24,r3 ; r3 = exponent
- subi 31,r3 ; r3 = exponent - 31 = shift count for the low word
- cmpi 32,r3
- bge ufix1 ; shift count >= 32: return 0
- cmpi -32,r3
- ble ufix1 ; shift count <= -32: return 0
- ldi 1,r0
- ash 31,r0 ; r0 = 080000000h
- or3 r0,r2,r0 ; set the top bit of the mantissa word
- ldi r0,r1
- lsh3 r3,r0,r0 ; low word = mantissa shifted by (exponent - 31)
- subi 32,r3 ; shift count for the high word
- cmpi -32,r3
- ldile 0,r1 ; nothing reaches the high word
- lsh3 r3,r1,r1 ; high word
- rets
-ufix1:
- ldi 0,r0
- ldi 0,r1
- rets
-#endif
-
-;
-; signed long long to double conversion
-; input on stack
-; result in r0
-;
-#ifdef L_floathiqf2
- .text
- .global ___floathiqf2
- .ref ufloathiqf2n
-___floathiqf2:
- ldi sp,ar2
- ldi *-ar2(2),r0 ; low word of the input
- ldi *-ar2(1),r1 ; high word of the input (sets the status tested by bge)
- bge ufloathiqf2n ; non-negative: the unsigned routine returns directly to our caller
- negi r0 ; negative: negate the 64-bit value ...
- negb r1
- call ufloathiqf2n ; ... convert the magnitude ...
- negf r0 ; ... and negate the floating-point result
- rets
-#endif
-
-;
-; unsigned long long to double conversion
-; input on stack
-; result in r0
-;
-#ifdef L_ufloathiqf2
- .text
- .global ___ufloathiqf2
- .global ufloathiqf2n
- .ref ___unsfltconst
-___ufloathiqf2:
- ldi sp,ar2
- ldi *-ar2(2),r0 ; low word of the input
- ldi *-ar2(1),r1 ; high word of the input
-ufloathiqf2n:
- .if .BIGMODEL
-#ifdef _TMS320C4x
- ldpk @___unsfltconst
-#else
- ldp @___unsfltconst
-#endif
- .endif
- ldf @___unsfltconst,r2 ; r2 = value stored at ___unsfltconst
- float r0 ; signed conversion of the low word
- bge uflt1
- addf r2,r0 ; negative result: add the constant to get the unsigned value
-uflt1:
- float r1 ; signed conversion of the high word
- bge uflt2
- addf r2,r1 ; negative result: add the constant to get the unsigned value
-uflt2:
-#ifdef _TMS320C4x
- pop r3 ; return address
- bd r3 ; delayed return; the three instructions below still execute
- mpyf r2,r1 ; scale the high word by the constant
- addf r1,r0 ; result = low + scaled high
- nop
-#else
- ldf r1,r3
- and 0ffh,r3 ; keep the low 8 bits of r1 ...
- norm r3,r3 ; ... normalized as a separate value
- mpyf r2,r3 ; scale that part by the constant
- pop ar2 ; return address
- bd ar2 ; delayed return; the three instructions below still execute
- addf r3,r0
- mpyf r2,r1 ; scale the high word by the constant
- addf r1,r0
-#endif
-#endif
-
-;
-; long double to signed long long conversion
-; input in r2
-; result in r0,r1
-;
-#ifdef L_fix_trunchfhi2
- .text
- .global ___fix_trunchfhi2
- .ref ufix_trunchfhi2n
-___fix_trunchfhi2:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldf *-ar0(2), r2 ; Stack-argument model: the input occupies two stack words,
- ldi *-ar0(1), r2 ; loaded into r2 with a float load followed by an integer load
- .endif
-; Non-negative input is handed straight to the unsigned routine; negative input is converted as |x| and negated.
- cmpf 0.0,r2
- bge ufix_trunchfhi2n ; plain branch: the unsigned routine returns directly to our caller
- negf r2
- call ufix_trunchfhi2n
- negi r0 ; negate the 64-bit result: low word ...
- negb r1 ; ... then high word with borrow
- rets
-#endif
-
-;
-; long double to unsigned long long conversion
-; input in r2
-; result in r0,r1
-;
-#ifdef L_ufix_trunchfhi2
- .text
- .global ___ufix_trunchfhi2
- .global ufix_trunchfhi2n
-___ufix_trunchfhi2:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldf *-ar0(2), r2 ; Stack-argument model: the input occupies two stack words,
- ldi *-ar0(1), r2 ; loaded into r2 with a float load followed by an integer load
- .endif
-; ufix_trunchfhi2n: register entry (input in r2); returns 0 for inputs <= 0 or outside the handled exponent range.
-ufix_trunchfhi2n:
- cmpf 0.0,r2
- ble ufixh1 ; zero or negative: result is 0
- pushf r2 ; move the float bit pattern ...
- pop r3 ; ... into r3 as an integer
- ash -24,r3 ; r3 = exponent
- subi 31,r3 ; r3 = exponent - 31 = shift count for the low word
- cmpi 32,r3
- bge ufixh1 ; shift count >= 32: return 0
- cmpi -32,r3
- ble ufixh1 ; shift count <= -32: return 0
- ldi 1,r0
- ash 31,r0 ; r0 = 080000000h
- or3 r0,r2,r0 ; set the top bit of the mantissa word
- ldi r0,r1
- lsh3 r3,r0,r0 ; low word = mantissa shifted by (exponent - 31)
- subi 32,r3 ; shift count for the high word
- cmpi -32,r3
- ldile 0,r1 ; nothing reaches the high word
- lsh3 r3,r1,r1 ; high word
- rets
-ufixh1:
- ldi 0,r0
- ldi 0,r1
- rets
-#endif
-
-;
-; signed long long to long double conversion
-; input on stack
-; result in r0
-;
-#ifdef L_floathihf2
- .text
- .global ___floathihf2
- .ref ufloathihf2n
-___floathihf2:
- ldi sp,ar2
- ldi *-ar2(2),r0 ; low word of the input
- ldi *-ar2(1),r1 ; high word of the input (sets the status tested by bge)
- bge ufloathihf2n ; non-negative: the unsigned routine returns directly to our caller
- negi r0 ; negative: negate the 64-bit value ...
- negb r1
- call ufloathihf2n ; ... convert the magnitude ...
- negf r0 ; ... and negate the floating-point result
- rets
-#endif
-
-;
-; unsigned long long to long double conversion
-; input on stack
-; result in r0
-;
-#ifdef L_ufloathihf2
- .text
- .global ___ufloathihf2
- .global ufloathihf2n
- .ref ___unsfltconst
-___ufloathihf2:
- ldi sp,ar2
- ldi *-ar2(2),r0 ; low word of the input
- ldi *-ar2(1),r1 ; high word of the input
-ufloathihf2n:
- .if .BIGMODEL
-#ifdef _TMS320C4x
- ldpk @___unsfltconst
-#else
- ldp @___unsfltconst
-#endif
- .endif
- ldf @___unsfltconst,r2 ; r2 = value stored at ___unsfltconst
- float r0 ; signed conversion of the low word
- bge uflth1
- addf r2,r0 ; negative result: add the constant to get the unsigned value
-uflth1:
- float r1 ; signed conversion of the high word
- bge uflth2
- addf r2,r1 ; negative result: add the constant to get the unsigned value
-uflth2:
-#ifdef _TMS320C4x
- pop r3 ; return address
- bd r3 ; delayed return; the three instructions below still execute
- mpyf r2,r1 ; scale the high word by the constant
- addf r1,r0 ; result = low + scaled high
- nop
-#else
- ldf r1,r3
- and 0ffh,r3 ; keep the low 8 bits of r1 ...
- norm r3,r3 ; ... normalized as a separate value
- mpyf r2,r3 ; scale that part by the constant
- pop ar2 ; return address
- bd ar2 ; delayed return; the three instructions below still execute
- addf r3,r0
- mpyf r2,r1 ; scale the high word by the constant
- addf r1,r0
-#endif
-#endif
-
-;
-; calculate ffs
-; input in ar2
-; result in r0
-;
-#ifdef L_ffs
- .global ___ffs
- .ref ___unsfltconst
- .text
-___ffs:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldi *-ar0(1), ar2 ; Stack-argument model: ar2 = input
- .endif
-; Isolate the lowest set bit, convert it to float and read the bit position from the exponent field.
- negi ar2,r0
- and ar2,r0 ; r0 = input & -input
- float r0,r0
- ldfu 0.0,r1 ; default correction is 0.0
- .if .BIGMODEL
-#ifdef _TMS320C4x
- ldpk @___unsfltconst
-#else
- ldp @___unsfltconst
-#endif
- .endif
- ldflt @___unsfltconst,r1 ; converted value negative: correct with the constant at ___unsfltconst
- addf r1,r0
- pushf r0 ; move the float bit pattern ...
- pop r0 ; ... into r0 as an integer
- pop ar0 ; return address
- bd ar0 ; delayed return; the three instructions below still execute
- ash -24,r0 ; r0 = exponent
- ldilt -1,r0 ; negative exponent: force -1 so the final result is 0
- addi 1,r0 ; result = exponent + 1
-#endif
-
-;
-; calculate long double * long double
-; input in r2, r3
-; output in r0
-;
-#ifdef L_mulhf3
- .global ___mulhf3
- .text
-___mulhf3:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldf *-ar0(2), r2 ; Stack-argument model: each operand occupies two stack words,
- ldi *-ar0(1), r2 ; loaded with a float load followed by an integer load
- ldf *-ar0(4), r3
- ldi *-ar0(3), r3
- .endif
-; The low 8 mantissa bits of each operand are split off and multiplied separately, then added to the main product.
- pop ar2 ; return ad
- ldf r2,r0 ; copy lsb0
- ldf r3,r1 ; copy lsb1
- and 0ffh,r0 ; mask lsb0
- and 0ffh,r1 ; mask lsb1
- norm r0,r0 ; correct lsb0
- norm r1,r1 ; correct lsb1
- mpyf r2,r1 ; arg0*lsb1
- mpyf r3,r0 ; arg1*lsb0
- bd ar2 ; return (delayed)
- addf r0,r1 ; arg0*lsb1 + arg1*lsb0
- mpyf r2,r3,r0 ; msb0*msb1
- addf r1,r0 ; msb0*msb1 + arg0*lsb1 + arg1*lsb0
-#endif
-
-;
-; calculate long double / long double
-; r2 dividend, r3 divisor, r0 quotient
-;
-#ifdef L_divhf3
- .global ___divhf3
- .text
-___divhf3:
- .if .REGPARM == 0
-#ifdef _TMS320C4x
- lda sp,ar0
-#else
- ldiu sp,ar0
-#endif
- ldf *-ar0(2), r2 ; Stack-argument model: each operand occupies two stack words,
- ldi *-ar0(1), r2 ; loaded with a float load followed by an integer load
- ldf *-ar0(4), r3
- ldi *-ar0(3), r3
- .endif
-; Both variants refine a reciprocal estimate of the divisor (r3) and then multiply by the dividend (r2).
-#ifdef _TMS320C4x
- pop ar1 ; return address
- rcpf r3, r0 ; r0 = initial reciprocal estimate
- mpyf3 r0, r3, r1 ; first refinement: r0 = r0 * (2.0 - r0 * v)
- subrf 2.0, r1
- mpyf r1, r0
- mpyf3 r0, r3, r1 ; second refinement, completed in the delay slots
- bud ar1 ; delayed return; the three instructions below still execute
- subrf 2.0, r1
- mpyf r1, r0
- mpyf r2, r0 ; multiply by the dividend
-#else
- pop ar1 ; return address
- pushf r3 ; build the initial estimate by complementing the bit pattern of the divisor
- pop r0
- not r0
- push r0
- popf r0
- ldf -1.0, r1 ; undo the complemented sign bit
- xor r1, r0
-
- mpyf3 r0, r3, r1 ; r1 = r[0] * v
- subrf 2.0, r1 ; r1 = 2.0 - r[0] * v
- mpyf r1, r0 ; r0 = r[0] * (2.0 - r[0] * v) = r[1]
-; End of 1st iteration
-
- mpyf3 r0, r3, r1 ; r1 = r[1] * v
- subrf 2.0, r1 ; r1 = 2.0 - r[1] * v
- mpyf r1, r0 ; r0 = r[1] * (2.0 - r[1] * v) = r[2]
-; End of 2nd iteration
-
- mpyf3 r0, r3, r1 ; r1 = r[2] * v
- subrf 2.0, r1 ; r1 = 2.0 - r[2] * v
- mpyf r1, r0 ; r0 = r[2] * (2.0 - r[2] * v) = r[3]
-; End of 3rd iteration
-
- or 080h, r0
- rnd r0
-
-; mpyf3 r0, r3, r1 ; r1 = r[3] * v
- push r4 ; r4 is used as a temporary: save both its integer ...
- pushf r4 ; ... and floating-point parts
- mpyf r0, r3, r1 ; r1 = r[3] * v, main product
-; Each product below is followed by two correction terms built from the low 8 bits of each factor.
- ldf r0, r4
- and 0ffh, r4
- norm r4, r4
- mpyf r3, r4
- addf r4, r1
-
- ldf r3, r4
- and 0ffh, r4
- norm r4, r4
- mpyf r0, r4
- addf r4, r1
-
- subrf 2.0, r1 ; r1 = 2.0 - r[3] * v
-
- mpyf r1, r0, r3 ; r3 = r[3] * (2.0 - r[3] * v) = r[4]
-
- ldf r1, r4
- and 0ffh, r4
- norm r4, r4
- mpyf r0, r4
- addf r4, r3
-
- ldf r0, r4
- and 0ffh, r4
- norm r4, r4
- mpyf r1, r4
- addf r4, r3
-
- mpyf r2, r3, r0 ; Multiply by the dividend
-
- ldf r2, r4
- and 0ffh, r4
- norm r4, r4
- mpyf r3, r4
- addf r4, r0
-
- ldf r3, r4
- and 0ffh, r4
- norm r4, r4
- mpyf r2, r4
- bd ar1 ; delayed return; the next three instructions still execute
- addf r4, r0
-
- popf r4 ; restore r4 in the remaining delay slots
- pop r4
-#endif
-#endif