author    YamaArashi <shadow962@live.com>  2016-01-06 01:47:28 -0800
committer YamaArashi <shadow962@live.com>  2016-01-06 01:47:28 -0800
commit    be8b04496302184c6e8f04d6179f9c3afc50aeb6 (patch)
tree      726e2468c0c07add773c0dbd86ab6386844259ae  /gcc/config/pa/pa.c
initial commit
Diffstat (limited to 'gcc/config/pa/pa.c')
-rwxr-xr-x  gcc/config/pa/pa.c  6491
1 files changed, 6491 insertions, 0 deletions
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
new file mode 100755
index 0000000..de7f698
--- /dev/null
+++ b/gcc/config/pa/pa.c
@@ -0,0 +1,6491 @@
+/* Subroutines for insn-output.c for HPPA.
+ Copyright (C) 1992, 93-98, 1999 Free Software Foundation, Inc.
+ Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include "config.h"
+#include "system.h"
+
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "real.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "tree.h"
+#include "reload.h"
+#include "c-tree.h"
+#include "expr.h"
+#include "obstack.h"
+#include "toplev.h"
+
+static void restore_unscaled_index_insn_codes PROTO((rtx));
+static void record_unscaled_index_insn_codes PROTO((rtx));
+static void pa_combine_instructions PROTO((rtx));
+static int pa_can_combine_p PROTO((rtx, rtx, rtx, int, rtx, rtx, rtx));
+static int forward_branch_p PROTO((rtx));
+static int shadd_constant_p PROTO((int));
+
+/* Save the operands last given to a compare for use when we
+ generate a scc or bcc insn. */
+
+rtx hppa_compare_op0, hppa_compare_op1;
+enum cmp_type hppa_branch_type;
+
+/* Which cpu we are scheduling for. */
+enum processor_type pa_cpu;
+
+/* String to hold which cpu we are scheduling for. */
+char *pa_cpu_string;
+
+/* Set by the FUNCTION_PROFILER macro. */
+int hp_profile_labelno;
+
+/* Counts for the number of callee-saved general and floating point
+ registers which were saved by the current function's prologue. */
+static int gr_saved, fr_saved;
+
+/* Whether or not the current function uses an out-of-line prologue
+ and epilogue. */
+static int out_of_line_prologue_epilogue;
+
+static rtx find_addr_reg ();
+
+/* Keep track of the number of bytes we have output in the CODE subspaces
+ during this compilation so we'll know when to emit inline long-calls. */
+
+unsigned int total_code_bytes;
+
+/* Variables to handle plabels that we discover are necessary at assembly
+ output time. They are output after the current function. */
+
+struct deferred_plabel
+{
+ rtx internal_label;
+ char *name;
+} *deferred_plabels = 0;
+int n_deferred_plabels = 0;
+
+/* Array indexed by INSN_UIDs holding the INSN_CODE of an insn which
+ uses an unscaled indexed address before delay slot scheduling. */
+static int *unscaled_index_insn_codes;
+
+/* Upper bound for the array. */
+static int max_unscaled_index_insn_codes_uid;
+
+void
+override_options ()
+{
+  /* Default to 7100 scheduling.  If the 7100LC scheduling ever
+     gets reasonably tuned, it should be the default since that
+     is what most PAs sold now are.  */
+ if (pa_cpu_string == NULL
+ || ! strcmp (pa_cpu_string, "7100"))
+ {
+ pa_cpu_string = "7100";
+ pa_cpu = PROCESSOR_7100;
+ }
+ else if (! strcmp (pa_cpu_string, "700"))
+ {
+ pa_cpu_string = "700";
+ pa_cpu = PROCESSOR_700;
+ }
+ else if (! strcmp (pa_cpu_string, "7100LC"))
+ {
+ pa_cpu_string = "7100LC";
+ pa_cpu = PROCESSOR_7100LC;
+ }
+ else if (! strcmp (pa_cpu_string, "7200"))
+ {
+ pa_cpu_string = "7200";
+ pa_cpu = PROCESSOR_7200;
+ }
+ /* CYGNUS LOCAL PA8000/law */
+ else if (! strcmp (pa_cpu_string, "8000"))
+ {
+ pa_cpu_string = "8000";
+ pa_cpu = PROCESSOR_8000;
+ }
+ else
+ {
+ warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC, 7200 and 8000\n", pa_cpu_string);
+ }
+ /* END CYGNUS LOCAL */
+
+ if (flag_pic && TARGET_PORTABLE_RUNTIME)
+ {
+ warning ("PIC code generation is not supported in the portable runtime model\n");
+ }
+
+ if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
+ {
+ warning ("PIC code generation is not compatible with fast indirect calls\n");
+ }
+
+ if (flag_pic && profile_flag)
+ {
+ warning ("PIC code generation is not compatible with profiling\n");
+ }
+
+ if (TARGET_SPACE && (flag_pic || profile_flag))
+ {
+ warning ("Out of line entry/exit sequences are not compatible\n");
+ warning ("with PIC or profiling\n");
+ }
+
+ if (! TARGET_GAS && write_symbols != NO_DEBUG)
+ {
+ warning ("-g is only supported when using GAS on this processor,");
+ warning ("-g option disabled.");
+ write_symbols = NO_DEBUG;
+ }
+}
+
+
+/* Return non-zero only if OP is a register of mode MODE,
+ or CONST0_RTX. */
+int
+reg_or_0_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return (op == CONST0_RTX (mode) || register_operand (op, mode));
+}
+
+/* Return non-zero if OP is suitable for use in a call to a named
+ function.
+
+ (???) For 2.5 try to eliminate either call_operand_address or
+ function_label_operand, they perform very similar functions. */
+int
+call_operand_address (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
+}
+
+/* Return 1 if X contains a symbolic expression. We know these
+ expressions will have one of a few well defined forms, so
+ we need only check those forms. */
+int
+symbolic_expression_p (x)
+ register rtx x;
+{
+
+ /* Strip off any HIGH. */
+ if (GET_CODE (x) == HIGH)
+ x = XEXP (x, 0);
+
+ return (symbolic_operand (x, VOIDmode));
+}
+
+int
+symbolic_operand (op, mode)
+ register rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return 1;
+ case CONST:
+ op = XEXP (op, 0);
+ return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF)
+ && GET_CODE (XEXP (op, 1)) == CONST_INT);
+ default:
+ return 0;
+ }
+}
+
+/* Return truth value of statement that OP is a symbolic memory
+ operand of mode MODE. */
+
+int
+symbolic_memory_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (GET_CODE (op) != MEM)
+ return 0;
+ op = XEXP (op, 0);
+ return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
+ || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
+}
+
+/* Return 1 if the operand is either a register or a memory operand that is
+ not symbolic. */
+
+int
+reg_or_nonsymb_mem_operand (op, mode)
+ register rtx op;
+ enum machine_mode mode;
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
+ return 1;
+
+ return 0;
+}
+
+/* Return 1 if the operand is either a register, zero, or a memory operand
+ that is not symbolic. */
+
+int
+reg_or_0_or_nonsymb_mem_operand (op, mode)
+ register rtx op;
+ enum machine_mode mode;
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (op == CONST0_RTX (mode))
+ return 1;
+
+ if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
+ return 1;
+
+ return 0;
+}
+
+/* Accept any constant that can be moved in one instruction into a
+ general register. */
+int
+cint_ok_for_move (intval)
+ HOST_WIDE_INT intval;
+{
+  /* OK if ldo, ldil, or zdepi can be used.  */
+ return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
+ || zdepi_cint_p (intval));
+}
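+
+/* For illustration, a few sample constants and the single instruction
+   the test above accepts them for (values chosen arbitrarily):
+
+     0x00001234   fits in 14 signed bits        ldo
+     0x12345800   low 11 bits are zero          ldil
+     0x0003fe00   contiguous run of ones        zdepi
+
+   0x12345678 satisfies none of the tests, so moving it takes two
+   instructions (ldil followed by ldo) and cint_ok_for_move returns 0.  */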
+
+/* Accept anything that can be moved in one instruction into a general
+ register. */
+int
+move_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONSTANT_P_RTX)
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT)
+ return cint_ok_for_move (INTVAL (op));
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (GET_CODE (op) != MEM)
+ return 0;
+
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == LO_SUM)
+ return (register_operand (XEXP (op, 0), Pmode)
+ && CONSTANT_P (XEXP (op, 1)));
+
+ /* Since move_operand is only used for source operands, we can always
+ allow scaled indexing! */
+ if (! TARGET_DISABLE_INDEXING
+ && GET_CODE (op) == PLUS
+ && ((GET_CODE (XEXP (op, 0)) == MULT
+ && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
+ && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
+ && GET_CODE (XEXP (op, 1)) == REG)
+ || (GET_CODE (XEXP (op, 1)) == MULT
+	      && GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
+ && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
+ && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
+ && GET_CODE (XEXP (op, 0)) == REG)))
+ return 1;
+
+ return memory_address_p (mode, op);
+}
+
+/* Accept REG and any CONST_INT that can be moved in one instruction into a
+ general register. */
+int
+reg_or_cint_move_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT)
+ return cint_ok_for_move (INTVAL (op));
+
+ return 0;
+}
+
+int
+pic_label_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ if (!flag_pic)
+ return 0;
+
+ switch (GET_CODE (op))
+ {
+ case LABEL_REF:
+ return 1;
+ case CONST:
+ op = XEXP (op, 0);
+ return (GET_CODE (XEXP (op, 0)) == LABEL_REF
+ && GET_CODE (XEXP (op, 1)) == CONST_INT);
+ default:
+ return 0;
+ }
+}
+
+int
+fp_reg_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return reg_renumber && FP_REG_P (op);
+}
+
+
+
+/* Return truth value of whether OP can be used as an operand in a
+ three operand arithmetic insn that accepts registers of mode MODE
+ or 14-bit signed integers. */
+int
+arith_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return (register_operand (op, mode)
+ || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
+}
+
+/* Return truth value of whether OP can be used as an operand in a
+ three operand arithmetic insn that accepts registers of mode MODE
+ or 11-bit signed integers. */
+int
+arith11_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return (register_operand (op, mode)
+ || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
+}
+
+/* A constant integer suitable for use in a PRE_MODIFY memory
+ reference. */
+int
+pre_cint_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == CONST_INT
+ && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
+}
+
+/* A constant integer suitable for use in a POST_MODIFY memory
+ reference. */
+int
+post_cint_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == CONST_INT
+ && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
+}
+
+int
+arith_double_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return (register_operand (op, mode)
+ || (GET_CODE (op) == CONST_DOUBLE
+ && GET_MODE (op) == mode
+ && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
+ && ((CONST_DOUBLE_HIGH (op) >= 0)
+ == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
+}
+
+/* Return truth value of whether OP is an integer which fits the
+ range constraining immediate operands in three-address insns, or
+ is an integer register. */
+
+int
+ireg_or_int5_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
+ || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
+}
+
+/* Return truth value of whether OP is an integer which fits the
+ range constraining immediate operands in three-address insns. */
+
+int
+int5_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
+}
+
+int
+uint5_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
+}
+
+int
+int11_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
+}
+
+int
+uint32_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+#if HOST_BITS_PER_WIDE_INT > 32
+ /* All allowed constants will fit a CONST_INT. */
+ return (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
+#else
+ return (GET_CODE (op) == CONST_INT
+ || (GET_CODE (op) == CONST_DOUBLE
+ && CONST_DOUBLE_HIGH (op) == 0));
+#endif
+}
+
+int
+arith5_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return register_operand (op, mode) || int5_operand (op, mode);
+}
+
+/* True iff zdepi can be used to generate this CONST_INT. */
+int
+zdepi_cint_p (x)
+ unsigned HOST_WIDE_INT x;
+{
+ unsigned HOST_WIDE_INT lsb_mask, t;
+
+ /* This might not be obvious, but it's at least fast.
+ This function is critical; we don't have the time loops would take. */
+ lsb_mask = x & -x;
+ t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
+ /* Return true iff t is a power of two. */
+ return ((t & (t - 1)) == 0);
+}
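+
+/* A worked example of the test above, for illustration: x = 0x1e0
+   (binary ...0001 1110 0000), a field of four ones starting at bit 5,
+   is a constant zdepi can build.  Then
+
+     lsb_mask = x & -x                           = 0x20
+     t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1)
+       = (0x1e + 0x20) & ~0x1f                   = 0x20
+
+   which is a power of two, so we return 1.  For x = 0x505, whose set
+   bits form two separate runs, t = 0x51 is not a power of two and we
+   return 0.  */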
+
+/* True iff depi or extru can be used to compute (reg & mask).
+ Accept bit pattern like these:
+ 0....01....1
+ 1....10....0
+ 1..10..01..1 */
+int
+and_mask_p (mask)
+ unsigned HOST_WIDE_INT mask;
+{
+ mask = ~mask;
+ mask += mask & -mask;
+ return (mask & (mask - 1)) == 0;
+}
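+
+/* For illustration, take a 32-bit value mask = 0xfffff00f, the
+   1...10...01...1 shape from the comment above.  After complementing,
+   mask = 0x00000ff0, a single run of ones; adding its lowest set bit
+   (0x10) gives 0x1000, a power of two, so the function returns 1.
+   For mask = 0x00000f0f the complement 0xfffff0f0 has two runs of ones,
+   the sum 0xfffff100 is not a power of two, and the function returns 0.  */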
+
+/* True iff depi or extru can be used to compute (reg & OP). */
+int
+and_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return (register_operand (op, mode)
+ || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
+}
+
+/* True iff depi can be used to compute (reg | MASK). */
+int
+ior_mask_p (mask)
+ unsigned HOST_WIDE_INT mask;
+{
+ mask += mask & -mask;
+ return (mask & (mask - 1)) == 0;
+}
+
+/* True iff depi can be used to compute (reg | OP). */
+int
+ior_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
+}
+
+int
+lhs_lshift_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
+}
+
+/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
+ Such values can be the left hand side x in (x << r), using the zvdepi
+ instruction. */
+int
+lhs_lshift_cint_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ unsigned HOST_WIDE_INT x;
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+ x = INTVAL (op) >> 4;
+ return (x & (x + 1)) == 0;
+}
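+
+/* For illustration: op = 0x7f has x = op >> 4 = 0x7 and x & (x + 1) = 0,
+   so 0x7f (0...0111 1111) is accepted; op = 0x101 gives x = 0x10 and
+   x & (x + 1) = 0x10, so it is rejected, since the bits above the low
+   four are not a single right-justified run of ones.  */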
+
+int
+arith32_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
+}
+
+int
+pc_or_label_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
+}
+
+/* Legitimize PIC addresses. If the address is already
+ position-independent, we return ORIG. Newly generated
+ position-independent addresses go to REG. If we need more
+ than one register, we lose. */
+
+rtx
+legitimize_pic_address (orig, mode, reg)
+ rtx orig, reg;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ rtx pic_ref = orig;
+
+ /* Labels need special handling. */
+ if (pic_label_operand (orig))
+ {
+ emit_insn (gen_pic_load_label (reg, orig));
+ current_function_uses_pic_offset_table = 1;
+ return reg;
+ }
+ if (GET_CODE (orig) == SYMBOL_REF)
+ {
+ if (reg == 0)
+ abort ();
+
+ if (flag_pic == 2)
+ {
+ emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
+ pic_ref
+ = gen_rtx_MEM (Pmode,
+ gen_rtx_LO_SUM (Pmode, reg,
+ gen_rtx_UNSPEC (SImode,
+ gen_rtvec (1, orig),
+ 0)));
+ }
+ else
+ pic_ref = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode,
+ pic_offset_table_rtx, orig));
+ current_function_uses_pic_offset_table = 1;
+ RTX_UNCHANGING_P (pic_ref) = 1;
+ emit_move_insn (reg, pic_ref);
+ return reg;
+ }
+ else if (GET_CODE (orig) == CONST)
+ {
+ rtx base;
+
+ if (GET_CODE (XEXP (orig, 0)) == PLUS
+ && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
+ return orig;
+
+ if (reg == 0)
+ abort ();
+
+ if (GET_CODE (XEXP (orig, 0)) == PLUS)
+ {
+ base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+ orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
+ base == reg ? 0 : reg);
+ }
+ else abort ();
+ if (GET_CODE (orig) == CONST_INT)
+ {
+ if (INT_14_BITS (orig))
+ return plus_constant_for_output (base, INTVAL (orig));
+ orig = force_reg (Pmode, orig);
+ }
+ pic_ref = gen_rtx_PLUS (Pmode, base, orig);
+ /* Likewise, should we set special REG_NOTEs here? */
+ }
+ return pic_ref;
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ MODE and WIN are passed so that this macro can use
+ GO_IF_LEGITIMATE_ADDRESS.
+
+ It is always safe for this macro to do nothing. It exists to recognize
+ opportunities to optimize the output.
+
+ For the PA, transform:
+
+ memory(X + <large int>)
+
+ into:
+
+   if (<large int> & mask) >= (mask + 1) / 2
+ Y = (<large int> & ~mask) + mask + 1 Round up.
+ else
+ Y = (<large int> & ~mask) Round down.
+ Z = X + Y
+ memory (Z + (<large int> - Y));
+
+ This is for CSE to find several similar references, and only use one Z.
+
+ X can either be a SYMBOL_REF or REG, but because combine can not
+ perform a 4->2 combination we do nothing for SYMBOL_REF + D where
+ D will not fit in 14 bits.
+
+ MODE_FLOAT references allow displacements which fit in 5 bits, so use
+ 0x1f as the mask.
+
+ MODE_INT references allow displacements which fit in 14 bits, so use
+ 0x3fff as the mask.
+
+ This relies on the fact that most mode MODE_FLOAT references will use FP
+ registers and most mode MODE_INT references will use integer registers.
+ (In the rare case of an FP register used in an integer MODE, we depend
+ on secondary reloads to clean things up.)
+
+
+ It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
+ manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
+ addressing modes to be used).
+
+ Put X and Z into registers. Then put the entire expression into
+ a register. */
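+
+/* A worked example of the transformation above, for illustration: with a
+   MODE_INT reference the mask is 0x3fff, so for memory (X + 0x12345) we
+   have (0x12345 & 0x3fff) = 0x2345 >= 0x2000, and we round up:
+
+     Y = (0x12345 & ~0x3fff) + 0x3fff + 1 = 0x14000
+     Z = X + 0x14000
+     memory (Z + (0x12345 - 0x14000)) = memory (Z - 0x1cbb)
+
+   The displacement -0x1cbb fits in 14 bits, and other references near
+   X + 0x14000 can share the same Z.  */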
+
+rtx
+hppa_legitimize_address (x, oldx, mode)
+ rtx x, oldx ATTRIBUTE_UNUSED;
+ enum machine_mode mode;
+{
+ rtx orig = x;
+
+ if (flag_pic)
+ return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
+
+ /* Strip off CONST. */
+ if (GET_CODE (x) == CONST)
+ x = XEXP (x, 0);
+
+ /* Special case. Get the SYMBOL_REF into a register and use indexing.
+ That should always be safe. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
+ {
+ rtx reg = force_reg (SImode, XEXP (x, 1));
+ return force_reg (SImode, gen_rtx_PLUS (SImode, reg, XEXP (x, 0)));
+ }
+
+ /* Note we must reject symbols which represent function addresses
+ since the assembler/linker can't handle arithmetic on plabels. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
+ && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
+ || GET_CODE (XEXP (x, 0)) == REG))
+ {
+ rtx int_part, ptr_reg;
+ int newoffset;
+ int offset = INTVAL (XEXP (x, 1));
+ int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;
+
+ /* CYGNUS LOCAL pa8000/law */
+ mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
+ ? (TARGET_PARISC_2_0 ? 0x3fff : 0x1f) : 0x3fff);
+ /* END CYGNUS LOCAL */
+
+ /* Choose which way to round the offset. Round up if we
+ are >= halfway to the next boundary. */
+ if ((offset & mask) >= ((mask + 1) / 2))
+ newoffset = (offset & ~ mask) + mask + 1;
+ else
+ newoffset = (offset & ~ mask);
+
+ /* If the newoffset will not fit in 14 bits (ldo), then
+ handling this would take 4 or 5 instructions (2 to load
+ the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
+ add the new offset and the SYMBOL_REF.) Combine can
+ not handle 4->2 or 5->2 combinations, so do not create
+ them. */
+ if (! VAL_14_BITS_P (newoffset)
+ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
+ {
+ rtx const_part
+ = gen_rtx_CONST (VOIDmode, gen_rtx_PLUS (Pmode,
+ XEXP (x, 0),
+ GEN_INT (newoffset)));
+ rtx tmp_reg
+ = force_reg (Pmode,
+ gen_rtx_HIGH (Pmode, const_part));
+ ptr_reg
+ = force_reg (Pmode,
+ gen_rtx_LO_SUM (Pmode, tmp_reg, const_part));
+ }
+ else
+ {
+ if (! VAL_14_BITS_P (newoffset))
+ int_part = force_reg (Pmode, GEN_INT (newoffset));
+ else
+ int_part = GEN_INT (newoffset);
+
+ ptr_reg = force_reg (Pmode,
+ gen_rtx_PLUS (Pmode,
+ force_reg (Pmode, XEXP (x, 0)),
+ int_part));
+ }
+ return plus_constant (ptr_reg, offset - newoffset);
+ }
+
+ /* Handle (plus (mult (a) (shadd_constant)) (b)). */
+
+ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
+ && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
+ || GET_CODE (XEXP (x, 1)) == SUBREG)
+ && GET_CODE (XEXP (x, 1)) != CONST)
+ {
+ int val = INTVAL (XEXP (XEXP (x, 0), 1));
+ rtx reg1, reg2;
+
+ reg1 = XEXP (x, 1);
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ reg2 = XEXP (XEXP (x, 0), 0);
+ if (GET_CODE (reg2) != REG)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode, reg2,
+ GEN_INT (val)),
+ reg1));
+ }
+
+ /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
+
+ Only do so for floating point modes since this is more speculative
+ and we lose if it's an integer store. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
+ && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
+ && (mode == SFmode || mode == DFmode))
+ {
+
+ /* First, try and figure out what to use as a base register. */
+ rtx reg1, reg2, base, idx, orig_base;
+
+ reg1 = XEXP (XEXP (x, 0), 1);
+ reg2 = XEXP (x, 1);
+ base = NULL_RTX;
+ idx = NULL_RTX;
+
+ /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
+ then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
+ know it's a base register below. */
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ if (GET_CODE (reg2) != REG)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ /* Figure out what the base and index are. */
+
+ if (GET_CODE (reg1) == REG
+ && REGNO_POINTER_FLAG (REGNO (reg1)))
+ {
+ base = reg1;
+ orig_base = XEXP (XEXP (x, 0), 1);
+ idx = gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode,
+ XEXP (XEXP (XEXP (x, 0), 0), 0),
+ XEXP (XEXP (XEXP (x, 0), 0), 1)),
+ XEXP (x, 1));
+ }
+ else if (GET_CODE (reg2) == REG
+ && REGNO_POINTER_FLAG (REGNO (reg2)))
+ {
+ base = reg2;
+ orig_base = XEXP (x, 1);
+ idx = XEXP (x, 0);
+ }
+
+ if (base == 0)
+ return orig;
+
+ /* If the index adds a large constant, try to scale the
+ constant so that it can be loaded with only one insn. */
+ if (GET_CODE (XEXP (idx, 1)) == CONST_INT
+ && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
+ / INTVAL (XEXP (XEXP (idx, 0), 1)))
+ && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
+ {
+ /* Divide the CONST_INT by the scale factor, then add it to A. */
+ int val = INTVAL (XEXP (idx, 1));
+
+ val /= INTVAL (XEXP (XEXP (idx, 0), 1));
+ reg1 = XEXP (XEXP (idx, 0), 0);
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
+
+ /* We can now generate a simple scaled indexed address. */
+ return force_reg (Pmode,
+ gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode, reg1,
+ XEXP (XEXP (idx, 0), 1)),
+ base));
+ }
+
+ /* If B + C is still a valid base register, then add them. */
+ if (GET_CODE (XEXP (idx, 1)) == CONST_INT
+ && INTVAL (XEXP (idx, 1)) <= 4096
+ && INTVAL (XEXP (idx, 1)) >= -4096)
+ {
+ int val = INTVAL (XEXP (XEXP (idx, 0), 1));
+ rtx reg1, reg2;
+
+ reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
+
+ reg2 = XEXP (XEXP (idx, 0), 0);
+ if (GET_CODE (reg2) != CONST_INT)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode, reg2,
+ GEN_INT (val)),
+ reg1));
+ }
+
+ /* Get the index into a register, then add the base + index and
+ return a register holding the result. */
+
+ /* First get A into a register. */
+ reg1 = XEXP (XEXP (idx, 0), 0);
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ /* And get B into a register. */
+ reg2 = XEXP (idx, 1);
+ if (GET_CODE (reg2) != REG)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ reg1 = force_reg (Pmode,
+ gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode, reg1,
+ XEXP (XEXP (idx, 0), 1)),
+ reg2));
+
+ /* Add the result to our base register and return. */
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
+
+ }
+
+ /* Uh-oh. We might have an address for x[n-100000]. This needs
+ special handling to avoid creating an indexed memory address
+ with x-100000 as the base.
+
+ If the constant part is small enough, then it's still safe because
+ there is a guard page at the beginning and end of the data segment.
+
+ Scaled references are common enough that we want to try and rearrange the
+ terms so that we can use indexing for these addresses too. Only
+   do the optimization for floating point modes. */
+
+ if (GET_CODE (x) == PLUS
+ && symbolic_expression_p (XEXP (x, 1)))
+ {
+ /* Ugly. We modify things here so that the address offset specified
+ by the index expression is computed first, then added to x to form
+ the entire address. */
+
+ rtx regx1, regx2, regy1, regy2, y;
+
+ /* Strip off any CONST. */
+ y = XEXP (x, 1);
+ if (GET_CODE (y) == CONST)
+ y = XEXP (y, 0);
+
+ if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
+ {
+ /* See if this looks like
+ (plus (mult (reg) (shadd_const))
+ (const (plus (symbol_ref) (const_int))))
+
+ Where const_int is small. In that case the const
+ expression is a valid pointer for indexing.
+
+	     If const_int is big, but can be divided evenly by shadd_const,
+	     then divide it and add the quotient to (reg); this allows more
+	     scaled indexed addresses. */
+ if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (y, 1)) == CONST_INT
+ && INTVAL (XEXP (y, 1)) >= -4096
+ && INTVAL (XEXP (y, 1)) <= 4095
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
+ {
+ int val = INTVAL (XEXP (XEXP (x, 0), 1));
+ rtx reg1, reg2;
+
+ reg1 = XEXP (x, 1);
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ reg2 = XEXP (XEXP (x, 0), 0);
+ if (GET_CODE (reg2) != REG)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ return force_reg (Pmode,
+ gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode, reg2,
+ GEN_INT (val)),
+ reg1));
+ }
+ else if ((mode == DFmode || mode == SFmode)
+ && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (y, 1)) == CONST_INT
+ && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
+ {
+ regx1
+ = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
+ / INTVAL (XEXP (XEXP (x, 0), 1))));
+ regx2 = XEXP (XEXP (x, 0), 0);
+ if (GET_CODE (regx2) != REG)
+ regx2 = force_reg (Pmode, force_operand (regx2, 0));
+ regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
+ regx2, regx1));
+ return force_reg (Pmode,
+ gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode, regx2,
+ XEXP (XEXP (x, 0),
+ 1)),
+ force_reg (Pmode, XEXP (y, 0))));
+ }
+ else if (GET_CODE (XEXP (y, 1)) == CONST_INT
+ && INTVAL (XEXP (y, 1)) >= -4096
+ && INTVAL (XEXP (y, 1)) <= 4095)
+ {
+ /* This is safe because of the guard page at the
+ beginning and end of the data space. Just
+ return the original address. */
+ return orig;
+ }
+ else
+ {
+ /* Doesn't look like one we can optimize. */
+ regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
+ regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
+ regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
+ regx1 = force_reg (Pmode,
+ gen_rtx_fmt_ee (GET_CODE (y), Pmode,
+ regx1, regy2));
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
+ }
+ }
+ }
+
+ return orig;
+}
+
+/* For the HPPA, REG and REG+CONST is cost 0
+ and addresses involving symbolic constants are cost 2.
+
+ PIC addresses are very expensive.
+
+ It is no coincidence that this has the same structure
+ as GO_IF_LEGITIMATE_ADDRESS. */
+int
+hppa_address_cost (X)
+ rtx X;
+{
+ if (GET_CODE (X) == PLUS)
+ return 1;
+ else if (GET_CODE (X) == LO_SUM)
+ return 1;
+ else if (GET_CODE (X) == HIGH)
+ return 2;
+ return 4;
+}
+
+/* Emit insns to move operands[1] into operands[0].
+
+ Return 1 if we have written out everything that needs to be done to
+ do the move. Otherwise, return 0 and the caller will emit the move
+ normally. */
+
+int
+emit_move_sequence (operands, mode, scratch_reg)
+ rtx *operands;
+ enum machine_mode mode;
+ rtx scratch_reg;
+{
+ register rtx operand0 = operands[0];
+ register rtx operand1 = operands[1];
+ register rtx tem;
+
+ if (scratch_reg
+ && reload_in_progress && GET_CODE (operand0) == REG
+ && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
+ operand0 = reg_equiv_mem[REGNO (operand0)];
+ else if (scratch_reg
+ && reload_in_progress && GET_CODE (operand0) == SUBREG
+ && GET_CODE (SUBREG_REG (operand0)) == REG
+ && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
+ {
+ SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
+ operand0 = alter_subreg (operand0);
+ }
+
+ if (scratch_reg
+ && reload_in_progress && GET_CODE (operand1) == REG
+ && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
+ operand1 = reg_equiv_mem[REGNO (operand1)];
+ else if (scratch_reg
+ && reload_in_progress && GET_CODE (operand1) == SUBREG
+ && GET_CODE (SUBREG_REG (operand1)) == REG
+ && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
+ {
+ SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
+ operand1 = alter_subreg (operand1);
+ }
+
+ if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
+ && ((tem = find_replacement (&XEXP (operand0, 0)))
+ != XEXP (operand0, 0)))
+ operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
+ if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
+ && ((tem = find_replacement (&XEXP (operand1, 0)))
+ != XEXP (operand1, 0)))
+ operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
+
+ /* Handle secondary reloads for loads/stores of FP registers from
+ REG+D addresses where D does not fit in 5 bits, including
+ (subreg (mem (addr))) cases. */
+ if (fp_reg_operand (operand0, mode)
+ && ((GET_CODE (operand1) == MEM
+ && ! memory_address_p (DFmode, XEXP (operand1, 0)))
+ || ((GET_CODE (operand1) == SUBREG
+ && GET_CODE (XEXP (operand1, 0)) == MEM
+ && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
+ && scratch_reg)
+ {
+ if (GET_CODE (operand1) == SUBREG)
+ operand1 = XEXP (operand1, 0);
+
+ scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg));
+
+ /* D might not fit in 14 bits either; for such cases load D into
+ scratch reg. */
+ if (!memory_address_p (SImode, XEXP (operand1, 0)))
+ {
+ emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
+ emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
+ SImode,
+ XEXP (XEXP (operand1, 0), 0),
+ scratch_reg));
+ }
+ else
+ emit_move_insn (scratch_reg, XEXP (operand1, 0));
+ emit_insn (gen_rtx_SET (VOIDmode, operand0, gen_rtx_MEM (mode,
+ scratch_reg)));
+ return 1;
+ }
+ else if (fp_reg_operand (operand1, mode)
+ && ((GET_CODE (operand0) == MEM
+ && ! memory_address_p (DFmode, XEXP (operand0, 0)))
+ || ((GET_CODE (operand0) == SUBREG)
+ && GET_CODE (XEXP (operand0, 0)) == MEM
+ && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
+ && scratch_reg)
+ {
+ if (GET_CODE (operand0) == SUBREG)
+ operand0 = XEXP (operand0, 0);
+
+ scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg));
+ /* D might not fit in 14 bits either; for such cases load D into
+ scratch reg. */
+ if (!memory_address_p (SImode, XEXP (operand0, 0)))
+ {
+ emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
+ emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
+ 0)),
+ SImode,
+ XEXP (XEXP (operand0, 0),
+ 0),
+ scratch_reg));
+ }
+ else
+ emit_move_insn (scratch_reg, XEXP (operand0, 0));
+ emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
+ operand1));
+ return 1;
+ }
+ /* Handle secondary reloads for loads of FP registers from constant
+ expressions by forcing the constant into memory.
+
+     Use scratch_reg to hold the address of the memory location.
+
+ ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
+     NO_REGS when presented with a const_int and a register class
+ containing only FP registers. Doing so unfortunately creates
+ more problems than it solves. Fix this for 2.5. */
+ else if (fp_reg_operand (operand0, mode)
+ && CONSTANT_P (operand1)
+ && scratch_reg)
+ {
+ rtx xoperands[2];
+
+ /* Force the constant into memory and put the address of the
+ memory location into scratch_reg. */
+ xoperands[0] = scratch_reg;
+ xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
+ emit_move_sequence (xoperands, Pmode, 0);
+
+ /* Now load the destination register. */
+ emit_insn (gen_rtx_SET (mode, operand0, gen_rtx_MEM (mode, scratch_reg)));
+ return 1;
+ }
+ /* Handle secondary reloads for SAR. These occur when trying to load
+     the SAR from memory, from an FP register, or with a constant. */
+ else if (GET_CODE (operand0) == REG
+ && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
+ && (GET_CODE (operand1) == MEM
+ || GET_CODE (operand1) == CONST_INT
+ || (GET_CODE (operand1) == REG
+ && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
+ && scratch_reg)
+ {
+ /* D might not fit in 14 bits either; for such cases load D into
+ scratch reg. */
+ if (GET_CODE (operand1) == MEM
+ && !memory_address_p (SImode, XEXP (operand1, 0)))
+ {
+ emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
+ emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
+ 0)),
+ SImode,
+ XEXP (XEXP (operand1, 0),
+ 0),
+ scratch_reg));
+ emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand1),
+ scratch_reg));
+ }
+ else
+ emit_move_insn (scratch_reg, operand1);
+ emit_move_insn (operand0, scratch_reg);
+ return 1;
+ }
+ /* Handle most common case: storing into a register. */
+ else if (register_operand (operand0, mode))
+ {
+ if (register_operand (operand1, mode)
+ || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
+ || (operand1 == CONST0_RTX (mode))
+ || (GET_CODE (operand1) == HIGH
+ && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
+ /* Only `general_operands' can come here, so MEM is ok. */
+ || GET_CODE (operand1) == MEM)
+ {
+ /* Run this case quickly. */
+ emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
+ return 1;
+ }
+ }
+ else if (GET_CODE (operand0) == MEM)
+ {
+ if (mode == DFmode && operand1 == CONST0_RTX (mode)
+ && !(reload_in_progress || reload_completed))
+ {
+ rtx temp = gen_reg_rtx (DFmode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
+ emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
+ return 1;
+ }
+ if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
+ {
+ /* Run this case quickly. */
+ emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
+ return 1;
+ }
+ if (! (reload_in_progress || reload_completed))
+ {
+ operands[0] = validize_mem (operand0);
+ operands[1] = operand1 = force_reg (mode, operand1);
+ }
+ }
+
+ /* Simplify the source if we need to.
+ Note we do have to handle function labels here, even though we do
+ not consider them legitimate constants. Loop optimizations can
+ call the emit_move_xxx with one as a source. */
+ if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
+ || function_label_operand (operand1, mode)
+ || (GET_CODE (operand1) == HIGH
+ && symbolic_operand (XEXP (operand1, 0), mode)))
+ {
+ int ishighonly = 0;
+
+ if (GET_CODE (operand1) == HIGH)
+ {
+ ishighonly = 1;
+ operand1 = XEXP (operand1, 0);
+ }
+ if (symbolic_operand (operand1, mode))
+ {
+ /* Argh. The assembler and linker can't handle arithmetic
+ involving plabels.
+
+ So we force the plabel into memory, load operand0 from
+ the memory location, then add in the constant part. */
+ if ((GET_CODE (operand1) == CONST
+ && GET_CODE (XEXP (operand1, 0)) == PLUS
+ && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
+ || function_label_operand (operand1, mode))
+ {
+ rtx temp, const_part;
+
+ /* Figure out what (if any) scratch register to use. */
+ if (reload_in_progress || reload_completed)
+ scratch_reg = scratch_reg ? scratch_reg : operand0;
+ else if (flag_pic)
+ scratch_reg = gen_reg_rtx (Pmode);
+
+ if (GET_CODE (operand1) == CONST)
+ {
+ /* Save away the constant part of the expression. */
+ const_part = XEXP (XEXP (operand1, 0), 1);
+ if (GET_CODE (const_part) != CONST_INT)
+ abort ();
+
+ /* Force the function label into memory. */
+ temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
+ }
+ else
+ {
+ /* No constant part. */
+ const_part = NULL_RTX;
+
+ /* Force the function label into memory. */
+ temp = force_const_mem (mode, operand1);
+ }
+
+
+ /* Get the address of the memory location. PIC-ify it if
+ necessary. */
+ temp = XEXP (temp, 0);
+ if (flag_pic)
+ temp = legitimize_pic_address (temp, mode, scratch_reg);
+
+ /* Put the address of the memory location into our destination
+ register. */
+ operands[1] = temp;
+ emit_move_sequence (operands, mode, scratch_reg);
+
+ /* Now load from the memory location into our destination
+ register. */
+ operands[1] = gen_rtx_MEM (Pmode, operands[0]);
+ emit_move_sequence (operands, mode, scratch_reg);
+
+ /* And add back in the constant part. */
+ if (const_part != NULL_RTX)
+ expand_inc (operand0, const_part);
+
+ return 1;
+ }
+
+ if (flag_pic)
+ {
+ rtx temp;
+
+ if (reload_in_progress || reload_completed)
+ temp = scratch_reg ? scratch_reg : operand0;
+ else
+ temp = gen_reg_rtx (Pmode);
+
+ /* (const (plus (symbol) (const_int))) must be forced to
+ memory during/after reload if the const_int will not fit
+ in 14 bits. */
+ if (GET_CODE (operand1) == CONST
+ && GET_CODE (XEXP (operand1, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
+ && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
+ && (reload_completed || reload_in_progress)
+ && flag_pic)
+ {
+ operands[1] = force_const_mem (mode, operand1);
+ operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
+ mode, temp);
+ emit_move_sequence (operands, mode, temp);
+ }
+ else
+ {
+ operands[1] = legitimize_pic_address (operand1, mode, temp);
+ emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
+ }
+ }
+ /* On the HPPA, references to data space are supposed to use dp,
+ register 27, but showing it in the RTL inhibits various cse
+ and loop optimizations. */
+ else
+ {
+ rtx temp, set;
+
+ if (reload_in_progress || reload_completed)
+ temp = scratch_reg ? scratch_reg : operand0;
+ else
+ temp = gen_reg_rtx (mode);
+
+ /* Loading a SYMBOL_REF into a register makes that register
+ safe to be used as the base in an indexed address.
+
+ Don't mark hard registers though. That loses. */
+ if (GET_CODE (operand0) == REG
+ && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
+ REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
+ if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
+ REGNO_POINTER_FLAG (REGNO (temp)) = 1;
+ if (ishighonly)
+ set = gen_rtx_SET (mode, operand0, temp);
+ else
+ set = gen_rtx_SET (VOIDmode, operand0,
+ gen_rtx_LO_SUM (mode, temp, operand1));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ temp,
+ gen_rtx_HIGH (mode, operand1)));
+ emit_insn (set);
+
+ }
+ return 1;
+ }
+ else if (GET_CODE (operand1) != CONST_INT
+ || ! cint_ok_for_move (INTVAL (operand1)))
+ {
+ rtx temp;
+
+ if (reload_in_progress || reload_completed)
+ temp = operand0;
+ else
+ temp = gen_reg_rtx (mode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ gen_rtx_HIGH (mode, operand1)));
+ operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
+ }
+ }
+ /* Now have insn-emit do whatever it normally does. */
+ return 0;
+}
+
+/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
+ it will need a link/runtime reloc). */
+
+int
+reloc_needed (exp)
+ tree exp;
+{
+ int reloc = 0;
+
+ switch (TREE_CODE (exp))
+ {
+ case ADDR_EXPR:
+ return 1;
+
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ reloc = reloc_needed (TREE_OPERAND (exp, 0));
+ reloc |= reloc_needed (TREE_OPERAND (exp, 1));
+ break;
+
+ case NOP_EXPR:
+ case CONVERT_EXPR:
+ case NON_LVALUE_EXPR:
+ reloc = reloc_needed (TREE_OPERAND (exp, 0));
+ break;
+
+ case CONSTRUCTOR:
+ {
+ register tree link;
+ for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
+ if (TREE_VALUE (link) != 0)
+ reloc |= reloc_needed (TREE_VALUE (link));
+ }
+ break;
+
+ case ERROR_MARK:
+ break;
+
+ default:
+ break;
+ }
+ return reloc;
+}
+
+/* Does operand (which is a symbolic_operand) live in text space? If
+ so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
+
+int
+read_only_operand (operand)
+ rtx operand;
+{
+ if (GET_CODE (operand) == CONST)
+ operand = XEXP (XEXP (operand, 0), 0);
+ if (flag_pic)
+ {
+ if (GET_CODE (operand) == SYMBOL_REF)
+ return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
+ }
+ else
+ {
+ if (GET_CODE (operand) == SYMBOL_REF)
+ return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
+ }
+ return 1;
+}
+
+
+/* Return the best assembler insn template
+ for moving operands[1] into operands[0] as a fullword. */
+char *
+singlemove_string (operands)
+ rtx *operands;
+{
+ HOST_WIDE_INT intval;
+
+ if (GET_CODE (operands[0]) == MEM)
+ return "stw %r1,%0";
+ if (GET_CODE (operands[1]) == MEM)
+ return "ldw %1,%0";
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ long i;
+ REAL_VALUE_TYPE d;
+
+ if (GET_MODE (operands[1]) != SFmode)
+ abort ();
+
+ /* Translate the CONST_DOUBLE to a CONST_INT with the same target
+ bit pattern. */
+ REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (d, i);
+
+ operands[1] = GEN_INT (i);
+ /* Fall through to CONST_INT case. */
+ }
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ intval = INTVAL (operands[1]);
+
+ if (VAL_14_BITS_P (intval))
+ return "ldi %1,%0";
+ else if ((intval & 0x7ff) == 0)
+ return "ldil L'%1,%0";
+ else if (zdepi_cint_p (intval))
+ return "zdepi %Z1,%0";
+ else
+ return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
+ }
+ return "copy %1,%0";
+}
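+
+/* For illustration, the CONST_INT cascade above selects:
+
+     50           fits in 14 bits                    ldi
+     0x40000000   low 11 bits zero                   ldil
+     0x0003fe00   single contiguous run of ones      zdepi
+     0x12345678   anything else                      ldil followed by ldo  */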
+
+
+/* Compute position (in OP[1]) and width (in OP[2])
+ useful for copying IMM to a register using the zdepi
+ instructions. Store the immediate value to insert in OP[0]. */
+void
+compute_zdepi_operands (imm, op)
+ unsigned HOST_WIDE_INT imm;
+ unsigned *op;
+{
+ int lsb, len;
+
+ /* Find the least significant set bit in IMM. */
+ for (lsb = 0; lsb < 32; lsb++)
+ {
+ if ((imm & 1) != 0)
+ break;
+ imm >>= 1;
+ }
+
+ /* Choose variants based on *sign* of the 5-bit field. */
+ if ((imm & 0x10) == 0)
+ len = (lsb <= 28) ? 4 : 32 - lsb;
+ else
+ {
+ /* Find the width of the bitstring in IMM. */
+ for (len = 5; len < 32; len++)
+ {
+ if ((imm & (1 << len)) == 0)
+ break;
+ }
+
+ /* Sign extend IMM as a 5-bit value. */
+ imm = (imm & 0xf) - 0x10;
+ }
+
+ op[0] = imm;
+ op[1] = 31 - lsb;
+ op[2] = len;
+}
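+
+/* For illustration: imm = 0x1e0 (bits 5-8 set) gives lsb = 5 and leaves
+   the shifted value 0xf, whose bit 4 is clear, so len = 4.  The outputs
+   are op[0] = 0xf, op[1] = 31 - 5 = 26 and op[2] = 4: deposit the
+   four-bit value 15 ending at bit position 26 (i.e. starting at bit 5
+   counted from the least significant end).  */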
+
+/* Output assembler code to perform a doubleword move insn
+ with operands OPERANDS. */
+
+char *
+output_move_double (operands)
+ rtx *operands;
+{
+ enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
+ rtx latehalf[2];
+ rtx addreg0 = 0, addreg1 = 0;
+
+ /* First classify both operands. */
+
+ if (REG_P (operands[0]))
+ optype0 = REGOP;
+ else if (offsettable_memref_p (operands[0]))
+ optype0 = OFFSOP;
+ else if (GET_CODE (operands[0]) == MEM)
+ optype0 = MEMOP;
+ else
+ optype0 = RNDOP;
+
+ if (REG_P (operands[1]))
+ optype1 = REGOP;
+ else if (CONSTANT_P (operands[1]))
+ optype1 = CNSTOP;
+ else if (offsettable_memref_p (operands[1]))
+ optype1 = OFFSOP;
+ else if (GET_CODE (operands[1]) == MEM)
+ optype1 = MEMOP;
+ else
+ optype1 = RNDOP;
+
+ /* Check for the cases that the operand constraints are not
+ supposed to allow to happen. Abort if we get one,
+ because generating code for these cases is painful. */
+
+ if (optype0 != REGOP && optype1 != REGOP)
+ abort ();
+
+ /* Handle auto decrementing and incrementing loads and stores
+ specifically, since the structure of the function doesn't work
+     for them without major modification.  Do it better when we teach
+     this port about the general inc/dec addressing of the PA.
+ (This was written by tege. Chide him if it doesn't work.) */
+
+ if (optype0 == MEMOP)
+ {
+ /* We have to output the address syntax ourselves, since print_operand
+ doesn't deal with the addresses we want to use. Fix this later. */
+
+ rtx addr = XEXP (operands[0], 0);
+ if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
+ {
+ rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
+
+ operands[0] = XEXP (addr, 0);
+ if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
+ abort ();
+
+ if (!reg_overlap_mentioned_p (high_reg, addr))
+ {
+ /* No overlap between high target register and address
+ register. (We do this in a non-obvious way to
+ save a register file writeback) */
+ if (GET_CODE (addr) == POST_INC)
+ return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
+ return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
+ }
+ else
+ abort();
+ }
+ else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
+ {
+ rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
+
+ operands[0] = XEXP (addr, 0);
+ if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
+ abort ();
+
+ if (!reg_overlap_mentioned_p (high_reg, addr))
+ {
+ /* No overlap between high target register and address
+ register. (We do this in a non-obvious way to
+ save a register file writeback) */
+ if (GET_CODE (addr) == PRE_INC)
+ return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
+ return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
+ }
+ else
+ abort();
+ }
+ }
+ if (optype1 == MEMOP)
+ {
+ /* We have to output the address syntax ourselves, since print_operand
+ doesn't deal with the addresses we want to use. Fix this later. */
+
+ rtx addr = XEXP (operands[1], 0);
+ if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
+ {
+ rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
+
+ operands[1] = XEXP (addr, 0);
+ if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
+ abort ();
+
+ if (!reg_overlap_mentioned_p (high_reg, addr))
+ {
+ /* No overlap between high target register and address
+ register. (We do this in a non-obvious way to
+ save a register file writeback) */
+ if (GET_CODE (addr) == POST_INC)
+ return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
+ return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
+ }
+ else
+ {
+ /* This is an undefined situation. We should load into the
+ address register *and* update that register. Probably
+ we don't need to handle this at all. */
+ if (GET_CODE (addr) == POST_INC)
+ return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
+ return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
+ }
+ }
+ else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
+ {
+ rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
+
+ operands[1] = XEXP (addr, 0);
+ if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
+ abort ();
+
+ if (!reg_overlap_mentioned_p (high_reg, addr))
+ {
+ /* No overlap between high target register and address
+ register. (We do this in a non-obvious way to
+ save a register file writeback) */
+ if (GET_CODE (addr) == PRE_INC)
+ return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
+ return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
+ }
+ else
+ {
+ /* This is an undefined situation. We should load into the
+ address register *and* update that register. Probably
+ we don't need to handle this at all. */
+ if (GET_CODE (addr) == PRE_INC)
+ return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
+ return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
+ }
+ }
+ else if (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == MULT)
+ {
+ rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
+
+ if (!reg_overlap_mentioned_p (high_reg, addr))
+ {
+	      rtx xoperands[4];
+
+ xoperands[0] = high_reg;
+ xoperands[1] = XEXP (addr, 1);
+ xoperands[2] = XEXP (XEXP (addr, 0), 0);
+ xoperands[3] = XEXP (XEXP (addr, 0), 1);
+ output_asm_insn ("sh%O3addl %2,%1,%0", xoperands);
+ return "ldw 4(0,%0),%R0\n\tldw 0(0,%0),%0";
+ }
+ else
+ {
+	      rtx xoperands[4];
+
+ xoperands[0] = high_reg;
+ xoperands[1] = XEXP (addr, 1);
+ xoperands[2] = XEXP (XEXP (addr, 0), 0);
+ xoperands[3] = XEXP (XEXP (addr, 0), 1);
+ output_asm_insn ("sh%O3addl %2,%1,%R0", xoperands);
+ return "ldw 0(0,%R0),%0\n\tldw 4(0,%R0),%R0";
+ }
+
+ }
+ }
+
+ /* If an operand is an unoffsettable memory ref, find a register
+ we can increment temporarily to make it refer to the second word. */
+
+ if (optype0 == MEMOP)
+ addreg0 = find_addr_reg (XEXP (operands[0], 0));
+
+ if (optype1 == MEMOP)
+ addreg1 = find_addr_reg (XEXP (operands[1], 0));
+
+ /* Ok, we can do one word at a time.
+ Normally we do the low-numbered word first.
+
+ In either case, set up in LATEHALF the operands to use
+ for the high-numbered word and in some cases alter the
+ operands in OPERANDS to be suitable for the low-numbered word. */
+
+ if (optype0 == REGOP)
+ latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else if (optype0 == OFFSOP)
+ latehalf[0] = adj_offsettable_operand (operands[0], 4);
+ else
+ latehalf[0] = operands[0];
+
+ if (optype1 == REGOP)
+ latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ else if (optype1 == OFFSOP)
+ latehalf[1] = adj_offsettable_operand (operands[1], 4);
+ else if (optype1 == CNSTOP)
+ split_double (operands[1], &operands[1], &latehalf[1]);
+ else
+ latehalf[1] = operands[1];
+
+ /* If the first move would clobber the source of the second one,
+ do them in the other order.
+
+ This can happen in two cases:
+
+ mem -> register where the first half of the destination register
+ is the same register used in the memory's address. Reload
+ can create such insns.
+
+ mem in this case will be either register indirect or register
+ indirect plus a valid offset.
+
+ register -> register move where REGNO(dst) == REGNO(src + 1)
+ someone (Tim/Tege?) claimed this can happen for parameter loads.
+
+ Handle mem -> register case first. */
+ if (optype0 == REGOP
+ && (optype1 == MEMOP || optype1 == OFFSOP)
+ && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
+ operands[1], 0))
+ {
+ /* Do the late half first. */
+ if (addreg1)
+ output_asm_insn ("ldo 4(%0),%0", &addreg1);
+ output_asm_insn (singlemove_string (latehalf), latehalf);
+
+ /* Then clobber. */
+ if (addreg1)
+ output_asm_insn ("ldo -4(%0),%0", &addreg1);
+ return singlemove_string (operands);
+ }
+
+ /* Now handle register -> register case. */
+ if (optype0 == REGOP && optype1 == REGOP
+ && REGNO (operands[0]) == REGNO (operands[1]) + 1)
+ {
+ output_asm_insn (singlemove_string (latehalf), latehalf);
+ return singlemove_string (operands);
+ }
+
+ /* Normal case: do the two words, low-numbered first. */
+
+ output_asm_insn (singlemove_string (operands), operands);
+
+ /* Make any unoffsettable addresses point at high-numbered word. */
+ if (addreg0)
+ output_asm_insn ("ldo 4(%0),%0", &addreg0);
+ if (addreg1)
+ output_asm_insn ("ldo 4(%0),%0", &addreg1);
+
+ /* Do that word. */
+ output_asm_insn (singlemove_string (latehalf), latehalf);
+
+ /* Undo the adds we just did. */
+ if (addreg0)
+ output_asm_insn ("ldo -4(%0),%0", &addreg0);
+ if (addreg1)
+ output_asm_insn ("ldo -4(%0),%0", &addreg1);
+
+ return "";
+}
+
+char *
+output_fp_move_double (operands)
+ rtx *operands;
+{
+ if (FP_REG_P (operands[0]))
+ {
+ if (FP_REG_P (operands[1])
+ || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
+ output_asm_insn ("fcpy,dbl %r1,%0", operands);
+ else
+ output_asm_insn ("fldd%F1 %1,%0", operands);
+ }
+ else if (FP_REG_P (operands[1]))
+ {
+ output_asm_insn ("fstd%F0 %1,%0", operands);
+ }
+ else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
+ {
+ if (GET_CODE (operands[0]) == REG)
+ {
+ rtx xoperands[2];
+ xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ xoperands[0] = operands[0];
+ output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
+ }
+ /* This is a pain. You have to be prepared to deal with an
+ arbitrary address here including pre/post increment/decrement.
+
+	 So avoid this in the MD. */
+ else
+ abort ();
+ }
+ else abort ();
+ return "";
+}
+
+/* Return a REG that occurs in ADDR with coefficient 1.
+ ADDR can be effectively incremented by incrementing REG. */
+
+static rtx
+find_addr_reg (addr)
+ rtx addr;
+{
+ while (GET_CODE (addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (addr, 0)) == REG)
+ addr = XEXP (addr, 0);
+ else if (GET_CODE (XEXP (addr, 1)) == REG)
+ addr = XEXP (addr, 1);
+ else if (CONSTANT_P (XEXP (addr, 0)))
+ addr = XEXP (addr, 1);
+ else if (CONSTANT_P (XEXP (addr, 1)))
+ addr = XEXP (addr, 0);
+ else
+ abort ();
+ }
+ if (GET_CODE (addr) == REG)
+ return addr;
+ abort ();
+}
+
+/* Emit code to perform a block move.
+
+ OPERANDS[0] is the destination pointer as a REG, clobbered.
+ OPERANDS[1] is the source pointer as a REG, clobbered.
+ OPERANDS[2] is a register for temporary storage.
+   OPERANDS[3] is a register for temporary storage.
+   OPERANDS[4] is the size as a CONST_INT.
+ OPERANDS[5] is the alignment safe to use, as a CONST_INT.
+ OPERANDS[6] is another temporary register. */
+
+char *
+output_block_move (operands, size_is_constant)
+ rtx *operands;
+ int size_is_constant ATTRIBUTE_UNUSED;
+{
+ int align = INTVAL (operands[5]);
+ unsigned long n_bytes = INTVAL (operands[4]);
+
+  /* We can't move more than four bytes at a time because the PA
+     has no integer move insns longer than a word.  (Could use fp mem ops?) */
+ if (align > 4)
+ align = 4;
+
+ /* Note that we know each loop below will execute at least twice
+ (else we would have open-coded the copy). */
+ switch (align)
+ {
+ case 4:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 8);
+ output_asm_insn ("ldi %4,%2", operands);
+
+ /* Copying loop. */
+ output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
+ output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
+ output_asm_insn ("stws,ma %3,4(0,%0)", operands);
+ output_asm_insn ("addib,>= -8,%2,.-12", operands);
+ output_asm_insn ("stws,ma %6,4(0,%0)", operands);
+
+ /* Handle the residual. There could be up to 7 bytes of
+ residual to copy! */
+ if (n_bytes % 8 != 0)
+ {
+ operands[4] = GEN_INT (n_bytes % 4);
+ if (n_bytes % 8 >= 4)
+ output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
+ if (n_bytes % 4 != 0)
+ output_asm_insn ("ldw 0(0,%1),%6", operands);
+ if (n_bytes % 8 >= 4)
+ output_asm_insn ("stws,ma %3,4(0,%0)", operands);
+ if (n_bytes % 4 != 0)
+ output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
+ }
+ return "";
+
+ case 2:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 4);
+ output_asm_insn ("ldi %4,%2", operands);
+
+ /* Copying loop. */
+ output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
+ output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
+ output_asm_insn ("sths,ma %3,2(0,%0)", operands);
+ output_asm_insn ("addib,>= -4,%2,.-12", operands);
+ output_asm_insn ("sths,ma %6,2(0,%0)", operands);
+
+ /* Handle the residual. */
+ if (n_bytes % 4 != 0)
+ {
+ if (n_bytes % 4 >= 2)
+ output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
+ if (n_bytes % 2 != 0)
+ output_asm_insn ("ldb 0(0,%1),%6", operands);
+ if (n_bytes % 4 >= 2)
+ output_asm_insn ("sths,ma %3,2(0,%0)", operands);
+ if (n_bytes % 2 != 0)
+ output_asm_insn ("stb %6,0(0,%0)", operands);
+ }
+ return "";
+
+ case 1:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 2);
+ output_asm_insn ("ldi %4,%2", operands);
+
+ /* Copying loop. */
+ output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
+ output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
+ output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
+ output_asm_insn ("addib,>= -2,%2,.-12", operands);
+ output_asm_insn ("stbs,ma %6,1(0,%0)", operands);
+
+ /* Handle the residual. */
+ if (n_bytes % 2 != 0)
+ {
+ output_asm_insn ("ldb 0(0,%1),%3", operands);
+ output_asm_insn ("stb %3,0(0,%0)", operands);
+ }
+ return "";
+
+ default:
+ abort ();
+ }
+}
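+
+ /* As an illustration only (a hypothetical copy of 22 bytes with 4 byte
+ alignment; the %N operands are shown symbolically), the align == 4 case
+ above emits a sequence along these lines:
+
+ ldi 14,%2 ; loop counter = n_bytes - 8
+ ldws,ma 4(0,%1),%3 ; copying loop, 8 bytes per iteration
+ ldws,ma 4(0,%1),%6
+ stws,ma %3,4(0,%0)
+ addib,>= -8,%2,.-12
+ stws,ma %6,4(0,%0)
+ ldws,ma 4(0,%1),%3 ; residual: 22 % 8 = 6 bytes
+ ldw 0(0,%1),%6
+ stws,ma %3,4(0,%0)
+ stbys,e %6,2(0,%0) ; 22 % 4 = 2 trailing bytes
+
+ The loop runs twice (16 bytes) and the residual code moves the
+ remaining 6 bytes. */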
+
+/* Count the number of insns necessary to handle this block move.
+
+ Basic structure is the same as emit_block_move, except that we
+ count insns rather than emit them. */
+
+int
+compute_movstrsi_length (insn)
+ rtx insn;
+{
+ rtx pat = PATTERN (insn);
+ int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
+ unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
+ unsigned int n_insns = 0;
+
+ /* We can't move more than four bytes at a time because the PA
+ has no integer move insns for anything wider than a word. (Could use fp mem ops?) */
+ if (align > 4)
+ align = 4;
+
+ /* The basic copying loop. */
+ n_insns = 6;
+
+ /* Residuals. */
+ if (n_bytes % (2 * align) != 0)
+ {
+ if ((n_bytes % (2 * align)) >= align)
+ n_insns += 2;
+
+ if ((n_bytes % align) != 0)
+ n_insns += 2;
+ }
+
+ /* Lengths are expressed in bytes now; each insn is 4 bytes. */
+ return n_insns * 4;
+}
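+
+ /* A worked example (hypothetical values): for a 22 byte move with 4 byte
+ alignment the residual is 22 % 8 = 6 bytes, so we add 2 insns for the
+ aligned part of the residual and 2 more for the trailing 22 % 4 = 2
+ bytes, giving 6 + 2 + 2 = 10 insns, i.e. a length of 40 bytes. */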
+
+
+char *
+output_and (operands)
+ rtx *operands;
+{
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
+ {
+ unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
+ int ls0, ls1, ms0, p, len;
+
+ for (ls0 = 0; ls0 < 32; ls0++)
+ if ((mask & (1 << ls0)) == 0)
+ break;
+
+ for (ls1 = ls0; ls1 < 32; ls1++)
+ if ((mask & (1 << ls1)) != 0)
+ break;
+
+ for (ms0 = ls1; ms0 < 32; ms0++)
+ if ((mask & (1 << ms0)) == 0)
+ break;
+
+ if (ms0 != 32)
+ abort();
+
+ if (ls1 == 32)
+ {
+ len = ls0;
+
+ if (len == 0)
+ abort ();
+
+ operands[2] = GEN_INT (len);
+ return "extru %1,31,%2,%0";
+ }
+ else
+ {
+ /* We could use this `depi' for the case above as well, but `depi'
+ requires one more register file access than an `extru'. */
+
+ p = 31 - ls0;
+ len = ls1 - ls0;
+
+ operands[2] = GEN_INT (p);
+ operands[3] = GEN_INT (len);
+ return "depi 0,%2,%3,%0";
+ }
+ }
+ else
+ return "and %1,%2,%0";
+}
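+
+ /* Two worked examples (mask values chosen purely for illustration):
+
+ operands[2] == 0x0000ffff: ls0 = 16, ls1 = 32, so the mask is a
+ single run of low order ones and we emit "extru %1,31,16,%0".
+
+ operands[2] == 0xffff00ff: ls0 = 8, ls1 = 16, ms0 = 32, so the mask
+ clears one contiguous field; p = 31 - 8 = 23, len = 16 - 8 = 8 and
+ we emit "depi 0,23,8,%0". */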
+
+char *
+output_ior (operands)
+ rtx *operands;
+{
+ unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
+ int bs0, bs1, p, len;
+
+ if (INTVAL (operands[2]) == 0)
+ return "copy %1,%0";
+
+ for (bs0 = 0; bs0 < 32; bs0++)
+ if ((mask & (1 << bs0)) != 0)
+ break;
+
+ for (bs1 = bs0; bs1 < 32; bs1++)
+ if ((mask & (1 << bs1)) == 0)
+ break;
+
+ if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
+ abort();
+
+ p = 31 - bs0;
+ len = bs1 - bs0;
+
+ operands[2] = GEN_INT (p);
+ operands[3] = GEN_INT (len);
+ return "depi -1,%2,%3,%0";
+}
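+
+ /* A worked example (mask chosen for illustration): operands[2] == 0x00000ff0
+ sets bits 4..11, so bs0 = 4 and bs1 = 12; p = 31 - 4 = 27, len = 12 - 4 = 8
+ and we emit "depi -1,27,8,%0". A mask with more than one run of ones
+ (e.g. 0x00000101) would trigger the abort above. */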
+
+/* Output an ascii string. */
+void
+output_ascii (file, p, size)
+ FILE *file;
+ unsigned char *p;
+ int size;
+{
+ int i;
+ int chars_output;
+ unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
+
+ /* The HP assembler can only take strings of 256 characters at one
+ time. This is a limitation on input line length, *not* the
+ length of the string. Sigh. Even worse, it seems that the
+ restriction is in number of input characters (see \xnn &
+ \whatever). So we have to do this very carefully. */
+
+ fputs ("\t.STRING \"", file);
+
+ chars_output = 0;
+ for (i = 0; i < size; i += 4)
+ {
+ int co = 0;
+ int io = 0;
+ for (io = 0, co = 0; io < MIN (4, size - i); io++)
+ {
+ register unsigned int c = p[i + io];
+
+ if (c == '\"' || c == '\\')
+ partial_output[co++] = '\\';
+ if (c >= ' ' && c < 0177)
+ partial_output[co++] = c;
+ else
+ {
+ unsigned int hexd;
+ partial_output[co++] = '\\';
+ partial_output[co++] = 'x';
+ hexd = c / 16 - 0 + '0';
+ if (hexd > '9')
+ hexd -= '9' - 'a' + 1;
+ partial_output[co++] = hexd;
+ hexd = c % 16 - 0 + '0';
+ if (hexd > '9')
+ hexd -= '9' - 'a' + 1;
+ partial_output[co++] = hexd;
+ }
+ }
+ if (chars_output + co > 243)
+ {
+ fputs ("\"\n\t.STRING \"", file);
+ chars_output = 0;
+ }
+ fwrite (partial_output, 1, co, file);
+ chars_output += co;
+ co = 0;
+ }
+ fputs ("\"\n", file);
+}
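+
+ /* For example (hypothetical input), the four bytes 'O', 'K', '"', 0xab
+ are emitted as
+
+ .STRING "OK\"\xab"
+
+ i.e. quote and backslash are backslash-escaped, printable characters are
+ copied through, and everything else becomes a \xnn hex escape. */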
+
+/* Try to rewrite floating point comparisons & branches to avoid
+ useless add,tr insns.
+
+ CHECK_NOTES is nonzero if we should examine REG_DEAD notes
+ to see if FPCC is dead. CHECK_NOTES is nonzero for the
+ first attempt to remove useless add,tr insns. It is zero
+ for the second pass as reorg sometimes leaves bogus REG_DEAD
+ notes lying around.
+
+ When CHECK_NOTES is zero we can only eliminate add,tr insns
+ when there's a 1:1 correspondence between fcmp and ftest/fbranch
+ instructions. */
+void
+remove_useless_addtr_insns (insns, check_notes)
+ rtx insns;
+ int check_notes;
+{
+ rtx insn;
+ static int pass = 0;
+
+ /* This is fairly cheap, so always run it when optimizing. */
+ if (optimize > 0)
+ {
+ int fcmp_count = 0;
+ int fbranch_count = 0;
+
+ /* Walk all the insns in this function looking for fcmp & fbranch
+ instructions. Keep track of how many of each we find. */
+ insns = get_insns ();
+ for (insn = insns; insn; insn = next_insn (insn))
+ {
+ rtx tmp;
+
+ /* Ignore anything that isn't an INSN or a JUMP_INSN. */
+ if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
+ continue;
+
+ tmp = PATTERN (insn);
+
+ /* It must be a set. */
+ if (GET_CODE (tmp) != SET)
+ continue;
+
+ /* If the destination is CCFP, then we've found an fcmp insn. */
+ tmp = SET_DEST (tmp);
+ if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
+ {
+ fcmp_count++;
+ continue;
+ }
+
+ tmp = PATTERN (insn);
+ /* If this is an fbranch instruction, bump the fbranch counter. */
+ if (GET_CODE (tmp) == SET
+ && SET_DEST (tmp) == pc_rtx
+ && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
+ && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
+ && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
+ && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
+ {
+ fbranch_count++;
+ continue;
+ }
+ }
+
+
+ /* Find all floating point compare + branch insns. If possible,
+ reverse the comparison & the branch to avoid add,tr insns. */
+ for (insn = insns; insn; insn = next_insn (insn))
+ {
+ rtx tmp, next;
+
+ /* Ignore anything that isn't an INSN. */
+ if (GET_CODE (insn) != INSN)
+ continue;
+
+ tmp = PATTERN (insn);
+
+ /* It must be a set. */
+ if (GET_CODE (tmp) != SET)
+ continue;
+
+ /* The destination must be CCFP, which is register zero. */
+ tmp = SET_DEST (tmp);
+ if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
+ continue;
+
+ /* INSN should be a set of CCFP.
+
+ See if the result of this insn is used in a reversed FP
+ conditional branch. If so, reverse our condition and
+ the branch. Doing so avoids useless add,tr insns. */
+ next = next_insn (insn);
+ while (next)
+ {
+ /* Jumps, calls and labels stop our search. */
+ if (GET_CODE (next) == JUMP_INSN
+ || GET_CODE (next) == CALL_INSN
+ || GET_CODE (next) == CODE_LABEL)
+ break;
+
+ /* As does another fcmp insn. */
+ if (GET_CODE (next) == INSN
+ && GET_CODE (PATTERN (next)) == SET
+ && GET_CODE (SET_DEST (PATTERN (next))) == REG
+ && REGNO (SET_DEST (PATTERN (next))) == 0)
+ break;
+
+ next = next_insn (next);
+ }
+
+ /* Is NEXT_INSN a branch? */
+ if (next
+ && GET_CODE (next) == JUMP_INSN)
+ {
+ rtx pattern = PATTERN (next);
+
+ /* If it is a reversed fp conditional branch (e.g. it uses add,tr)
+ and CCFP dies, then reverse our conditional and the branch
+ to avoid the add,tr. */
+ if (GET_CODE (pattern) == SET
+ && SET_DEST (pattern) == pc_rtx
+ && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
+ && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
+ && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
+ && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
+ && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
+ && (fcmp_count == fbranch_count
+ || (check_notes
+ && find_regno_note (next, REG_DEAD, 0))))
+ {
+ /* Reverse the branch. */
+ tmp = XEXP (SET_SRC (pattern), 1);
+ XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
+ XEXP (SET_SRC (pattern), 2) = tmp;
+ INSN_CODE (next) = -1;
+
+ /* Reverse our condition. */
+ tmp = PATTERN (insn);
+ PUT_CODE (XEXP (tmp, 1),
+ reverse_condition (GET_CODE (XEXP (tmp, 1))));
+ }
+ }
+ }
+ }
+
+ pass = !pass;
+
+}
+
+/* You may have trouble believing this, but this is the HP-PA stack
+ layout. Wow.
+
+ Offset Contents
+
+ Variable arguments (optional; any number may be allocated)
+
+ SP-(4*(N+9)) arg word N
+ : :
+ SP-56 arg word 5
+ SP-52 arg word 4
+
+ Fixed arguments (must be allocated; may remain unused)
+
+ SP-48 arg word 3
+ SP-44 arg word 2
+ SP-40 arg word 1
+ SP-36 arg word 0
+
+ Frame Marker
+
+ SP-32 External Data Pointer (DP)
+ SP-28 External sr4
+ SP-24 External/stub RP (RP')
+ SP-20 Current RP
+ SP-16 Static Link
+ SP-12 Clean up
+ SP-8 Calling Stub RP (RP'')
+ SP-4 Previous SP
+
+ Top of Frame
+
+ SP-0 Stack Pointer (points to next available address)
+
+*/
+
+/* This function saves registers as follows. Registers marked with ' are
+ this function's registers (as opposed to the previous function's).
+ If a frame_pointer isn't needed, r4 is saved as a general register;
+ the space for the frame pointer is still allocated, though, to keep
+ things simple.
+
+
+ Top of Frame
+
+ SP (FP') Previous FP
+ SP + 4 Alignment filler (sigh)
+ SP + 8 Space for locals reserved here.
+ .
+ .
+ .
+ SP + n All call saved registers used.
+ .
+ .
+ .
+ SP + o All call saved fp registers used.
+ .
+ .
+ .
+ SP + p (SP') points to next available address.
+
+*/
+
+/* Emit RTL to store REG at the memory location specified by BASE+DISP.
+ Handle case where DISP > 8k by using the add_high_const pattern.
+
+ Note in DISP > 8k case, we will leave the high part of the address
+ in %r1. There is code in hppa_expand_{prologue,epilogue} that knows this. */
+static void
+store_reg (reg, disp, base)
+ int reg, disp, base;
+{
+ if (VAL_14_BITS_P (disp))
+ {
+ emit_move_insn (gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (SImode,
+ gen_rtx_REG (SImode, base),
+ GEN_INT (disp))),
+ gen_rtx_REG (SImode, reg));
+ }
+ else
+ {
+ emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1),
+ gen_rtx_REG (SImode, base),
+ GEN_INT (disp)));
+ emit_move_insn (gen_rtx_MEM (SImode,
+ gen_rtx_LO_SUM (SImode,
+ gen_rtx_REG (SImode, 1),
+ GEN_INT (disp))),
+ gen_rtx_REG (SImode, reg));
+ }
+}
+
+/* Emit RTL to load REG from the memory location specified by BASE+DISP.
+ Handle case where DISP > 8k by using the add_high_const pattern.
+
+ Note in DISP > 8k case, we will leave the high part of the address
+ in %r1. There is code in hppa_expand_{prologue,epilogue} that knows this. */
+static void
+load_reg (reg, disp, base)
+ int reg, disp, base;
+{
+ if (VAL_14_BITS_P (disp))
+ {
+ emit_move_insn (gen_rtx_REG (SImode, reg),
+ gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (SImode,
+ gen_rtx_REG (SImode, base),
+ GEN_INT (disp))));
+ }
+ else
+ {
+ emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1),
+ gen_rtx_REG (SImode, base),
+ GEN_INT (disp)));
+ emit_move_insn (gen_rtx_REG (SImode, reg),
+ gen_rtx_MEM (SImode,
+ gen_rtx_LO_SUM (SImode,
+ gen_rtx_REG (SImode, 1),
+ GEN_INT (disp))));
+ }
+}
+
+/* Emit RTL to set REG to the value specified by BASE+DISP.
+ Handle case where DISP > 8k by using the add_high_const pattern.
+
+ Note in DISP > 8k case, we will leave the high part of the address
+ in %r1. There is code in hppa_expand_{prologue,epilogue} that knows this. */
+static void
+set_reg_plus_d(reg, base, disp)
+ int reg, base, disp;
+{
+ if (VAL_14_BITS_P (disp))
+ {
+ emit_move_insn (gen_rtx_REG (SImode, reg),
+ gen_rtx_PLUS (SImode,
+ gen_rtx_REG (SImode, base),
+ GEN_INT (disp)));
+ }
+ else
+ {
+ emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1),
+ gen_rtx_REG (SImode, base),
+ GEN_INT (disp)));
+ emit_move_insn (gen_rtx_REG (SImode, reg),
+ gen_rtx_LO_SUM (SImode,
+ gen_rtx_REG (SImode, 1),
+ GEN_INT (disp)));
+ }
+}
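+
+ /* As a sketch of the two cases handled by store_reg/load_reg/set_reg_plus_d
+ (instruction mnemonics shown for illustration; the exact output comes from
+ the patterns in pa.md): a store with disp = -20 fits in 14 bits and becomes
+ a single "stw %reg,-20(0,%base)", while disp = 0x5000 does not, so we first
+ emit "addil L'20480,%base" (leaving the high part in %r1) and then
+ "stw %reg,R'20480(%r1)". */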
+
+/* Global variables set by FUNCTION_PROLOGUE. */
+/* Size of frame. Need to know this to emit return insns from
+ leaf procedures. */
+static int actual_fsize;
+static int local_fsize, save_fregs;
+
+int
+compute_frame_size (size, fregs_live)
+ int size;
+ int *fregs_live;
+{
+ extern int current_function_outgoing_args_size;
+ int i, fsize;
+
+ /* 8 is space for frame pointer + filler. If any frame is allocated
+ we need to add this in because of STARTING_FRAME_OFFSET. */
+ fsize = size + (size || frame_pointer_needed ? 8 : 0);
+
+ /* We must leave enough space for all the callee saved registers
+ from 3 .. highest used callee save register since we don't
+ know if we're going to have an inline or out of line prologue
+ and epilogue. */
+ for (i = 18; i >= 3; i--)
+ if (regs_ever_live[i])
+ {
+ fsize += 4 * (i - 2);
+ break;
+ }
+
+ /* Round the stack. */
+ fsize = (fsize + 7) & ~7;
+
+ /* Likewise, we must leave enough space for all the callee saved fp
+ registers from 48 .. highest used callee saved fp register since we don't
+ know if we're going to have an inline or out of line prologue
+ and epilogue. */
+ for (i = 66; i >= 48; i -= 2)
+ if (regs_ever_live[i] || regs_ever_live[i + 1])
+ {
+ if (fregs_live)
+ *fregs_live = 1;
+
+ fsize += 4 * (i - 46);
+ break;
+ }
+
+ fsize += current_function_outgoing_args_size;
+ if (! leaf_function_p () || fsize)
+ fsize += 32;
+ return (fsize + 63) & ~63;
+}
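+
+ /* A worked example (all numbers hypothetical): size = 40, callee saves
+ %r3..%r5, no fp callee saves, 16 bytes of outgoing args, not a leaf:
+
+ 40 + 8 = 48 (frame pointer + filler)
+ 48 + 4 * (5 - 2) = 60 (space for %r3..%r5)
+ rounded to 8 -> 64
+ 64 + 16 = 80 (outgoing args)
+ 80 + 32 = 112 (frame marker, not a leaf)
+ rounded to 64 -> 128 (final frame size) */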
+
+rtx hp_profile_label_rtx;
+static char hp_profile_label_name[8];
+void
+output_function_prologue (file, size)
+ FILE *file;
+ int size ATTRIBUTE_UNUSED;
+{
+ /* The function's label and associated .PROC must never be
+ separated and must be output *after* any profiling declarations
+ to avoid changing spaces/subspaces within a procedure. */
+ ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
+ fputs ("\t.PROC\n", file);
+
+ /* hppa_expand_prologue does the dirty work now. We just need
+ to output the assembler directives which denote the start
+ of a function. */
+ fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
+ if (regs_ever_live[2] || profile_flag)
+ fputs (",CALLS,SAVE_RP", file);
+ else
+ fputs (",NO_CALLS", file);
+
+ if (frame_pointer_needed)
+ fputs (",SAVE_SP", file);
+
+ /* Pass on information about the number of callee register saves
+ performed in the prologue.
+
+ The compiler is supposed to pass the highest register number
+ saved, the assembler then has to adjust that number before
+ entering it into the unwind descriptor (to account for any
+ caller saved registers with lower register numbers than the
+ first callee saved register). */
+ if (gr_saved)
+ fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
+
+ if (fr_saved)
+ fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
+
+ fputs ("\n\t.ENTRY\n", file);
+
+ /* Horrid hack. emit_function_prologue will modify this RTL in
+ place to get the expected results. */
+ if (profile_flag)
+ ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
+ hp_profile_labelno);
+
+ /* If we're using GAS and not using the portable runtime model, then
+ we don't need to accumulate the total number of code bytes. */
+ if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
+ total_code_bytes = 0;
+ else if (insn_addresses)
+ {
+ unsigned int old_total = total_code_bytes;
+
+ total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
+ total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
+
+ /* Be prepared to handle overflows. */
+ total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
+ }
+ else
+ total_code_bytes = -1;
+
+ remove_useless_addtr_insns (get_insns (), 0);
+
+ /* Restore INSN_CODEs for insn which use unscaled indexed addresses. */
+ restore_unscaled_index_insn_codes (get_insns ());
+}
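+
+ /* For a hypothetical function "foo" with a 128 byte frame, a frame pointer,
+ three callee GR saves and two callee FR saves, the directives emitted
+ here look roughly like:
+
+ foo
+ .PROC
+ .CALLINFO FRAME=128,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=5,ENTRY_FR=13
+ .ENTRY */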
+
+void
+hppa_expand_prologue()
+{
+ extern char call_used_regs[];
+ int size = get_frame_size ();
+ int merge_sp_adjust_with_store = 0;
+ int i, offset;
+ rtx tmpreg, size_rtx;
+
+ gr_saved = 0;
+ fr_saved = 0;
+ save_fregs = 0;
+ local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
+ actual_fsize = compute_frame_size (size, &save_fregs);
+
+ /* Compute a few things we will use often. */
+ tmpreg = gen_rtx_REG (SImode, 1);
+ size_rtx = GEN_INT (actual_fsize);
+
+ /* Handle out of line prologues and epilogues. */
+ if (TARGET_SPACE)
+ {
+ rtx operands[2];
+ int saves = 0;
+ int outline_insn_count = 0;
+ int inline_insn_count = 0;
+
+ /* Count the number of insns for the inline and out of line
+ variants so we can choose one appropriately.
+
+ No need to screw with counting actual_fsize operations -- they're
+ done for both inline and out of line prologues. */
+ if (regs_ever_live[2])
+ inline_insn_count += 1;
+
+ if (! cint_ok_for_move (local_fsize))
+ outline_insn_count += 2;
+ else
+ outline_insn_count += 1;
+
+ /* Put the register save info into %r22. */
+ for (i = 18; i >= 3; i--)
+ if (regs_ever_live[i] && ! call_used_regs[i])
+ {
+ /* -1 because the stack adjustment is normally done in
+ the same insn as a register save. */
+ inline_insn_count += (i - 2) - 1;
+ saves = i;
+ break;
+ }
+
+ for (i = 66; i >= 48; i -= 2)
+ if (regs_ever_live[i] || regs_ever_live[i + 1])
+ {
+ /* +1 needed as we load %r1 with the start of the freg
+ save area. */
+ inline_insn_count += (i/2 - 23) + 1;
+ saves |= ((i/2 - 12 ) << 16);
+ break;
+ }
+
+ if (frame_pointer_needed)
+ inline_insn_count += 3;
+
+ if (! cint_ok_for_move (saves))
+ outline_insn_count += 2;
+ else
+ outline_insn_count += 1;
+
+ if (TARGET_PORTABLE_RUNTIME)
+ outline_insn_count += 2;
+ else
+ outline_insn_count += 1;
+
+ /* If there's a lot of insns in the prologue, then do it as
+ an out-of-line sequence. */
+ if (inline_insn_count > outline_insn_count)
+ {
+ /* Put the local_fsize into %r19. */
+ operands[0] = gen_rtx_REG (SImode, 19);
+ operands[1] = GEN_INT (local_fsize);
+ emit_move_insn (operands[0], operands[1]);
+
+ /* Put the stack size into %r21. */
+ operands[0] = gen_rtx_REG (SImode, 21);
+ operands[1] = size_rtx;
+ emit_move_insn (operands[0], operands[1]);
+
+ operands[0] = gen_rtx_REG (SImode, 22);
+ operands[1] = GEN_INT (saves);
+ emit_move_insn (operands[0], operands[1]);
+
+ /* Now call the out-of-line prologue. */
+ emit_insn (gen_outline_prologue_call ());
+ emit_insn (gen_blockage ());
+
+ /* Note that we're using an out-of-line prologue. */
+ out_of_line_prologue_epilogue = 1;
+ return;
+ }
+ }
+
+ out_of_line_prologue_epilogue = 0;
+
+ /* Save RP first. The calling conventions manual states RP will
+ always be stored into the caller's frame at sp-20. */
+ if (regs_ever_live[2] || profile_flag)
+ store_reg (2, -20, STACK_POINTER_REGNUM);
+
+ /* Allocate the local frame and set up the frame pointer if needed. */
+ if (actual_fsize)
+ {
+ if (frame_pointer_needed)
+ {
+ /* Copy the old frame pointer temporarily into %r1. Set up the
+ new stack pointer, then store away the saved old frame pointer
+ into the stack at sp+actual_fsize and at the same time update
+ the stack pointer by actual_fsize bytes. Two versions, first
+ handles small (<8k) frames. The second handles large (>8k)
+ frames. */
+ emit_move_insn (tmpreg, frame_pointer_rtx);
+ emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+ if (VAL_14_BITS_P (actual_fsize))
+ emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
+ else
+ {
+ /* It is incorrect to store the saved frame pointer at *sp,
+ then increment sp (writes beyond the current stack boundary).
+
+ So instead use stwm to store at *sp and post-increment the
+ stack pointer as an atomic operation. Then increment sp to
+ finish allocating the new frame. */
+ emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
+ set_reg_plus_d (STACK_POINTER_REGNUM,
+ STACK_POINTER_REGNUM,
+ actual_fsize - 64);
+ }
+ }
+ /* No frame pointer needed. */
+ else
+ {
+ /* In some cases we can perform the first callee register save
+ and allocating the stack frame at the same time. If so, just
+ make a note of it and defer allocating the frame until saving
+ the callee registers. */
+ if (VAL_14_BITS_P (-actual_fsize)
+ && local_fsize == 0
+ && ! profile_flag
+ && ! flag_pic)
+ merge_sp_adjust_with_store = 1;
+ /* Cannot optimize. Adjust the stack frame by actual_fsize bytes. */
+ else if (actual_fsize != 0)
+ set_reg_plus_d (STACK_POINTER_REGNUM,
+ STACK_POINTER_REGNUM,
+ actual_fsize);
+ }
+ }
+
+ /* The hppa calling conventions say that %r19, the pic offset
+ register, is saved at sp - 32 (in this function's frame) when
+ generating PIC code. FIXME: What is the correct thing to do
+ for functions which make no calls and allocate no frame? Do
+ we need to allocate a frame, or can we just omit the save? For
+ now we'll just omit the save. */
+ if (actual_fsize != 0 && flag_pic)
+ store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
+
+ /* Profiling code.
+
+ Instead of taking one argument, the counter label, as most normal
+ mcounts do, _mcount appears to behave differently on the HPPA. It
+ takes the return address of the caller, the address of this routine,
+ and the address of the label. Also, it isn't magic, so
+ argument registers have to be preserved. */
+ if (profile_flag)
+ {
+ int pc_offset, i, arg_offset, basereg, offsetadj;
+
+ pc_offset = 4 + (frame_pointer_needed
+ ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
+ : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));
+
+ /* When the function has a frame pointer, use it as the base
+ register for saving and restoring registers. Otherwise use the stack
+ pointer. Adjust the offset according to the frame size if
+ this function does not have a frame pointer. */
+
+ basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
+ : STACK_POINTER_REGNUM;
+ offsetadj = frame_pointer_needed ? 0 : actual_fsize;
+
+ /* Horrid hack. emit_function_prologue will modify this RTL in
+ place to get the expected results. sprintf here is just to
+ put something in the name. */
+ sprintf(hp_profile_label_name, "LP$%04d", -1);
+ hp_profile_label_rtx = gen_rtx_SYMBOL_REF (SImode, hp_profile_label_name);
+ if (current_function_returns_struct)
+ store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);
+
+ for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
+ if (regs_ever_live [i])
+ {
+ store_reg (i, arg_offset, basereg);
+ /* Deal with arg_offset not fitting in 14 bits. */
+ pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
+ }
+
+ emit_move_insn (gen_rtx_REG (SImode, 26), gen_rtx_REG (SImode, 2));
+ emit_move_insn (tmpreg, gen_rtx_HIGH (SImode, hp_profile_label_rtx));
+ emit_move_insn (gen_rtx_REG (SImode, 24),
+ gen_rtx_LO_SUM (SImode, tmpreg, hp_profile_label_rtx));
+ /* %r25 is set from within the output pattern. */
+ emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));
+
+ /* Restore argument registers. */
+ for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
+ if (regs_ever_live [i])
+ load_reg (i, arg_offset, basereg);
+
+ if (current_function_returns_struct)
+ load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
+
+ }
+
+ /* Normal register save.
+
+ Do not save the frame pointer in the frame_pointer_needed case. It
+ was done earlier. */
+ if (frame_pointer_needed)
+ {
+ for (i = 18, offset = local_fsize; i >= 4; i--)
+ if (regs_ever_live[i] && ! call_used_regs[i])
+ {
+ store_reg (i, offset, FRAME_POINTER_REGNUM);
+ offset += 4;
+ gr_saved++;
+ }
+ /* Account for %r3 which is saved in a special place. */
+ gr_saved++;
+ }
+ /* No frame pointer needed. */
+ else
+ {
+ for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
+ if (regs_ever_live[i] && ! call_used_regs[i])
+ {
+ /* If merge_sp_adjust_with_store is nonzero, then we can
+ optimize the first GR save. */
+ if (merge_sp_adjust_with_store)
+ {
+ merge_sp_adjust_with_store = 0;
+ emit_insn (gen_post_stwm (stack_pointer_rtx,
+ gen_rtx_REG (SImode, i),
+ GEN_INT (-offset)));
+ }
+ else
+ store_reg (i, offset, STACK_POINTER_REGNUM);
+ offset += 4;
+ gr_saved++;
+ }
+
+ /* If we wanted to merge the SP adjustment with a GR save, but we never
+ did any GR saves, then just emit the adjustment here. */
+ if (merge_sp_adjust_with_store)
+ set_reg_plus_d (STACK_POINTER_REGNUM,
+ STACK_POINTER_REGNUM,
+ actual_fsize);
+ }
+
+ /* Align pointer properly (doubleword boundary). */
+ offset = (offset + 7) & ~7;
+
+ /* Floating point register store. */
+ if (save_fregs)
+ {
+ /* First get the frame or stack pointer to the start of the FP register
+ save area. */
+ if (frame_pointer_needed)
+ set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
+ else
+ set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
+
+ /* Now actually save the FP registers. */
+ for (i = 66; i >= 48; i -= 2)
+ {
+ if (regs_ever_live[i] || regs_ever_live[i + 1])
+ {
+ emit_move_insn (gen_rtx_MEM (DFmode,
+ gen_rtx_POST_INC (DFmode, tmpreg)),
+ gen_rtx_REG (DFmode, i));
+ fr_saved++;
+ }
+ }
+ }
+
+ /* When generating PIC code it is necessary to save/restore the
+ PIC register around each function call. We used to do this
+ in the call patterns themselves, but that implementation
+ made incorrect assumptions about using global variables to hold
+ per-function rtl code generated in the backend.
+
+ So instead, we copy the PIC register into a reserved callee saved
+ register in the prologue. Then after each call we reload the PIC
+ register from the callee saved register. We also reload the PIC
+ register from the callee saved register in the epilogue to ensure the
+ PIC register is valid at function exit.
+
+ This may (depending on the exact characteristics of the function)
+ even be more efficient.
+
+ Avoid this if the callee saved register wasn't used (these are
+ leaf functions). */
+ if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
+ emit_move_insn (gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
+ gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM));
+}
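+
+ /* A worked example of the inline vs. out-of-line decision above (all
+ counts hypothetical, and assuming cint_ok_for_move accepts both
+ constants): with -mspace, %r2 live, %r3..%r10 live and call-saved, a
+ frame pointer, and no fp saves, the inline count is 1 + 7 + 3 = 11
+ while the out-of-line count is 1 + 1 + 1 = 3, so the out-of-line
+ prologue and epilogue are used. */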
+
+
+void
+output_function_epilogue (file, size)
+ FILE *file;
+ int size ATTRIBUTE_UNUSED;
+{
+ rtx insn = get_last_insn ();
+
+ /* hppa_expand_epilogue does the dirty work now. We just need
+ to output the assembler directives which denote the end
+ of a function.
+
+ To make debuggers happy, emit a nop if the epilogue was completely
+ eliminated due to a volatile call as the last insn in the
+ current function. That way the return address (in %r2) will
+ always point to a valid instruction in the current function. */
+
+ /* Get the last real insn. */
+ if (GET_CODE (insn) == NOTE)
+ insn = prev_real_insn (insn);
+
+ /* If it is a sequence, then look inside. */
+ if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ insn = XVECEXP (PATTERN (insn), 0, 0);
+
+ /* If insn is a CALL_INSN, then it must be a call to a volatile
+ function (otherwise there would be epilogue insns). */
+ if (insn && GET_CODE (insn) == CALL_INSN)
+ fputs ("\tnop\n", file);
+
+ fputs ("\t.EXIT\n\t.PROCEND\n", file);
+
+ /* Free up stuff we don't need anymore. */
+ if (unscaled_index_insn_codes)
+ free (unscaled_index_insn_codes);
+ max_unscaled_index_insn_codes_uid = 0;
+}
+
+void
+hppa_expand_epilogue ()
+{
+ rtx tmpreg;
+ int offset,i;
+ int merge_sp_adjust_with_load = 0;
+
+ /* Handle out of line prologues and epilogues. */
+ if (TARGET_SPACE && out_of_line_prologue_epilogue)
+ {
+ int saves = 0;
+ rtx operands[2];
+
+ /* Put the register save info into %r22. */
+ for (i = 18; i >= 3; i--)
+ if (regs_ever_live[i] && ! call_used_regs[i])
+ {
+ saves = i;
+ break;
+ }
+
+ for (i = 66; i >= 48; i -= 2)
+ if (regs_ever_live[i] || regs_ever_live[i + 1])
+ {
+ saves |= ((i/2 - 12 ) << 16);
+ break;
+ }
+
+ emit_insn (gen_blockage ());
+
+ /* Put the local_fsize into %r19. */
+ operands[0] = gen_rtx_REG (SImode, 19);
+ operands[1] = GEN_INT (local_fsize);
+ emit_move_insn (operands[0], operands[1]);
+
+ /* Put the stack size into %r21. */
+ operands[0] = gen_rtx_REG (SImode, 21);
+ operands[1] = GEN_INT (actual_fsize);
+ emit_move_insn (operands[0], operands[1]);
+
+ operands[0] = gen_rtx_REG (SImode, 22);
+ operands[1] = GEN_INT (saves);
+ emit_move_insn (operands[0], operands[1]);
+
+ /* Now call the out-of-line epilogue. */
+ emit_insn (gen_outline_epilogue_call ());
+ return;
+ }
+
+ /* We will use this often. */
+ tmpreg = gen_rtx_REG (SImode, 1);
+
+ /* Try to restore RP early to avoid load/use interlocks when
+ RP gets used in the return (bv) instruction. This appears to still
+ be necessary even when we schedule the prologue and epilogue. */
+ if (frame_pointer_needed
+ && (regs_ever_live [2] || profile_flag))
+ load_reg (2, -20, FRAME_POINTER_REGNUM);
+
+ /* No frame pointer, and stack is smaller than 8k. */
+ else if (! frame_pointer_needed
+ && VAL_14_BITS_P (actual_fsize + 20)
+ && (regs_ever_live[2] || profile_flag))
+ load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);
+
+ /* General register restores. */
+ if (frame_pointer_needed)
+ {
+ for (i = 18, offset = local_fsize; i >= 4; i--)
+ if (regs_ever_live[i] && ! call_used_regs[i])
+ {
+ load_reg (i, offset, FRAME_POINTER_REGNUM);
+ offset += 4;
+ }
+ }
+ else
+ {
+ for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
+ {
+ if (regs_ever_live[i] && ! call_used_regs[i])
+ {
+ /* Only for the first load.
+ merge_sp_adjust_with_load holds the register load
+ with which we will merge the sp adjustment. */
+ if (VAL_14_BITS_P (actual_fsize + 20)
+ && local_fsize == 0
+ && ! merge_sp_adjust_with_load)
+ merge_sp_adjust_with_load = i;
+ else
+ load_reg (i, offset, STACK_POINTER_REGNUM);
+ offset += 4;
+ }
+ }
+ }
+
+ /* Align pointer properly (doubleword boundary). */
+ offset = (offset + 7) & ~7;
+
+ /* FP register restores. */
+ if (save_fregs)
+ {
+ /* Adjust the register to index off of. */
+ if (frame_pointer_needed)
+ set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
+ else
+ set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
+
+ /* Actually do the restores now. */
+ for (i = 66; i >= 48; i -= 2)
+ {
+ if (regs_ever_live[i] || regs_ever_live[i + 1])
+ {
+ emit_move_insn (gen_rtx_REG (DFmode, i),
+ gen_rtx_MEM (DFmode,
+ gen_rtx_POST_INC (DFmode, tmpreg)));
+ }
+ }
+ }
+
+ /* Emit a blockage insn here to keep these insns from being moved to
+ an earlier spot in the epilogue, or into the main instruction stream.
+
+ This is necessary as we must not cut the stack back before all the
+ restores are finished. */
+ emit_insn (gen_blockage ());
+ /* No frame pointer, but we have a stack greater than 8k. We restore
+ %r2 very late in this case. (All other cases are restored as early
+ as possible.) */
+ if (! frame_pointer_needed
+ && ! VAL_14_BITS_P (actual_fsize + 20)
+ && (regs_ever_live[2] || profile_flag))
+ {
+ set_reg_plus_d (STACK_POINTER_REGNUM,
+ STACK_POINTER_REGNUM,
+ - actual_fsize);
+
+ /* This used to try and be clever by not depending on the value in
+ %r30 and instead use the value held in %r1 (so that the 2nd insn
+ which sets %r30 could be put in the delay slot of the return insn).
+
+ That won't work since if the stack is exactly 8k set_reg_plus_d
+ doesn't set %r1, just %r30. */
+ load_reg (2, - 20, STACK_POINTER_REGNUM);
+ }
+
+ /* Reset stack pointer (and possibly frame pointer). The stack
+ pointer is initially set to fp + 64 to avoid a race condition. */
+ else if (frame_pointer_needed)
+ {
+ set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
+ emit_insn (gen_pre_ldwm (frame_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-64)));
+ }
+ /* If we were deferring a callee register restore, do it now. */
+ else if (! frame_pointer_needed && merge_sp_adjust_with_load)
+ emit_insn (gen_pre_ldwm (gen_rtx_REG (SImode, merge_sp_adjust_with_load),
+ stack_pointer_rtx,
+ GEN_INT (- actual_fsize)));
+ else if (actual_fsize != 0)
+ set_reg_plus_d (STACK_POINTER_REGNUM,
+ STACK_POINTER_REGNUM,
+ - actual_fsize);
+}
+
+/* Fetch the return address for the frame COUNT steps up from
+ the current frame, after the prologue. FRAMEADDR is the
+ frame pointer of the COUNT frame.
+
+ We want to ignore any export stub remnants here.
+
+ The value returned is used in two different ways:
+
+ 1. To find a function's caller.
+
+ 2. To change the return address for a function.
+
+ This function handles most instances of case 1; however, it will
+ fail if there are two levels of stubs to execute on the return
+ path. The only way I believe that can happen is if the return value
+ needs a parameter relocation, which never happens for C code.
+
+ This function handles most instances of case 2; however, it will
+ fail if we did not originally have stub code on the return path
+ but will need code on the new return path. This can happen if
+ the caller & callee are both in the main program, but the new
+ return location is in a shared library.
+
+ To handle this correctly we need to set the return pointer at
+ frame-20 to point to a return stub, and frame-24 to point to the
+ location we wish to return to. */
+
+rtx
+return_addr_rtx (count, frameaddr)
+ int count ATTRIBUTE_UNUSED;
+ rtx frameaddr;
+{
+ rtx label;
+ rtx saved_rp;
+ rtx ins;
+
+ saved_rp = gen_reg_rtx (Pmode);
+
+ /* First, we start off with the normal return address pointer from
+ -20[frameaddr]. */
+
+ emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD));
+
+ /* Get pointer to the instruction stream. We have to mask out the
+ privilege level from the two low order bits of the return address
+ pointer here so that ins will point to the start of the first
+ instruction that would have been executed if we returned. */
+ ins = copy_to_reg (gen_rtx_AND (Pmode,
+ copy_to_reg (gen_rtx_MEM (Pmode, saved_rp)),
+ MASK_RETURN_ADDR));
+ label = gen_label_rtx ();
+
+ /* Check the instruction stream at the normal return address for the
+ export stub:
+
+ 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
+ 0x004010a1 | stub+12: ldsid (sr0,rp),r1
+ 0x00011820 | stub+16: mtsp r1,sr0
+ 0xe0400002 | stub+20: be,n 0(sr0,rp)
+
+ If it is an export stub, then our return address is really in
+ -24[frameaddr]. */
+
+ emit_cmp_insn (gen_rtx_MEM (SImode, ins),
+ GEN_INT (0x4bc23fd1),
+ NE, NULL_RTX, SImode, 1, 0);
+ emit_jump_insn (gen_bne (label));
+
+ emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
+ GEN_INT (0x004010a1),
+ NE, NULL_RTX, SImode, 1, 0);
+ emit_jump_insn (gen_bne (label));
+
+ emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
+ GEN_INT (0x00011820),
+ NE, NULL_RTX, SImode, 1, 0);
+ emit_jump_insn (gen_bne (label));
+
+ emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
+ GEN_INT (0xe0400002),
+ NE, NULL_RTX, SImode, 1, 0);
+
+ /* If there is no export stub then just use our initial guess of
+ -20[frameaddr]. */
+
+ emit_jump_insn (gen_bne (label));
+
+ /* Here we know that our return address pointer points to an export
+ stub. We don't want to return the address of the export stub,
+ but rather the return address that leads back into user code.
+ That return address is stored at -24[frameaddr]. */
+
+ emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD));
+
+ emit_label (label);
+ return gen_rtx_MEM (Pmode, memory_address (Pmode, saved_rp));
+}
+
+/* This is only valid once reload has completed because it depends on
+ knowing exactly how much (if any) frame there is and...
+
+ It's only valid if there is no frame marker to de-allocate and...
+
+ It's only valid if %r2 hasn't been saved into the caller's frame
+ (we're not profiling and %r2 isn't live anywhere). */
+int
+hppa_can_use_return_insn_p ()
+{
+ return (reload_completed
+ && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
+ && ! profile_flag
+ && ! regs_ever_live[2]
+ && ! frame_pointer_needed);
+}
+
+void
+emit_bcond_fp (code, operand0)
+ enum rtx_code code;
+ rtx operand0;
+{
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_fmt_ee (code,
+ VOIDmode,
+ gen_rtx_REG (CCFPmode, 0),
+ const0_rtx),
+ gen_rtx_LABEL_REF (VOIDmode, operand0),
+ pc_rtx)));
+
+}
+
+rtx
+gen_cmp_fp (code, operand0, operand1)
+ enum rtx_code code;
+ rtx operand0, operand1;
+{
+ return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
+ gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
+}
+
+/* Adjust the cost of a scheduling dependency. Return the new cost of
+ a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
+
+int
+pa_adjust_cost (insn, link, dep_insn, cost)
+ rtx insn;
+ rtx link;
+ rtx dep_insn;
+ int cost;
+{
+ enum attr_type attr_type;
+
+ if (! recog_memoized (insn))
+ return 0;
+
+ /* CYGNUS LOCAL PA8000/law */
+ /* No cost adjustments are needed for the PA8000. */
+ if (pa_cpu == PROCESSOR_8000)
+ return 0;
+ /* END CYGNUS LOCAL */
+
+ attr_type = get_attr_type (insn);
+
+ if (REG_NOTE_KIND (link) == 0)
+ {
+ /* Data dependency; DEP_INSN writes a register that INSN reads some
+ cycles later. */
+
+ if (attr_type == TYPE_FPSTORE)
+ {
+ rtx pat = PATTERN (insn);
+ rtx dep_pat = PATTERN (dep_insn);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ /* This happens for the fstXs,mb patterns. */
+ pat = XVECEXP (pat, 0, 0);
+ }
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ /* If this happens, we have to extend this to schedule
+ optimally. Return 0 for now. */
+ return 0;
+
+ if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
+ {
+ if (! recog_memoized (dep_insn))
+ return 0;
+ /* DEP_INSN is writing its result to the register
+ being stored in the fpstore INSN. */
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_FPLOAD:
+ /* This costs 3 cycles, not 2 as the md says, for the
+ 700 and 7100. */
+ return cost + 1;
+
+ case TYPE_FPALU:
+ case TYPE_FPMULSGL:
+ case TYPE_FPMULDBL:
+ case TYPE_FPDIVSGL:
+ case TYPE_FPDIVDBL:
+ case TYPE_FPSQRTSGL:
+ case TYPE_FPSQRTDBL:
+ /* In these important cases, we save one cycle compared to
+ when flop instructions feed each other. */
+ return cost - 1;
+
+ default:
+ return cost;
+ }
+ }
+ }
+
+ /* For other data dependencies, the default cost specified in the
+ md is correct. */
+ return cost;
+ }
+ else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
+ {
+ /* Anti dependency; DEP_INSN reads a register that INSN writes some
+ cycles later. */
+
+ if (attr_type == TYPE_FPLOAD)
+ {
+ rtx pat = PATTERN (insn);
+ rtx dep_pat = PATTERN (dep_insn);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ /* This happens for the fldXs,mb patterns. */
+ pat = XVECEXP (pat, 0, 0);
+ }
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ /* If this happens, we have to extend this to schedule
+ optimally. Return 0 for now. */
+ return 0;
+
+ if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
+ {
+ if (! recog_memoized (dep_insn))
+ return 0;
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_FPALU:
+ case TYPE_FPMULSGL:
+ case TYPE_FPMULDBL:
+ case TYPE_FPDIVSGL:
+ case TYPE_FPDIVDBL:
+ case TYPE_FPSQRTSGL:
+ case TYPE_FPSQRTDBL:
+ /* A fpload can't be issued until one cycle before a
+ preceding arithmetic operation has finished if
+ the target of the fpload is any of the sources
+ (or destination) of the arithmetic operation. */
+ return cost - 1;
+
+ default:
+ return 0;
+ }
+ }
+ }
+ else if (attr_type == TYPE_FPALU)
+ {
+ rtx pat = PATTERN (insn);
+ rtx dep_pat = PATTERN (dep_insn);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ /* This happens for the fldXs,mb patterns. */
+ pat = XVECEXP (pat, 0, 0);
+ }
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ /* If this happens, we have to extend this to schedule
+ optimally. Return 0 for now. */
+ return 0;
+
+ if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
+ {
+ if (! recog_memoized (dep_insn))
+ return 0;
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_FPDIVSGL:
+ case TYPE_FPDIVDBL:
+ case TYPE_FPSQRTSGL:
+ case TYPE_FPSQRTDBL:
+ /* An ALU flop can't be issued until two cycles before a
+ preceding divide or sqrt operation has finished if
+ the target of the ALU flop is any of the sources
+ (or destination) of the divide or sqrt operation. */
+ return cost - 2;
+
+ default:
+ return 0;
+ }
+ }
+ }
+
+ /* For other anti dependencies, the cost is 0. */
+ return 0;
+ }
+ else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+ {
+ /* Output dependency; DEP_INSN writes a register that INSN writes some
+ cycles later. */
+ if (attr_type == TYPE_FPLOAD)
+ {
+ rtx pat = PATTERN (insn);
+ rtx dep_pat = PATTERN (dep_insn);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ /* This happens for the fldXs,mb patterns. */
+ pat = XVECEXP (pat, 0, 0);
+ }
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ /* If this happens, we have to extend this to schedule
+ optimally. Return 0 for now. */
+ return 0;
+
+ if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
+ {
+ if (! recog_memoized (dep_insn))
+ return 0;
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_FPALU:
+ case TYPE_FPMULSGL:
+ case TYPE_FPMULDBL:
+ case TYPE_FPDIVSGL:
+ case TYPE_FPDIVDBL:
+ case TYPE_FPSQRTSGL:
+ case TYPE_FPSQRTDBL:
+ /* A fpload can't be issued until one cycle before a
+ preceding arithmetic operation has finished if
+ the target of the fpload is the destination of the
+ arithmetic operation. */
+ return cost - 1;
+
+ default:
+ return 0;
+ }
+ }
+ }
+ else if (attr_type == TYPE_FPALU)
+ {
+ rtx pat = PATTERN (insn);
+ rtx dep_pat = PATTERN (dep_insn);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ /* This happens for the fldXs,mb patterns. */
+ pat = XVECEXP (pat, 0, 0);
+ }
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ /* If this happens, we have to extend this to schedule
+ optimally. Return 0 for now. */
+ return 0;
+
+ if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
+ {
+ if (! recog_memoized (dep_insn))
+ return 0;
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_FPDIVSGL:
+ case TYPE_FPDIVDBL:
+ case TYPE_FPSQRTSGL:
+ case TYPE_FPSQRTDBL:
+ /* An ALU flop can't be issued until two cycles before a
+ preceding divide or sqrt operation has finished if
+ the target of the ALU flop is also the target of
+ the divide or sqrt operation. */
+ return cost - 2;
+
+ default:
+ return 0;
+ }
+ }
+ }
+
+ /* For other output dependencies, the cost is 0. */
+ return 0;
+ }
+ else
+ abort ();
+}
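+
+ /* Two illustrative cases (processor behavior as described in the comments
+ above, not re-measured here): an fpload whose result is immediately stored
+ by an fpstore costs one cycle more than the md claims (cost + 1), while an
+ fp arithmetic result that feeds an fpstore saves a cycle (cost - 1). */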
+
+/* Return any length adjustment needed by INSN which already has its length
+ computed as LENGTH. Return zero if no adjustment is necessary.
+
+ For the PA: function calls, millicode calls, and backwards short
+ conditional branches with unfilled delay slots need an adjustment by +1
+ (to account for the NOP which will be inserted into the instruction stream).
+
+ Also compute the length of an inline block move here as it is too
+ complicated to express as a length attribute in pa.md. */
+int
+pa_adjust_insn_length (insn, length)
+ rtx insn;
+ int length;
+{
+ rtx pat = PATTERN (insn);
+
+ /* Call insns which are *not* indirect and have unfilled delay slots. */
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+
+ if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
+ && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
+ return 4;
+ else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
+ && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
+ == SYMBOL_REF)
+ return 4;
+ else
+ return 0;
+ }
+ /* Jumps inside switch tables which have unfilled delay slots
+ also need adjustment. */
+ else if (GET_CODE (insn) == JUMP_INSN
+ && simplejump_p (insn)
+ && GET_MODE (insn) == SImode)
+ return 4;
+ /* Millicode insn with an unfilled delay slot. */
+ else if (GET_CODE (insn) == INSN
+ && GET_CODE (pat) != SEQUENCE
+ && GET_CODE (pat) != USE
+ && GET_CODE (pat) != CLOBBER
+ && get_attr_type (insn) == TYPE_MILLI)
+ return 4;
+ /* Block move pattern. */
+ else if (GET_CODE (insn) == INSN
+ && GET_CODE (pat) == PARALLEL
+ && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
+ && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
+ && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
+ && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
+ return compute_movstrsi_length (insn) - 4;
+ /* Conditional branch with an unfilled delay slot. */
+ else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
+ {
+ /* Adjust a short backwards conditional with an unfilled delay slot. */
+ if (GET_CODE (pat) == SET
+ && length == 4
+ && ! forward_branch_p (insn))
+ return 4;
+ else if (GET_CODE (pat) == PARALLEL
+ && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
+ && length == 4)
+ return 4;
+ /* Adjust dbra insn with short backwards conditional branch with
+ unfilled delay slot -- only for case where counter is in a
+ general register. */
+ else if (GET_CODE (pat) == PARALLEL
+ && GET_CODE (XVECEXP (pat, 0, 1)) == SET
+ && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
+ && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
+ && length == 4
+ && ! forward_branch_p (insn))
+ return 4;
+ else
+ return 0;
+ }
+ return 0;
+}
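+
+ /* For instance, a short backwards conditional branch whose delay slot
+ could not be filled is counted here as 4 extra bytes, since final will
+ emit a nop into the slot; the branch therefore occupies 8 bytes rather
+ than the 4 its length attribute claims. */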
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */
+
+void
+print_operand (file, x, code)
+ FILE *file;
+ rtx x;
+ int code;
+{
+ switch (code)
+ {
+ case '#':
+ /* Output a 'nop' if there's nothing for the delay slot. */
+ if (dbr_sequence_length () == 0)
+ fputs ("\n\tnop", file);
+ return;
+ case '*':
+ /* Output a nullification completer if there's nothing for the
+ delay slot or nullification is requested. */
+ if (dbr_sequence_length () == 0 ||
+ (final_sequence &&
+ INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
+ fputs (",n", file);
+ return;
+ case 'R':
+ /* Print out the second register name of a register pair.
+ I.e., R (6) => 7. */
+ fputs (reg_names[REGNO (x)+1], file);
+ return;
+ case 'r':
+ /* A register or zero. */
+ if (x == const0_rtx
+ || (x == CONST0_RTX (DFmode))
+ || (x == CONST0_RTX (SFmode)))
+ {
+ fputs ("0", file);
+ return;
+ }
+ else
+ break;
+ case 'C': /* Plain (C)ondition */
+ case 'X':
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("=", file); break;
+ case NE:
+ fputs ("<>", file); break;
+ case GT:
+ fputs (">", file); break;
+ case GE:
+ fputs (">=", file); break;
+ case GEU:
+ fputs (">>=", file); break;
+ case GTU:
+ fputs (">>", file); break;
+ case LT:
+ fputs ("<", file); break;
+ case LE:
+ fputs ("<=", file); break;
+ case LEU:
+ fputs ("<<=", file); break;
+ case LTU:
+ fputs ("<<", file); break;
+ default:
+ abort ();
+ }
+ return;
+ case 'N': /* Condition, (N)egated */
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("<>", file); break;
+ case NE:
+ fputs ("=", file); break;
+ case GT:
+ fputs ("<=", file); break;
+ case GE:
+ fputs ("<", file); break;
+ case GEU:
+ fputs ("<<", file); break;
+ case GTU:
+ fputs ("<<=", file); break;
+ case LT:
+ fputs (">=", file); break;
+ case LE:
+ fputs (">", file); break;
+ case LEU:
+ fputs (">>", file); break;
+ case LTU:
+ fputs (">>=", file); break;
+ default:
+ abort ();
+ }
+ return;
+ /* For floating point comparisons. Need special conditions to deal
+ with NaNs properly. */
+ case 'Y':
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("!=", file); break;
+ case NE:
+ fputs ("=", file); break;
+ case GT:
+ fputs ("<=", file); break;
+ case GE:
+ fputs ("<", file); break;
+ case LT:
+ fputs (">=", file); break;
+ case LE:
+ fputs (">", file); break;
+ default:
+ abort ();
+ }
+ return;
+ case 'S': /* Condition, operands are (S)wapped. */
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("=", file); break;
+ case NE:
+ fputs ("<>", file); break;
+ case GT:
+ fputs ("<", file); break;
+ case GE:
+ fputs ("<=", file); break;
+ case GEU:
+ fputs ("<<=", file); break;
+ case GTU:
+ fputs ("<<", file); break;
+ case LT:
+ fputs (">", file); break;
+ case LE:
+ fputs (">=", file); break;
+ case LEU:
+ fputs (">>=", file); break;
+ case LTU:
+ fputs (">>", file); break;
+ default:
+ abort ();
+ }
+ return;
+ case 'B': /* Condition, (B)oth swapped and negate. */
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("<>", file); break;
+ case NE:
+ fputs ("=", file); break;
+ case GT:
+ fputs (">=", file); break;
+ case GE:
+ fputs (">", file); break;
+ case GEU:
+ fputs (">>", file); break;
+ case GTU:
+ fputs (">>=", file); break;
+ case LT:
+ fputs ("<=", file); break;
+ case LE:
+ fputs ("<", file); break;
+ case LEU:
+ fputs ("<<", file); break;
+ case LTU:
+ fputs ("<<=", file); break;
+ default:
+ abort ();
+ }
+ return;
+ case 'k':
+ if (GET_CODE (x) == CONST_INT)
+ {
+ fprintf (file, "%d", ~INTVAL (x));
+ return;
+ }
+ abort();
+ case 'L':
+ if (GET_CODE (x) == CONST_INT)
+ {
+ fprintf (file, "%d", 32 - (INTVAL (x) & 31));
+ return;
+ }
+ abort();
+ case 'O':
+ if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
+ {
+ fprintf (file, "%d", exact_log2 (INTVAL (x)));
+ return;
+ }
+ abort();
+ case 'P':
+ if (GET_CODE (x) == CONST_INT)
+ {
+ fprintf (file, "%d", 31 - (INTVAL (x) & 31));
+ return;
+ }
+ abort();
+ case 'I':
+ if (GET_CODE (x) == CONST_INT)
+ fputs ("i", file);
+ return;
+ case 'M':
+ case 'F':
+ switch (GET_CODE (XEXP (x, 0)))
+ {
+ case PRE_DEC:
+ case PRE_INC:
+ fputs ("s,mb", file);
+ break;
+ case POST_DEC:
+ case POST_INC:
+ fputs ("s,ma", file);
+ break;
+ case PLUS:
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
+ fputs ("x,s", file);
+ else if (code == 'F')
+ fputs ("s", file);
+ break;
+ default:
+ if (code == 'F')
+ fputs ("s", file);
+ break;
+ }
+ return;
+ case 'G':
+ output_global_address (file, x, 0);
+ return;
+ case 'H':
+ output_global_address (file, x, 1);
+ return;
+ case 0: /* Don't do anything special */
+ break;
+ case 'Z':
+ {
+ unsigned op[3];
+ compute_zdepi_operands (INTVAL (x), op);
+ fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
+ return;
+ }
+ default:
+ abort ();
+ }
+ if (GET_CODE (x) == REG)
+ {
+ fputs (reg_names [REGNO (x)], file);
+ if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0)
+ fputs ("L", file);
+ }
+ else if (GET_CODE (x) == MEM)
+ {
+ int size = GET_MODE_SIZE (GET_MODE (x));
+ rtx base = XEXP (XEXP (x, 0), 0);
+ switch (GET_CODE (XEXP (x, 0)))
+ {
+ case PRE_DEC:
+ case POST_DEC:
+ fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
+ break;
+ case PRE_INC:
+ case POST_INC:
+ fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
+ break;
+ default:
+ if (GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
+ fprintf (file, "%s(0,%s)",
+ reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
+ reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
+ else if (GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
+ fprintf (file, "%s(0,%s)",
+ reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
+ reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
+ else
+ output_address (XEXP (x, 0));
+ break;
+ }
+ }
+ else
+ output_addr_const (file, x);
+}
+
+/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
+
+void
+output_global_address (file, x, round_constant)
+ FILE *file;
+ rtx x;
+ int round_constant;
+{
+
+ /* Imagine (high (const (plus ...))). */
+ if (GET_CODE (x) == HIGH)
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
+ assemble_name (file, XSTR (x, 0));
+ else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
+ {
+ assemble_name (file, XSTR (x, 0));
+ fputs ("-$global$", file);
+ }
+ else if (GET_CODE (x) == CONST)
+ {
+ char *sep = "";
+ int offset = 0; /* assembler wants -$global$ at end */
+ rtx base = NULL_RTX;
+
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
+ {
+ base = XEXP (XEXP (x, 0), 0);
+ output_addr_const (file, base);
+ }
+ else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
+ offset = INTVAL (XEXP (XEXP (x, 0), 0));
+ else abort ();
+
+ if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
+ {
+ base = XEXP (XEXP (x, 0), 1);
+ output_addr_const (file, base);
+ }
+ else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
+ offset = INTVAL (XEXP (XEXP (x, 0),1));
+ else abort ();
+
+ /* How bogus. The compiler is apparently responsible for
+ rounding the constant if it uses an LR field selector.
+
+ The linker and/or assembler seem a better place since
+ they have to do this kind of thing already.
+
+ If we fail to do this, HP's optimizing linker may eliminate
+ an addil, but not update the ldw/stw/ldo instruction that
+ uses the result of the addil. */
+ if (round_constant)
+ offset = ((offset + 0x1000) & ~0x1fff);
+
+ if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ if (offset < 0)
+ {
+ offset = -offset;
+ sep = "-";
+ }
+ else
+ sep = "+";
+ }
+ else if (GET_CODE (XEXP (x, 0)) == MINUS
+ && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
+ sep = "-";
+ else abort ();
+
+ if (!read_only_operand (base) && !flag_pic)
+ fputs ("-$global$", file);
+ if (offset)
+ fprintf (file,"%s%d", sep, offset);
+ }
+ else
+ output_addr_const (file, x);
+}
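+
+ /* A worked example (symbol name hypothetical): for the address
+ (const (plus (symbol_ref "bar") (const_int 8))) with flag_pic clear and
+ "bar" not read-only, this outputs "bar-$global$+8". With round_constant
+ nonzero the offset would first be rounded to a multiple of 0x2000. */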
+
+void
+output_deferred_plabels (file)
+ FILE *file;
+{
+ int i;
+ /* If we have deferred plabels, then we need to switch into the data
+ section and align it to a 4 byte boundary before we output the
+ deferred plabels. */
+ if (n_deferred_plabels)
+ {
+ data_section ();
+ ASM_OUTPUT_ALIGN (file, 2);
+ }
+
+ /* Now output the deferred plabels. */
+ for (i = 0; i < n_deferred_plabels; i++)
+ {
+ ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
+ assemble_integer (gen_rtx_SYMBOL_REF (VOIDmode,
+ deferred_plabels[i].name), 4, 1);
+ }
+}
+
+/* HP's millicode routines mean something special to the assembler.
+ Keep track of which ones we have used. */
+
+enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
+static char imported[(int)end1000];
+static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
+static char import_string[] = ".IMPORT $$....,MILLICODE";
+#define MILLI_START 10
+
+static void
+import_milli (code)
+ enum millicodes code;
+{
+ char str[sizeof (import_string)];
+
+ if (!imported[(int)code])
+ {
+ imported[(int)code] = 1;
+ strcpy (str, import_string);
+ strncpy (str + MILLI_START, milli_names[(int)code], 4);
+ output_asm_insn (str, 0);
+ }
+}
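+
+ /* For example, the first use of mulI overwrites the "...." placeholder
+ (at offset MILLI_START) with the routine name and emits
+
+ .IMPORT $$mulI,MILLICODE
+
+ Subsequent uses are suppressed by the imported[] flag. */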
+
+/* The register constraints have put the operands and return value in
+ the proper registers. */
+
+char *
+output_mul_insn (unsignedp, insn)
+ int unsignedp ATTRIBUTE_UNUSED;
+ rtx insn;
+{
+ import_milli (mulI);
+ return output_millicode_call (insn, gen_rtx_SYMBOL_REF (SImode, "$$mulI"));
+}
+
+/* Emit the rtl for doing a division by a constant. */
+
+/* Do magic division millicodes exist for this value? */
+static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
+ 1, 1};
+
+/* We'll use an array to keep track of the magic millicodes and
+ whether or not we've used them already. [n][0] is signed, [n][1] is
+ unsigned. */
+
+static int div_milli[16][2];
+
+int
+div_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ return (mode == SImode
+ && ((GET_CODE (op) == REG && REGNO (op) == 25)
+ || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
+ && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
+}
+
+int
+emit_hpdiv_const (operands, unsignedp)
+ rtx *operands;
+ int unsignedp;
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) < 16
+ && magic_milli[INTVAL (operands[2])])
+ {
+ emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
+ emit
+ (gen_rtx
+ (PARALLEL, VOIDmode,
+ gen_rtvec (5, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
+ gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
+ SImode,
+ gen_rtx_REG (SImode, 26),
+ operands[2])),
+ gen_rtx_CLOBBER (VOIDmode, operands[3]),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 31)))));
+ emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
+ return 1;
+ }
+ return 0;
+}
+
+char *
+output_div_insn (operands, unsignedp, insn)
+ rtx *operands;
+ int unsignedp;
+ rtx insn;
+{
+ int divisor;
+
+ /* If the divisor is a constant, try to use one of the special
+ opcodes. */
+ if (GET_CODE (operands[0]) == CONST_INT)
+ {
+ static char buf[100];
+ divisor = INTVAL (operands[0]);
+ if (!div_milli[divisor][unsignedp])
+ {
+ div_milli[divisor][unsignedp] = 1;
+ if (unsignedp)
+ output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
+ else
+ output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
+ }
+ if (unsignedp)
+ {
+ sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, buf));
+ }
+ else
+ {
+ sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, buf));
+ }
+ }
+ /* Divisor isn't a special constant. */
+ else
+ {
+ if (unsignedp)
+ {
+ import_milli (divU);
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, "$$divU"));
+ }
+ else
+ {
+ import_milli (divI);
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, "$$divI"));
+ }
+ }
+}
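+
+ /* For illustration: a division by 10 has magic_milli[10] set, so the
+ divisor stays a CONST_INT, the first use emits
+ ".IMPORT $$divI_10,MILLICODE" and the call goes to $$divI_10. A division
+ by 11 has no magic millicode, so div_operand rejects the constant and the
+ generic $$divI routine is called with the divisor in %r25. */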
+
+/* Output a $$rem millicode to do mod. */
+
+char *
+output_mod_insn (unsignedp, insn)
+ int unsignedp;
+ rtx insn;
+{
+ if (unsignedp)
+ {
+ import_milli (remU);
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, "$$remU"));
+ }
+ else
+ {
+ import_milli (remI);
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, "$$remI"));
+ }
+}
+
+void
+output_arg_descriptor (call_insn)
+ rtx call_insn;
+{
+ char *arg_regs[4];
+ enum machine_mode arg_mode;
+ rtx link;
+ int i, output_flag = 0;
+ int regno;
+
+ for (i = 0; i < 4; i++)
+ arg_regs[i] = 0;
+
+ /* Specify explicitly that no argument relocations should take place
+ if using the portable runtime calling conventions. */
+ if (TARGET_PORTABLE_RUNTIME)
+ {
+ fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
+ asm_out_file);
+ return;
+ }
+
+ if (GET_CODE (call_insn) != CALL_INSN)
+ abort ();
+ for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1))
+ {
+ rtx use = XEXP (link, 0);
+
+ if (! (GET_CODE (use) == USE
+ && GET_CODE (XEXP (use, 0)) == REG
+ && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
+ continue;
+
+ arg_mode = GET_MODE (XEXP (use, 0));
+ regno = REGNO (XEXP (use, 0));
+ if (regno >= 23 && regno <= 26)
+ {
+ arg_regs[26 - regno] = "GR";
+ if (arg_mode == DImode)
+ arg_regs[25 - regno] = "GR";
+ }
+ else if (regno >= 32 && regno <= 39)
+ {
+ if (arg_mode == SFmode)
+ arg_regs[(regno - 32) / 2] = "FR";
+ else
+ {
+#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
+ arg_regs[(regno - 34) / 2] = "FR";
+ arg_regs[(regno - 34) / 2 + 1] = "FU";
+#else
+ arg_regs[(regno - 34) / 2] = "FU";
+ arg_regs[(regno - 34) / 2 + 1] = "FR";
+#endif
+ }
+ }
+ }
+ fputs ("\t.CALL ", asm_out_file);
+ for (i = 0; i < 4; i++)
+ {
+ if (arg_regs[i])
+ {
+ if (output_flag++)
+ fputc (',', asm_out_file);
+ fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
+ }
+ }
+ fputc ('\n', asm_out_file);
+}
+
+/* Return the class of any secondary reload register that is needed to
+ move IN into a register in class CLASS using mode MODE.
+
+   Profiling has shown that this routine and its descendants account for
+ a significant amount of compile time (~7%). So it has been
+ optimized to reduce redundant computations and eliminate useless
+ function calls.
+
+ It might be worthwhile to try and make this a leaf function too. */
+
+enum reg_class
+secondary_reload_class (class, mode, in)
+ enum reg_class class;
+ enum machine_mode mode;
+ rtx in;
+{
+ int regno, is_symbolic;
+
+ /* Trying to load a constant into a FP register during PIC code
+ generation will require %r1 as a scratch register. */
+ if (flag_pic == 2
+ && GET_MODE_CLASS (mode) == MODE_INT
+ && FP_REG_CLASS_P (class)
+ && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE))
+ return R1_REGS;
+
+ /* Profiling showed the PA port spends about 1.3% of its compilation
+ time in true_regnum from calls inside secondary_reload_class. */
+
+ if (GET_CODE (in) == REG)
+ {
+ regno = REGNO (in);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ regno = true_regnum (in);
+ }
+ else if (GET_CODE (in) == SUBREG)
+ regno = true_regnum (in);
+ else
+ regno = -1;
+
+  /* If we have something like (mem (mem (...))), we can safely assume the
+ inner MEM will end up in a general register after reloading, so there's
+ no need for a secondary reload. */
+ if (GET_CODE (in) == MEM
+ && GET_CODE (XEXP (in, 0)) == MEM)
+ return NO_REGS;
+
+ /* Handle out of range displacement for integer mode loads/stores of
+ FP registers. */
+ if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
+ && GET_MODE_CLASS (mode) == MODE_INT
+ && FP_REG_CLASS_P (class))
+ || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
+ return GENERAL_REGS;
+
+ if (GET_CODE (in) == HIGH)
+ in = XEXP (in, 0);
+
+  /* Profiling has shown that GCC spends about 2.6% of its compilation
+ time in symbolic_operand from calls inside secondary_reload_class.
+
+ We use an inline copy and only compute its return value once to avoid
+ useless work. */
+ switch (GET_CODE (in))
+ {
+ rtx tmp;
+
+ case SYMBOL_REF:
+ case LABEL_REF:
+ is_symbolic = 1;
+ break;
+ case CONST:
+ tmp = XEXP (in, 0);
+ is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (tmp, 0)) == LABEL_REF)
+ && GET_CODE (XEXP (tmp, 1)) == CONST_INT);
+ break;
+
+ default:
+ is_symbolic = 0;
+ break;
+ }
+
+ if (!flag_pic
+ && is_symbolic
+ && read_only_operand (in))
+ return NO_REGS;
+
+ if (class != R1_REGS && is_symbolic)
+ return R1_REGS;
+
+ return NO_REGS;
+}
+
+enum direction
+function_arg_padding (mode, type)
+ enum machine_mode mode;
+ tree type;
+{
+ int size;
+
+ if (mode == BLKmode)
+ {
+ if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
+ size = int_size_in_bytes (type) * BITS_PER_UNIT;
+ else
+ return upward; /* Don't know if this is right, but */
+ /* same as old definition. */
+ }
+ else
+ size = GET_MODE_BITSIZE (mode);
+ if (size < PARM_BOUNDARY)
+ return downward;
+ else if (size % PARM_BOUNDARY)
+ return upward;
+ else
+ return none;
+}
+
+
+/* Do what is necessary for `va_start'. The argument is ignored;
+   we look at the current function to determine if stdargs or varargs
+ is used and fill in an initial va_list. A pointer to this constructor
+ is returned. */
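+
+/* Roughly what the code below does: the four incoming argument registers
+   (%r23 through %r26) are dumped into the 16 bytes just below the incoming
+   argument pointer, and the value returned is the argument pointer advanced
+   past the named parameters, so later va_arg accesses see one contiguous
+   argument area.  */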
+
+struct rtx_def *
+hppa_builtin_saveregs (arglist)
+ tree arglist ATTRIBUTE_UNUSED;
+{
+ rtx offset, dest;
+ tree fntype = TREE_TYPE (current_function_decl);
+ int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
+ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
+ != void_type_node)))
+ ? UNITS_PER_WORD : 0);
+
+ if (argadj)
+ offset = plus_constant (current_function_arg_offset_rtx, argadj);
+ else
+ offset = current_function_arg_offset_rtx;
+
+ /* Store general registers on the stack. */
+ dest = gen_rtx_MEM (BLKmode,
+ plus_constant (current_function_internal_arg_pointer,
+ -16));
+ move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD);
+
+ /* move_block_from_reg will emit code to store the argument registers
+ individually as scalar stores.
+
+ However, other insns may later load from the same addresses for
+ a structure load (passing a struct to a varargs routine).
+
+ The alias code assumes that such aliasing can never happen, so we
+ have to keep memory referencing insns from moving up beyond the
+ last argument register store. So we emit a blockage insn here. */
+ emit_insn (gen_blockage ());
+
+ if (current_function_check_memory_usage)
+ emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
+ dest, ptr_mode,
+ GEN_INT (4 * UNITS_PER_WORD), TYPE_MODE (sizetype),
+ GEN_INT (MEMORY_USE_RW),
+ TYPE_MODE (integer_type_node));
+
+ return copy_to_reg (expand_binop (Pmode, add_optab,
+ current_function_internal_arg_pointer,
+ offset, 0, 0, OPTAB_LIB_WIDEN));
+}
+
+/* This routine handles all the normal conditional branch sequences we
+ might need to generate. It handles compare immediate vs compare
+ register, nullification of delay slots, varying length branches,
+ negated branches, and all combinations of the above. It returns the
+ output appropriate to emit the branch corresponding to all given
+ parameters. */
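+
+/* For illustration (operands invented): the short 4-byte form for an
+   equality test with a nullified delay slot comes out as something like
+
+	comb,=,n %r5,%r4,L$0012
+
+   while the 8-byte form uses a comclr to skip over an unconditional
+   branch, e.g.
+
+	comclr,<> %r5,%r4,0
+	bl L$0012,0  */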
+
+char *
+output_cbranch (operands, nullify, length, negated, insn)
+ rtx *operands;
+ int nullify, length, negated;
+ rtx insn;
+{
+ static char buf[100];
+ int useskip = 0;
+
+ /* A conditional branch to the following instruction (eg the delay slot) is
+ asking for a disaster. This can happen when not optimizing.
+
+ In such cases it is safe to emit nothing. */
+
+ if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
+ return "";
+
+ /* If this is a long branch with its delay slot unfilled, set `nullify'
+ as it can nullify the delay slot and save a nop. */
+ if (length == 8 && dbr_sequence_length () == 0)
+ nullify = 1;
+
+ /* If this is a short forward conditional branch which did not get
+ its delay slot filled, the delay slot can still be nullified. */
+ if (! nullify && length == 4 && dbr_sequence_length () == 0)
+ nullify = forward_branch_p (insn);
+
+ /* A forward branch over a single nullified insn can be done with a
+ comclr instruction. This avoids a single cycle penalty due to
+ mis-predicted branch if we fall through (branch not taken). */
+ if (length == 4
+ && next_real_insn (insn) != 0
+ && get_attr_length (next_real_insn (insn)) == 4
+ && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
+ && nullify)
+ useskip = 1;
+
+ switch (length)
+ {
+ /* All short conditional branches except backwards with an unfilled
+ delay slot. */
+ case 4:
+ if (useskip)
+ strcpy (buf, "com%I2clr,");
+ else
+ strcpy (buf, "com%I2b,");
+ if (negated)
+ strcat (buf, "%B3");
+ else
+ strcat (buf, "%S3");
+ if (useskip)
+ strcat (buf, " %2,%r1,0");
+ else if (nullify)
+ strcat (buf, ",n %2,%r1,%0");
+ else
+ strcat (buf, " %2,%r1,%0");
+ break;
+
+    /* All long conditionals.  Note a short backward branch with an
+ unfilled delay slot is treated just like a long backward branch
+ with an unfilled delay slot. */
+ case 8:
+      /* Handle weird backwards branch with a filled delay slot
+         which is nullified. */
+ if (dbr_sequence_length () != 0
+ && ! forward_branch_p (insn)
+ && nullify)
+ {
+ strcpy (buf, "com%I2b,");
+ if (negated)
+ strcat (buf, "%S3");
+ else
+ strcat (buf, "%B3");
+ strcat (buf, ",n %2,%r1,.+12\n\tbl %0,0");
+ }
+ /* Handle short backwards branch with an unfilled delay slot.
+ Using a comb;nop rather than comiclr;bl saves 1 cycle for both
+ taken and untaken branches. */
+ else if (dbr_sequence_length () == 0
+ && ! forward_branch_p (insn)
+ && insn_addresses
+ && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
+ - insn_addresses[INSN_UID (insn)] - 8))
+ {
+ strcpy (buf, "com%I2b,");
+ if (negated)
+ strcat (buf, "%B3 %2,%r1,%0%#");
+ else
+ strcat (buf, "%S3 %2,%r1,%0%#");
+ }
+ else
+ {
+ strcpy (buf, "com%I2clr,");
+ if (negated)
+ strcat (buf, "%S3");
+ else
+ strcat (buf, "%B3");
+ if (nullify)
+ strcat (buf, " %2,%r1,0\n\tbl,n %0,0");
+ else
+ strcat (buf, " %2,%r1,0\n\tbl %0,0");
+ }
+ break;
+
+ case 20:
+ /* Very long branch. Right now we only handle these when not
+ optimizing. See "jump" pattern in pa.md for details. */
+ if (optimize)
+ abort ();
+
+ /* Create a reversed conditional branch which branches around
+ the following insns. */
+ if (negated)
+ strcpy (buf, "com%I2b,%S3,n %2,%r1,.+20");
+ else
+ strcpy (buf, "com%I2b,%B3,n %2,%r1,.+20");
+ output_asm_insn (buf, operands);
+
+ /* Output an insn to save %r1. */
+ output_asm_insn ("stw %%r1,-16(%%r30)", operands);
+
+ /* Now output a very long branch to the original target. */
+ output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);
+
+ /* Now restore the value of %r1 in the delay slot. We're not
+ optimizing so we know nothing else can be in the delay slot. */
+ return "ldw -16(%%r30),%%r1";
+
+ case 28:
+ /* Very long branch when generating PIC code. Right now we only
+ handle these when not optimizing. See "jump" pattern in pa.md
+ for details. */
+ if (optimize)
+ abort ();
+
+ /* Create a reversed conditional branch which branches around
+ the following insns. */
+ if (negated)
+ strcpy (buf, "com%I2b,%S3,n %2,%r1,.+28");
+ else
+ strcpy (buf, "com%I2b,%B3,n %2,%r1,.+28");
+ output_asm_insn (buf, operands);
+
+ /* Output an insn to save %r1. */
+ output_asm_insn ("stw %%r1,-16(%%r30)", operands);
+
+ /* Now output a very long PIC branch to the original target. */
+ {
+ rtx xoperands[5];
+
+ xoperands[0] = operands[0];
+ xoperands[1] = operands[1];
+ xoperands[2] = operands[2];
+ xoperands[3] = operands[3];
+ xoperands[4] = gen_label_rtx ();
+
+ output_asm_insn ("bl .+8,%%r1\n\taddil L'%l0-%l4,%%r1", xoperands);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[4]));
+ output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv 0(%%r1)", xoperands);
+ }
+
+ /* Now restore the value of %r1 in the delay slot. We're not
+ optimizing so we know nothing else can be in the delay slot. */
+ return "ldw -16(%%r30),%%r1";
+
+ default:
+ abort();
+ }
+ return buf;
+}
+
+/* This routine handles all the branch-on-bit conditional branch sequences we
+ might need to generate. It handles nullification of delay slots,
+ varying length branches, negated branches and all combinations of the
+   above.  It returns the appropriate output template to emit the branch. */
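+
+/* For illustration (operands invented), the simple 4-byte form is a
+   branch-on-bit such as
+
+	bb,< %r4,5,L$0012
+
+   which branches if bit 5 of %r4 is set, with an extrs used instead when
+   we only need to skip over a single nullified insn.  */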
+
+char *
+output_bb (operands, nullify, length, negated, insn, which)
+ rtx *operands ATTRIBUTE_UNUSED;
+ int nullify, length, negated;
+ rtx insn;
+ int which;
+{
+ static char buf[100];
+ int useskip = 0;
+
+ /* A conditional branch to the following instruction (eg the delay slot) is
+ asking for a disaster. I do not think this can happen as this pattern
+ is only used when optimizing; jump optimization should eliminate the
+ jump. But be prepared just in case. */
+
+ if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
+ return "";
+
+ /* If this is a long branch with its delay slot unfilled, set `nullify'
+ as it can nullify the delay slot and save a nop. */
+ if (length == 8 && dbr_sequence_length () == 0)
+ nullify = 1;
+
+ /* If this is a short forward conditional branch which did not get
+ its delay slot filled, the delay slot can still be nullified. */
+ if (! nullify && length == 4 && dbr_sequence_length () == 0)
+ nullify = forward_branch_p (insn);
+
+  /* A forward branch over a single nullified insn can be done with an
+ extrs instruction. This avoids a single cycle penalty due to
+ mis-predicted branch if we fall through (branch not taken). */
+
+ if (length == 4
+ && next_real_insn (insn) != 0
+ && get_attr_length (next_real_insn (insn)) == 4
+ && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
+ && nullify)
+ useskip = 1;
+
+ switch (length)
+ {
+
+ /* All short conditional branches except backwards with an unfilled
+ delay slot. */
+ case 4:
+ if (useskip)
+ strcpy (buf, "extrs,");
+ else
+ strcpy (buf, "bb,");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, ">=");
+ else
+ strcat (buf, "<");
+ if (useskip)
+ strcat (buf, " %0,%1,1,0");
+ else if (nullify && negated)
+ strcat (buf, ",n %0,%1,%3");
+ else if (nullify && ! negated)
+ strcat (buf, ",n %0,%1,%2");
+ else if (! nullify && negated)
+ strcat (buf, "%0,%1,%3");
+ else if (! nullify && ! negated)
+ strcat (buf, " %0,%1,%2");
+ break;
+
+    /* All long conditionals.  Note a short backward branch with an
+ unfilled delay slot is treated just like a long backward branch
+ with an unfilled delay slot. */
+ case 8:
+      /* Handle weird backwards branch with a filled delay slot
+         which is nullified. */
+ if (dbr_sequence_length () != 0
+ && ! forward_branch_p (insn)
+ && nullify)
+ {
+ strcpy (buf, "bb,");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, "<");
+ else
+ strcat (buf, ">=");
+ if (negated)
+ strcat (buf, ",n %0,%1,.+12\n\tbl %3,0");
+ else
+ strcat (buf, ",n %0,%1,.+12\n\tbl %2,0");
+ }
+ /* Handle short backwards branch with an unfilled delay slot.
+ Using a bb;nop rather than extrs;bl saves 1 cycle for both
+ taken and untaken branches. */
+ else if (dbr_sequence_length () == 0
+ && ! forward_branch_p (insn)
+ && insn_addresses
+ && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
+ - insn_addresses[INSN_UID (insn)] - 8))
+ {
+ strcpy (buf, "bb,");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, ">=");
+ else
+ strcat (buf, "<");
+ if (negated)
+ strcat (buf, " %0,%1,%3%#");
+ else
+ strcat (buf, " %0,%1,%2%#");
+ }
+ else
+ {
+ strcpy (buf, "extrs,");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, "<");
+ else
+ strcat (buf, ">=");
+ if (nullify && negated)
+ strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
+ else if (nullify && ! negated)
+ strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
+ else if (negated)
+ strcat (buf, " %0,%1,1,0\n\tbl %3,0");
+ else
+ strcat (buf, " %0,%1,1,0\n\tbl %2,0");
+ }
+ break;
+
+ default:
+ abort();
+ }
+ return buf;
+}
+
+/* This routine handles all the branch-on-variable-bit conditional branch
+ sequences we might need to generate. It handles nullification of delay
+ slots, varying length branches, negated branches and all combinations
+   of the above.  It returns the appropriate output template to emit the
+ branch. */
+
+char *
+output_bvb (operands, nullify, length, negated, insn, which)
+ rtx *operands ATTRIBUTE_UNUSED;
+ int nullify, length, negated;
+ rtx insn;
+ int which;
+{
+ static char buf[100];
+ int useskip = 0;
+
+ /* A conditional branch to the following instruction (eg the delay slot) is
+ asking for a disaster. I do not think this can happen as this pattern
+ is only used when optimizing; jump optimization should eliminate the
+ jump. But be prepared just in case. */
+
+ if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
+ return "";
+
+ /* If this is a long branch with its delay slot unfilled, set `nullify'
+ as it can nullify the delay slot and save a nop. */
+ if (length == 8 && dbr_sequence_length () == 0)
+ nullify = 1;
+
+ /* If this is a short forward conditional branch which did not get
+ its delay slot filled, the delay slot can still be nullified. */
+ if (! nullify && length == 4 && dbr_sequence_length () == 0)
+ nullify = forward_branch_p (insn);
+
+  /* A forward branch over a single nullified insn can be done with a
+     vextrs instruction.  This avoids a single cycle penalty due to
+ mis-predicted branch if we fall through (branch not taken). */
+
+ if (length == 4
+ && next_real_insn (insn) != 0
+ && get_attr_length (next_real_insn (insn)) == 4
+ && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
+ && nullify)
+ useskip = 1;
+
+ switch (length)
+ {
+
+ /* All short conditional branches except backwards with an unfilled
+ delay slot. */
+ case 4:
+ if (useskip)
+ strcpy (buf, "vextrs,");
+ else
+ strcpy (buf, "bvb,");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, ">=");
+ else
+ strcat (buf, "<");
+ if (useskip)
+ strcat (buf, " %0,1,0");
+ else if (nullify && negated)
+ strcat (buf, ",n %0,%3");
+ else if (nullify && ! negated)
+ strcat (buf, ",n %0,%2");
+ else if (! nullify && negated)
+ strcat (buf, "%0,%3");
+ else if (! nullify && ! negated)
+ strcat (buf, " %0,%2");
+ break;
+
+    /* All long conditionals.  Note a short backward branch with an
+ unfilled delay slot is treated just like a long backward branch
+ with an unfilled delay slot. */
+ case 8:
+      /* Handle weird backwards branch with a filled delay slot
+         which is nullified. */
+ if (dbr_sequence_length () != 0
+ && ! forward_branch_p (insn)
+ && nullify)
+ {
+ strcpy (buf, "bvb,");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, "<");
+ else
+ strcat (buf, ">=");
+ if (negated)
+ strcat (buf, ",n %0,.+12\n\tbl %3,0");
+ else
+ strcat (buf, ",n %0,.+12\n\tbl %2,0");
+ }
+      /* Handle short backwards branch with an unfilled delay slot.
+         Using a bvb;nop rather than vextrs;bl saves 1 cycle for both
+         taken and untaken branches. */
+ else if (dbr_sequence_length () == 0
+ && ! forward_branch_p (insn)
+ && insn_addresses
+ && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
+ - insn_addresses[INSN_UID (insn)] - 8))
+ {
+ strcpy (buf, "bvb,");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, ">=");
+ else
+ strcat (buf, "<");
+ if (negated)
+ strcat (buf, " %0,%3%#");
+ else
+ strcat (buf, " %0,%2%#");
+ }
+ else
+ {
+ strcpy (buf, "vextrs,");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, "<");
+ else
+ strcat (buf, ">=");
+ if (nullify && negated)
+ strcat (buf, " %0,1,0\n\tbl,n %3,0");
+ else if (nullify && ! negated)
+ strcat (buf, " %0,1,0\n\tbl,n %2,0");
+ else if (negated)
+ strcat (buf, " %0,1,0\n\tbl %3,0");
+ else
+ strcat (buf, " %0,1,0\n\tbl %2,0");
+ }
+ break;
+
+ default:
+ abort();
+ }
+ return buf;
+}
+
+/* Return the output template for emitting a dbra type insn.
+
+ Note it may perform some output operations on its own before
+ returning the final output string. */
+char *
+output_dbra (operands, insn, which_alternative)
+ rtx *operands;
+ rtx insn;
+ int which_alternative;
+{
+
+ /* A conditional branch to the following instruction (eg the delay slot) is
+ asking for a disaster. Be prepared! */
+
+ if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
+ {
+ if (which_alternative == 0)
+ return "ldo %1(%0),%0";
+ else if (which_alternative == 1)
+ {
+ output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
+ output_asm_insn ("ldw -16(0,%%r30),%4",operands);
+ output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
+ return "fldws -16(0,%%r30),%0";
+ }
+ else
+ {
+ output_asm_insn ("ldw %0,%4", operands);
+ return "ldo %1(%4),%4\n\tstw %4,%0";
+ }
+ }
+
+ if (which_alternative == 0)
+ {
+ int nullify = INSN_ANNULLED_BRANCH_P (insn);
+ int length = get_attr_length (insn);
+
+ /* If this is a long branch with its delay slot unfilled, set `nullify'
+ as it can nullify the delay slot and save a nop. */
+ if (length == 8 && dbr_sequence_length () == 0)
+ nullify = 1;
+
+ /* If this is a short forward conditional branch which did not get
+ its delay slot filled, the delay slot can still be nullified. */
+ if (! nullify && length == 4 && dbr_sequence_length () == 0)
+ nullify = forward_branch_p (insn);
+
+ /* Handle short versions first. */
+ if (length == 4 && nullify)
+ return "addib,%C2,n %1,%0,%3";
+ else if (length == 4 && ! nullify)
+ return "addib,%C2 %1,%0,%3";
+ else if (length == 8)
+ {
+	  /* Handle weird backwards branch with a filled delay slot
+ which is nullified. */
+ if (dbr_sequence_length () != 0
+ && ! forward_branch_p (insn)
+ && nullify)
+ return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
+ /* Handle short backwards branch with an unfilled delay slot.
+	     Using an addb;nop rather than addi;bl saves 1 cycle for both
+ taken and untaken branches. */
+ else if (dbr_sequence_length () == 0
+ && ! forward_branch_p (insn)
+ && insn_addresses
+ && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
+ - insn_addresses[INSN_UID (insn)] - 8))
+ return "addib,%C2 %1,%0,%3%#";
+
+ /* Handle normal cases. */
+ if (nullify)
+ return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
+ else
+ return "addi,%N2 %1,%0,%0\n\tbl %3,0";
+ }
+ else
+ abort();
+ }
+ /* Deal with gross reload from FP register case. */
+ else if (which_alternative == 1)
+ {
+ /* Move loop counter from FP register to MEM then into a GR,
+ increment the GR, store the GR into MEM, and finally reload
+ the FP register from MEM from within the branch's delay slot. */
+ output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
+ output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
+ if (get_attr_length (insn) == 24)
+ return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
+ else
+ return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
+ }
+ /* Deal with gross reload from memory case. */
+ else
+ {
+ /* Reload loop counter from memory, the store back to memory
+ happens in the branch's delay slot. */
+ output_asm_insn ("ldw %0,%4", operands);
+ if (get_attr_length (insn) == 12)
+ return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
+ else
+ return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
+ }
+}
+
+/* Return the output template for emitting a movb type insn.
+
+ Note it may perform some output operations on its own before
+ returning the final output string. */
+char *
+output_movb (operands, insn, which_alternative, reverse_comparison)
+ rtx *operands;
+ rtx insn;
+ int which_alternative;
+ int reverse_comparison;
+{
+
+ /* A conditional branch to the following instruction (eg the delay slot) is
+ asking for a disaster. Be prepared! */
+
+ if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
+ {
+ if (which_alternative == 0)
+ return "copy %1,%0";
+ else if (which_alternative == 1)
+ {
+ output_asm_insn ("stw %1,-16(0,%%r30)",operands);
+ return "fldws -16(0,%%r30),%0";
+ }
+ else if (which_alternative == 2)
+ return "stw %1,%0";
+ else
+ return "mtsar %r1";
+ }
+
+ /* Support the second variant. */
+ if (reverse_comparison)
+ PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
+
+ if (which_alternative == 0)
+ {
+ int nullify = INSN_ANNULLED_BRANCH_P (insn);
+ int length = get_attr_length (insn);
+
+ /* If this is a long branch with its delay slot unfilled, set `nullify'
+ as it can nullify the delay slot and save a nop. */
+ if (length == 8 && dbr_sequence_length () == 0)
+ nullify = 1;
+
+ /* If this is a short forward conditional branch which did not get
+ its delay slot filled, the delay slot can still be nullified. */
+ if (! nullify && length == 4 && dbr_sequence_length () == 0)
+ nullify = forward_branch_p (insn);
+
+ /* Handle short versions first. */
+ if (length == 4 && nullify)
+ return "movb,%C2,n %1,%0,%3";
+ else if (length == 4 && ! nullify)
+ return "movb,%C2 %1,%0,%3";
+ else if (length == 8)
+ {
+ /* Handle weird backwards branch with a filled delay slot
+ which is nullified. */
+ if (dbr_sequence_length () != 0
+ && ! forward_branch_p (insn)
+ && nullify)
+ return "movb,%N2,n %1,%0,.+12\n\tbl %3,0";
+
+ /* Handle short backwards branch with an unfilled delay slot.
+ Using a movb;nop rather than or;bl saves 1 cycle for both
+ taken and untaken branches. */
+ else if (dbr_sequence_length () == 0
+ && ! forward_branch_p (insn)
+ && insn_addresses
+ && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))]
+ - insn_addresses[INSN_UID (insn)] - 8))
+ return "movb,%C2 %1,%0,%3%#";
+ /* Handle normal cases. */
+ if (nullify)
+ return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
+ else
+ return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
+ }
+ else
+ abort();
+ }
+ /* Deal with gross reload from FP register case. */
+ else if (which_alternative == 1)
+ {
+ /* Move loop counter from FP register to MEM then into a GR,
+ increment the GR, store the GR into MEM, and finally reload
+ the FP register from MEM from within the branch's delay slot. */
+ output_asm_insn ("stw %1,-16(0,%%r30)",operands);
+ if (get_attr_length (insn) == 12)
+ return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
+ else
+ return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
+ }
+ /* Deal with gross reload from memory case. */
+ else if (which_alternative == 2)
+ {
+ /* Reload loop counter from memory, the store back to memory
+ happens in the branch's delay slot. */
+ if (get_attr_length (insn) == 8)
+ return "comb,%S2 0,%1,%3\n\tstw %1,%0";
+ else
+ return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
+ }
+ /* Handle SAR as a destination. */
+ else
+ {
+ if (get_attr_length (insn) == 8)
+ return "comb,%S2 0,%1,%3\n\tmtsar %r1";
+ else
+ return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1";
+ }
+}
+
+
+/* INSN is a millicode call. It may have an unconditional jump in its delay
+ slot.
+
+ CALL_DEST is the routine we are calling. */
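+
+/* In the common (short) case this emits just something like
+
+	bl $$mulI,%r31
+
+   with %# adding a nop if the delay slot ends up unfilled; the longer
+   sequences below (ldil/ble, the portable-runtime blr/bv pair, or the
+   PIC bl .+8 form) are only used when the millicode routine may be out
+   of reach.  */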
+
+char *
+output_millicode_call (insn, call_dest)
+ rtx insn;
+ rtx call_dest;
+{
+ int distance;
+ rtx xoperands[4];
+ rtx seq_insn;
+
+ /* Handle common case -- empty delay slot or no jump in the delay slot,
+ and we're sure that the branch will reach the beginning of the $CODE$
+ subspace. */
+ if ((dbr_sequence_length () == 0
+ && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28))
+ || (dbr_sequence_length () != 0
+ && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
+ && get_attr_length (insn) == 4))
+ {
+ xoperands[0] = call_dest;
+ output_asm_insn ("bl %0,%%r31%#", xoperands);
+ return "";
+ }
+
+ /* This call may not reach the beginning of the $CODE$ subspace. */
+ if (get_attr_length (insn) > 4)
+ {
+ int delay_insn_deleted = 0;
+ rtx xoperands[2];
+
+ /* We need to emit an inline long-call branch. */
+ if (dbr_sequence_length () != 0
+ && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
+ {
+ /* A non-jump insn in the delay slot. By definition we can
+ emit this insn before the call. */
+ final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
+
+ /* Now delete the delay insn. */
+ PUT_CODE (NEXT_INSN (insn), NOTE);
+ NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
+ delay_insn_deleted = 1;
+ }
+
+ /* If we're allowed to use be/ble instructions, then this is the
+ best sequence to use for a long millicode call. */
+ if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
+ || ! (flag_pic || TARGET_PORTABLE_RUNTIME))
+ {
+ xoperands[0] = call_dest;
+ output_asm_insn ("ldil L%%%0,%%r31", xoperands);
+ output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands);
+ output_asm_insn ("nop", xoperands);
+ }
+      /* Pure portable runtime doesn't allow be/ble; we also don't have
+         PIC support in the assembler/linker, so this sequence is needed. */
+ else if (TARGET_PORTABLE_RUNTIME)
+ {
+ xoperands[0] = call_dest;
+ /* Get the address of our target into %r29. */
+ output_asm_insn ("ldil L%%%0,%%r29", xoperands);
+ output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
+
+ /* Get our return address into %r31. */
+ output_asm_insn ("blr 0,%%r31", xoperands);
+
+ /* Jump to our target address in %r29. */
+ output_asm_insn ("bv,n 0(%%r29)", xoperands);
+
+ /* Empty delay slot. Note this insn gets fetched twice and
+ executed once. To be safe we use a nop. */
+ output_asm_insn ("nop", xoperands);
+ return "";
+ }
+ /* PIC long millicode call sequence. */
+ else
+ {
+ xoperands[0] = call_dest;
+ xoperands[1] = gen_label_rtx ();
+ /* Get our address + 8 into %r1. */
+ output_asm_insn ("bl .+8,%%r1", xoperands);
+
+ /* Add %r1 to the offset of our target from the next insn. */
+ output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
+
+ /* Get the return address into %r31. */
+ output_asm_insn ("blr 0,%%r31", xoperands);
+
+ /* Branch to our target which is in %r1. */
+ output_asm_insn ("bv,n 0(%%r1)", xoperands);
+
+ /* Empty delay slot. Note this insn gets fetched twice and
+ executed once. To be safe we use a nop. */
+ output_asm_insn ("nop", xoperands);
+ }
+
+ /* If we had a jump in the call's delay slot, output it now. */
+ if (dbr_sequence_length () != 0
+ && !delay_insn_deleted)
+ {
+ xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
+ output_asm_insn ("b,n %0", xoperands);
+
+ /* Now delete the delay insn. */
+ PUT_CODE (NEXT_INSN (insn), NOTE);
+ NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
+ }
+ return "";
+ }
+
+ /* This call has an unconditional jump in its delay slot and the
+ call is known to reach its target or the beginning of the current
+ subspace. */
+
+ /* Use the containing sequence insn's address. */
+ seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
+
+ distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
+ - insn_addresses[INSN_UID (seq_insn)] - 8;
+
+ /* If the branch was too far away, emit a normal call followed
+ by a nop, followed by the unconditional branch.
+
+ If the branch is close, then adjust %r2 from within the
+ call's delay slot. */
+
+ xoperands[0] = call_dest;
+ xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
+ if (! VAL_14_BITS_P (distance))
+ output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands);
+ else
+ {
+ xoperands[3] = gen_label_rtx ();
+ output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[3]));
+ }
+
+ /* Delete the jump. */
+ PUT_CODE (NEXT_INSN (insn), NOTE);
+ NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
+ return "";
+}
+
+extern struct obstack permanent_obstack;
+extern struct obstack *saveable_obstack;
+extern struct obstack *rtl_obstack;
+extern struct obstack *current_obstack;
+
+/* INSN is a function call.  It may have an unconditional jump
+ in its delay slot.
+
+ CALL_DEST is the routine we are calling. */
+
+char *
+output_call (insn, call_dest)
+ rtx insn;
+ rtx call_dest;
+{
+ int distance;
+ rtx xoperands[4];
+ rtx seq_insn;
+
+ /* Handle common case -- empty delay slot or no jump in the delay slot,
+ and we're sure that the branch will reach the beginning of the $CODE$
+ subspace. */
+ if ((dbr_sequence_length () == 0
+ && get_attr_length (insn) == 8)
+ || (dbr_sequence_length () != 0
+ && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
+ && get_attr_length (insn) == 4))
+ {
+ xoperands[0] = call_dest;
+ output_asm_insn ("bl %0,%%r2%#", xoperands);
+ return "";
+ }
+
+ /* This call may not reach the beginning of the $CODE$ subspace. */
+ if (get_attr_length (insn) > 8)
+ {
+ int delay_insn_deleted = 0;
+ rtx xoperands[2];
+ rtx link;
+
+ /* We need to emit an inline long-call branch. Furthermore,
+ because we're changing a named function call into an indirect
+ function call well after the parameters have been set up, we
+ need to make sure any FP args appear in both the integer
+ and FP registers. Also, we need move any delay slot insn
+ out of the delay slot. And finally, we can't rely on the linker
+ being able to fix the call to $$dyncall! -- Yuk!. */
+ if (dbr_sequence_length () != 0
+ && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
+ {
+ /* A non-jump insn in the delay slot. By definition we can
+ emit this insn before the call (and in fact before argument
+	     relocation). */
+ final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
+
+ /* Now delete the delay insn. */
+ PUT_CODE (NEXT_INSN (insn), NOTE);
+ NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
+ delay_insn_deleted = 1;
+ }
+
+ /* Now copy any FP arguments into integer registers. */
+ for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
+ {
+ int arg_mode, regno;
+ rtx use = XEXP (link, 0);
+ if (! (GET_CODE (use) == USE
+ && GET_CODE (XEXP (use, 0)) == REG
+ && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
+ continue;
+
+ arg_mode = GET_MODE (XEXP (use, 0));
+ regno = REGNO (XEXP (use, 0));
+ /* Is it a floating point register? */
+ if (regno >= 32 && regno <= 39)
+ {
+ /* Copy from the FP register into an integer register
+ (via memory). */
+ if (arg_mode == SFmode)
+ {
+ xoperands[0] = XEXP (use, 0);
+ xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
+ output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
+ output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
+ }
+ else
+ {
+ xoperands[0] = XEXP (use, 0);
+ xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
+ output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
+ output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
+ output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
+ }
+ }
+ }
+
+ /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since
+ we don't have any direct calls in that case. */
+ {
+ int i;
+ char *name = XSTR (call_dest, 0);
+
+ /* See if we have already put this function on the list
+ of deferred plabels. This list is generally small,
+	 so a linear search is not too ugly.  If it proves too
+ slow replace it with something faster. */
+ for (i = 0; i < n_deferred_plabels; i++)
+ if (strcmp (name, deferred_plabels[i].name) == 0)
+ break;
+
+ /* If the deferred plabel list is empty, or this entry was
+ not found on the list, create a new entry on the list. */
+ if (deferred_plabels == NULL || i == n_deferred_plabels)
+ {
+ struct obstack *ambient_obstack = current_obstack;
+ struct obstack *ambient_rtl_obstack = rtl_obstack;
+ char *real_name;
+
+ /* Any RTL we create here needs to live until the end of
+ the compilation unit and therefore must live on the
+ permanent obstack. */
+ current_obstack = &permanent_obstack;
+ rtl_obstack = &permanent_obstack;
+
+ if (deferred_plabels == 0)
+ deferred_plabels = (struct deferred_plabel *)
+ xmalloc (1 * sizeof (struct deferred_plabel));
+ else
+ deferred_plabels = (struct deferred_plabel *)
+ xrealloc (deferred_plabels,
+ ((n_deferred_plabels + 1)
+ * sizeof (struct deferred_plabel)));
+
+ i = n_deferred_plabels++;
+ deferred_plabels[i].internal_label = gen_label_rtx ();
+ deferred_plabels[i].name = obstack_alloc (&permanent_obstack,
+ strlen (name) + 1);
+ strcpy (deferred_plabels[i].name, name);
+
+ /* Switch back to normal obstack allocation. */
+ current_obstack = ambient_obstack;
+ rtl_obstack = ambient_rtl_obstack;
+
+ /* Gross. We have just implicitly taken the address of this
+ function, mark it as such. */
+ STRIP_NAME_ENCODING (real_name, name);
+ TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1;
+ }
+
+ /* We have to load the address of the function using a procedure
+ label (plabel). Inline plabels can lose for PIC and other
+ cases, so avoid them by creating a 32bit plabel in the data
+ segment. */
+ if (flag_pic)
+ {
+ xoperands[0] = deferred_plabels[i].internal_label;
+ xoperands[1] = gen_label_rtx ();
+
+ output_asm_insn ("addil LT%%%0,%%r19", xoperands);
+ output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands);
+ output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands);
+
+ /* Get our address + 8 into %r1. */
+ output_asm_insn ("bl .+8,%%r1", xoperands);
+
+ /* Add %r1 to the offset of dyncall from the next insn. */
+ output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands);
+
+ /* Get the return address into %r31. */
+ output_asm_insn ("blr 0,%%r31", xoperands);
+
+ /* Branch to our target which is in %r1. */
+ output_asm_insn ("bv 0(%%r1)", xoperands);
+
+ /* Copy the return address into %r2 also. */
+ output_asm_insn ("copy %%r31,%%r2", xoperands);
+ }
+ else
+ {
+ xoperands[0] = deferred_plabels[i].internal_label;
+
+ /* Get the address of our target into %r22. */
+ output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands);
+ output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands);
+
+ /* Get the high part of the address of $dyncall into %r2, then
+ add in the low part in the branch instruction. */
+ output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands);
+ output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands);
+
+ /* Copy the return pointer into both %r31 and %r2. */
+ output_asm_insn ("copy %%r31,%%r2", xoperands);
+ }
+ }
+
+ /* If we had a jump in the call's delay slot, output it now. */
+ if (dbr_sequence_length () != 0
+ && !delay_insn_deleted)
+ {
+ xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
+ output_asm_insn ("b,n %0", xoperands);
+
+ /* Now delete the delay insn. */
+ PUT_CODE (NEXT_INSN (insn), NOTE);
+ NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
+ }
+ return "";
+ }
+
+ /* This call has an unconditional jump in its delay slot and the
+ call is known to reach its target or the beginning of the current
+ subspace. */
+
+ /* Use the containing sequence insn's address. */
+ seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
+
+ distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
+ - insn_addresses[INSN_UID (seq_insn)] - 8;
+
+ /* If the branch was too far away, emit a normal call followed
+ by a nop, followed by the unconditional branch.
+
+ If the branch is close, then adjust %r2 from within the
+ call's delay slot. */
+
+ xoperands[0] = call_dest;
+ xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
+ if (! VAL_14_BITS_P (distance))
+ output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
+ else
+ {
+ xoperands[3] = gen_label_rtx ();
+ output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[3]));
+ }
+
+ /* Delete the jump. */
+ PUT_CODE (NEXT_INSN (insn), NOTE);
+ NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
+ return "";
+}
+
+/* In HPUX 8.0's shared library scheme, special relocations are needed
+ for function labels if they might be passed to a function
+ in a shared library (because shared libraries don't live in code
+ space), and special magic is needed to construct their address.
+
+   For reasons too disgusting to describe, storage for the new name
+ is allocated either on the saveable_obstack (released at function
+ exit) or on the permanent_obstack for things that can never change
+ (libcall names for example). */
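+
+/* For example, the SYMBOL_REF name "foo" becomes "@foo" (and an
+   already-starred "*foo" becomes "*@foo"); the '@' marker is what
+   FUNCTION_NAME_P, used by function_label_operand below, looks for.  */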
+
+void
+hppa_encode_label (sym, permanent)
+ rtx sym;
+ int permanent;
+{
+ char *str = XSTR (sym, 0);
+ int len = strlen (str);
+ char *newstr;
+
+ newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack),
+ len + 2);
+
+ if (str[0] == '*')
+ *newstr++ = *str++;
+ strcpy (newstr + 1, str);
+ *newstr = '@';
+ XSTR (sym,0) = newstr;
+}
+
+int
+function_label_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
+}
+
+/* Returns 1 if OP is a function label involved in a simple addition
+ with a constant. Used to keep certain patterns from matching
+ during instruction combination. */
+int
+is_function_label_plus_const (op)
+ rtx op;
+{
+ /* Strip off any CONST. */
+ if (GET_CODE (op) == CONST)
+ op = XEXP (op, 0);
+
+ return (GET_CODE (op) == PLUS
+ && function_label_operand (XEXP (op, 0), Pmode)
+ && GET_CODE (XEXP (op, 1)) == CONST_INT);
+}
+
+/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
+ use in fmpyadd instructions. */
+int
+fmpyaddoperands (operands)
+ rtx *operands;
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ /* Must be a floating point mode. */
+ if (mode != SFmode && mode != DFmode)
+ return 0;
+
+ /* All modes must be the same. */
+ if (! (mode == GET_MODE (operands[1])
+ && mode == GET_MODE (operands[2])
+ && mode == GET_MODE (operands[3])
+ && mode == GET_MODE (operands[4])
+ && mode == GET_MODE (operands[5])))
+ return 0;
+
+ /* All operands must be registers. */
+ if (! (GET_CODE (operands[1]) == REG
+ && GET_CODE (operands[2]) == REG
+ && GET_CODE (operands[3]) == REG
+ && GET_CODE (operands[4]) == REG
+ && GET_CODE (operands[5]) == REG))
+ return 0;
+
+ /* Only 2 real operands to the addition. One of the input operands must
+ be the same as the output operand. */
+ if (! rtx_equal_p (operands[3], operands[4])
+ && ! rtx_equal_p (operands[3], operands[5]))
+ return 0;
+
+ /* Inout operand of add can not conflict with any operands from multiply. */
+ if (rtx_equal_p (operands[3], operands[0])
+ || rtx_equal_p (operands[3], operands[1])
+ || rtx_equal_p (operands[3], operands[2]))
+ return 0;
+
+ /* multiply can not feed into addition operands. */
+ if (rtx_equal_p (operands[4], operands[0])
+ || rtx_equal_p (operands[5], operands[0]))
+ return 0;
+
+ /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
+ if (mode == SFmode
+ && (REGNO (operands[0]) < 57
+ || REGNO (operands[1]) < 57
+ || REGNO (operands[2]) < 57
+ || REGNO (operands[3]) < 57
+ || REGNO (operands[4]) < 57
+ || REGNO (operands[5]) < 57))
+ return 0;
+
+ /* Passed. Operands are suitable for fmpyadd. */
+ return 1;
+}
+
+/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
+ use in fmpysub instructions. */
+int
+fmpysuboperands (operands)
+ rtx *operands;
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ /* Must be a floating point mode. */
+ if (mode != SFmode && mode != DFmode)
+ return 0;
+
+ /* All modes must be the same. */
+ if (! (mode == GET_MODE (operands[1])
+ && mode == GET_MODE (operands[2])
+ && mode == GET_MODE (operands[3])
+ && mode == GET_MODE (operands[4])
+ && mode == GET_MODE (operands[5])))
+ return 0;
+
+ /* All operands must be registers. */
+ if (! (GET_CODE (operands[1]) == REG
+ && GET_CODE (operands[2]) == REG
+ && GET_CODE (operands[3]) == REG
+ && GET_CODE (operands[4]) == REG
+ && GET_CODE (operands[5]) == REG))
+ return 0;
+
+ /* Only 2 real operands to the subtraction. Subtraction is not a commutative
+     operation, so operands[4] must be the same as operands[3]. */
+ if (! rtx_equal_p (operands[3], operands[4]))
+ return 0;
+
+ /* multiply can not feed into subtraction. */
+ if (rtx_equal_p (operands[5], operands[0]))
+ return 0;
+
+ /* Inout operand of sub can not conflict with any operands from multiply. */
+ if (rtx_equal_p (operands[3], operands[0])
+ || rtx_equal_p (operands[3], operands[1])
+ || rtx_equal_p (operands[3], operands[2]))
+ return 0;
+
+ /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
+ if (mode == SFmode
+ && (REGNO (operands[0]) < 57
+ || REGNO (operands[1]) < 57
+ || REGNO (operands[2]) < 57
+ || REGNO (operands[3]) < 57
+ || REGNO (operands[4]) < 57
+ || REGNO (operands[5]) < 57))
+ return 0;
+
+ /* Passed. Operands are suitable for fmpysub. */
+ return 1;
+}
+
+int
+plus_xor_ior_operator (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
+ || GET_CODE (op) == IOR);
+}
+
+/* Return 1 if the given constant is 2, 4, or 8. These are the valid
+ constants for shadd instructions. */
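+
+/* These map onto the sh1add/sh2add/sh3add instructions; for example,
+   x * 5 can be done with a single sh2add (x * 4 + x).  */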
+static int
+shadd_constant_p (val)
+ int val;
+{
+ if (val == 2 || val == 4 || val == 8)
+ return 1;
+ else
+ return 0;
+}
+
+/* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
+   the valid constants for shadd instructions. */
+int
+shadd_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
+}
+
+/* Return 1 if OP is valid as a base register in a reg + reg address. */
+
+int
+basereg_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+  /* cse will create some unscaled indexed addresses; however, it
+     generally isn't a win on the PA, so avoid creating unscaled
+ indexed addresses until after cse is finished. */
+ if (!cse_not_expected)
+ return 0;
+
+ /* Once reload has started everything is considered valid. Reload should
+ only create indexed addresses using the stack/frame pointer, and any
+ others were checked for validity when created by the combine pass.
+
+ Also allow any register when TARGET_NO_SPACE_REGS is in effect since
+ we don't have to worry about the braindamaged implicit space register
+ selection using the basereg only (rather than effective address)
+ screwing us over. */
+ if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
+ return (GET_CODE (op) == REG);
+
+ /* Stack is always OK for indexing. */
+ if (op == stack_pointer_rtx)
+ return 1;
+
+ /* While it's always safe to index off the frame pointer, it's not
+ always profitable, particularly when the frame pointer is being
+ eliminated. */
+ if (! flag_omit_frame_pointer && op == frame_pointer_rtx)
+ return 1;
+
+ /* The only other valid OPs are pseudo registers with
+ REGNO_POINTER_FLAG set. */
+ if (GET_CODE (op) != REG
+ || REGNO (op) < FIRST_PSEUDO_REGISTER
+ || ! register_operand (op, mode))
+ return 0;
+
+ return REGNO_POINTER_FLAG (REGNO (op));
+}
+
+/* Return 1 if this operand is anything other than a hard register. */
+
+int
+non_hard_reg_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
+}
+
+/* Return 1 if INSN branches forward. Should be using insn_addresses
+ to avoid walking through all the insns... */
+static int
+forward_branch_p (insn)
+ rtx insn;
+{
+ rtx label = JUMP_LABEL (insn);
+
+ while (insn)
+ {
+ if (insn == label)
+ break;
+ else
+ insn = NEXT_INSN (insn);
+ }
+
+ return (insn == label);
+}
+
+/* Return 1 if OP is an equality comparison, else return 0. */
+int
+eq_neq_comparison_operator (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
+}
+
+/* Return 1 if OP is an operator suitable for use in a movb instruction. */
+int
+movb_comparison_operator (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == EQ || GET_CODE (op) == NE
+ || GET_CODE (op) == LT || GET_CODE (op) == GE);
+}
+
+/* Return 1 if INSN is in the delay slot of a call instruction. */
+int
+jump_in_call_delay (insn)
+ rtx insn;
+{
+
+ if (GET_CODE (insn) != JUMP_INSN)
+ return 0;
+
+ if (PREV_INSN (insn)
+ && PREV_INSN (PREV_INSN (insn))
+ && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
+ {
+ rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
+
+ return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
+ && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
+
+ }
+ else
+ return 0;
+}
+
+/* Output an unconditional move and branch insn. */
+
+char *
+output_parallel_movb (operands, length)
+ rtx *operands;
+ int length;
+{
+ /* These are the cases in which we win. */
+ if (length == 4)
+ return "mov%I1b,tr %1,%0,%2";
+
+ /* None of these cases wins, but they don't lose either. */
+ if (dbr_sequence_length () == 0)
+ {
+ /* Nothing in the delay slot, fake it by putting the combined
+ insn (the copy or add) in the delay slot of a bl. */
+ if (GET_CODE (operands[1]) == CONST_INT)
+ return "bl %2,0\n\tldi %1,%0";
+ else
+ return "bl %2,0\n\tcopy %1,%0";
+ }
+ else
+ {
+ /* Something in the delay slot, but we've got a long branch. */
+ if (GET_CODE (operands[1]) == CONST_INT)
+ return "ldi %1,%0\n\tbl %2,0";
+ else
+ return "copy %1,%0\n\tbl %2,0";
+ }
+}
+
+/* Output an unconditional add and branch insn. */
+
+char *
+output_parallel_addb (operands, length)
+ rtx *operands;
+ int length;
+{
+ /* To make life easy we want operand0 to be the shared input/output
+ operand and operand1 to be the readonly operand. */
+ if (operands[0] == operands[1])
+ operands[1] = operands[2];
+
+ /* These are the cases in which we win. */
+ if (length == 4)
+ return "add%I1b,tr %1,%0,%3";
+
+ /* None of these cases win, but they don't lose either. */
+ if (dbr_sequence_length () == 0)
+ {
+ /* Nothing in the delay slot, fake it by putting the combined
+ insn (the copy or add) in the delay slot of a bl. */
+ return "bl %3,0\n\tadd%I1 %1,%0,%0";
+ }
+ else
+ {
+ /* Something in the delay slot, but we've got a long branch. */
+ return "add%I1 %1,%0,%0\n\tbl %3,0";
+ }
+}
+
+/* Return nonzero if INSN (a jump insn) immediately follows a call to
+ a named function. This is used to discourage creating parallel movb/addb
+ insns since a jump which immediately follows a call can execute in the
+ delay slot of the call.
+
+ It is also used to avoid filling the delay slot of a jump which
+ immediately follows a call since the jump can usually be eliminated
+ completely by modifying RP in the delay slot of the call. */
+
+int
+following_call (insn)
+ rtx insn;
+{
+ /* CYGNUS LOCAL PA8000/law */
+ /* We do not parallel movb,addb or place jumps into call delay slots when
+ optimizing for the PA8000. */
+ if (pa_cpu != PROCESSOR_8000)
+ return 0;
+ /* END CYGNUS LOCAL */
+
+ /* Find the previous real insn, skipping NOTEs. */
+ insn = PREV_INSN (insn);
+ while (insn && GET_CODE (insn) == NOTE)
+ insn = PREV_INSN (insn);
+
+ /* Check for CALL_INSNs and millicode calls. */
+ if (insn
+ && ((GET_CODE (insn) == CALL_INSN
+ && get_attr_type (insn) != TYPE_DYNCALL)
+ || (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) != SEQUENCE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER
+ && get_attr_type (insn) == TYPE_MILLI)))
+ return 1;
+
+ return 0;
+}
+
+/* Restore any INSN_CODEs for insns with unscaled indexed addresses since
+   the INSN_CODE might be clobbered by rerecognition triggered by reorg. */
+
+static void
+restore_unscaled_index_insn_codes (insns)
+ rtx insns;
+{
+ rtx insn;
+
+ for (insn = insns; insn; insn = NEXT_INSN (insn))
+ {
+ if (INSN_UID (insn) < max_unscaled_index_insn_codes_uid
+ && unscaled_index_insn_codes[INSN_UID (insn)] != -1)
+ INSN_CODE (insn) = unscaled_index_insn_codes[INSN_UID (insn)];
+ }
+}
+
+/* Severe braindamage:
+
+ On the PA, address computations within MEM expressions are not
+ commutative because of the implicit space register selection
+ from the base register (instead of the entire effective address).
+
+ Because of this mis-feature we have to know which register in a reg+reg
+ address is the base and which is the index.
+
+ Before reload, the base can be identified by REGNO_POINTER_FLAG. We use
+ this to force base + index addresses to match a different insn than
+ index + base addresses.
+
+ We assume that no pass during or after reload creates new unscaled indexed
+ addresses, so any unscaled indexed address we find after reload must have
+   at one time been recognized as a base + index or index + base, and we accept
+ any register as a base register.
+
+ This scheme assumes that no pass during/after reload will rerecognize an
+ insn with an unscaled indexed address. This failed due to a reorg call
+ to rerecognize certain insns.
+
+ So, we record if an insn uses an unscaled indexed address and which
+ register is the base (via recording of the INSN_CODE for such insns).
+
+ Just before we output code for the function, we make sure all the insns
+ using unscaled indexed addresses have the same INSN_CODE as they did
+ immediately before delay slot scheduling.
+
+ This is extremely gross. Long term, I'd like to be able to look at
+ REG_POINTER_FLAG to handle these kinds of problems. */
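+
+/* Concretely: for an address like (plus (reg A) (reg B)) the implicit
+   space register is chosen from the operand the instruction treats as
+   the base, so base + index and index + base are not interchangeable,
+   and the INSN_CODE recorded below pins down which ordering was
+   originally recognized.  */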
+
+static void
+record_unscaled_index_insn_codes (insns)
+ rtx insns;
+{
+ rtx insn;
+
+ max_unscaled_index_insn_codes_uid = get_max_uid ();
+ unscaled_index_insn_codes
+ = (int *)xmalloc (max_unscaled_index_insn_codes_uid * sizeof (int));
+ memset (unscaled_index_insn_codes, -1,
+ max_unscaled_index_insn_codes_uid * sizeof (int));
+
+ for (insn = insns; insn; insn = NEXT_INSN (insn))
+ {
+ rtx set = single_set (insn);
+ rtx mem = NULL_RTX;
+
+ /* Ignore anything that isn't a normal SET. */
+ if (set == NULL_RTX)
+ continue;
+
+ /* No insns can have more than one MEM. */
+ if (GET_CODE (SET_SRC (set)) == MEM)
+ mem = SET_SRC (set);
+
+ if (GET_CODE (SET_DEST (set)) == MEM)
+ mem = SET_DEST (set);
+
+ /* If neither operand is a mem, then there's nothing to do. */
+ if (mem == NULL_RTX)
+ continue;
+
+ if (GET_CODE (XEXP (mem, 0)) != PLUS)
+ continue;
+
+ /* If both are REGs (or SUBREGs), then record the insn code for
+ this insn. */
+ if (REG_P (XEXP (XEXP (mem, 0), 0)) && REG_P (XEXP (XEXP (mem, 0), 1)))
+ unscaled_index_insn_codes[INSN_UID (insn)] = INSN_CODE (insn);
+ }
+}
+
+/* We use this hook to perform a PA specific optimization which is difficult
+ to do in earlier passes.
+
+ We want the delay slots of branches within jump tables to be filled.
+ None of the compiler passes at the moment even has the notion that a
+ PA jump table doesn't contain addresses, but instead contains actual
+ instructions!
+
+ Because we actually jump into the table, the addresses of each entry
+ must stay constant in relation to the beginning of the table (which
+ itself must stay constant relative to the instruction to jump into
+ it). I don't believe we can guarantee earlier passes of the compiler
+ will adhere to those rules.
+
+ So, late in the compilation process we find all the jump tables, and
+ expand them into real code -- eg each entry in the jump table vector
+ will get an appropriate label followed by a jump to the final target.
+
+ Reorg and the final jump pass can then optimize these branches and
+ fill their delay slots. We end up with smaller, more efficient code.
+
+ The jump instructions within the table are special; we must be able
+ to identify them during assembly output (if the jumps don't get filled
+   we need to emit a nop rather than nullifying the delay slot).  We
+ identify jumps in switch tables by marking the SET with DImode.
+
+ We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
+ insns. This serves two purposes, first it prevents jump.c from
+ noticing that the last N entries in the table jump to the instruction
+ immediately after the table and deleting the jumps. Second, those
+ insns mark where we should emit .begin_brtab and .end_brtab directives
+ when using GAS (allows for better link time optimizations). */
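+
+/* Roughly, an ADDR_VEC entry that used to be just the address of L$0100
+   is replaced by real code of the form
+
+	L$0200:
+		b L$0100
+		nop
+
+   (label numbers invented), with the whole table bracketed by the
+   begin_brtab/end_brtab markers.  */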
+
+void
+pa_reorg (insns)
+ rtx insns;
+{
+ rtx insn;
+
+ /* Keep track of which insns have unscaled indexed addresses, and which
+ register is the base address in such insns. */
+ record_unscaled_index_insn_codes (insns);
+
+ remove_useless_addtr_insns (insns, 1);
+
+ /* CYGNUS LOCAL PA8000/law */
+ /* These optimizations hurt PA8000 performance. */
+ if (pa_cpu != PROCESSOR_8000)
+ pa_combine_instructions (get_insns ());
+ /* END CYGNUS LOCAL */
+
+ /* This is fairly cheap, so always run it if optimizing. */
+ if (optimize > 0 && !TARGET_BIG_SWITCH)
+ {
+ /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
+ insns = get_insns ();
+ for (insn = insns; insn; insn = NEXT_INSN (insn))
+ {
+ rtx pattern, tmp, location;
+ unsigned int length, i;
+
+ /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
+ if (GET_CODE (insn) != JUMP_INSN
+ || (GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
+ continue;
+
+ /* Emit marker for the beginning of the branch table. */
+ emit_insn_before (gen_begin_brtab (), insn);
+
+ pattern = PATTERN (insn);
+ location = PREV_INSN (insn);
+ length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
+
+ for (i = 0; i < length; i++)
+ {
+ /* Emit a label before each jump to keep jump.c from
+ removing this code. */
+ tmp = gen_label_rtx ();
+ LABEL_NUSES (tmp) = 1;
+ emit_label_after (tmp, location);
+ location = NEXT_INSN (location);
+
+ if (GET_CODE (pattern) == ADDR_VEC)
+ {
+ /* Emit the jump itself. */
+ tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
+ tmp = emit_jump_insn_after (tmp, location);
+ JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
+ /* It is easy to rely on the branch table markers
+ during assembly output to trigger the correct code
+ for a switch table jump with an unfilled delay slot.
+
+ However, that requires state and assumes that we look
+ at insns in order.
+
+ We can't make such assumptions when computing the length
+ of instructions. Ugh. We could walk the insn chain to
+ determine if this instruction is in a branch table, but
+ that can get rather expensive, particularly during the
+ branch shortening phase of the compiler.
+
+ So instead we mark this jump as being special. This is
+ far from ideal and knows that no code after this will
+ muck around with the mode of the JUMP_INSN itself. */
+ PUT_MODE (tmp, SImode);
+ LABEL_NUSES (JUMP_LABEL (tmp))++;
+ location = NEXT_INSN (location);
+ }
+ else
+ {
+ /* Emit the jump itself. */
+ tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
+ tmp = emit_jump_insn_after (tmp, location);
+ JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
+ /* See the comment in the ADDR_VEC case above for why the mode
+ of this JUMP_INSN is changed. */
+ PUT_MODE (tmp, SImode);
+ LABEL_NUSES (JUMP_LABEL (tmp))++;
+ location = NEXT_INSN (location);
+ }
+
+ /* Emit a BARRIER after the jump. */
+ emit_barrier_after (location);
+ location = NEXT_INSN (location);
+ }
+
+ /* Emit marker for the end of the branch table. */
+ emit_insn_before (gen_end_brtab (), location);
+ location = NEXT_INSN (location);
+ emit_barrier_after (location);
+
+ /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
+ delete_insn (insn);
+ }
+ }
+ else
+ {
+ /* Still need begin_brtab and end_brtab markers around each table. */
+ insns = get_insns ();
+ for (insn = insns; insn; insn = NEXT_INSN (insn))
+ {
+ /* Find an ADDR_VEC or ADDR_DIFF_VEC insn. */
+ if (GET_CODE (insn) != JUMP_INSN
+ || (GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
+ continue;
+
+ /* Now generate markers for the beginning and end of the
+ branch table. */
+ emit_insn_before (gen_begin_brtab (), insn);
+ emit_insn_after (gen_end_brtab (), insn);
+ }
+ }
+}
+
+/* The PA has a number of odd instructions which can perform multiple
+ tasks at once. On first generation PA machines (PA1.0 and PA1.1)
+ it may be profitable to combine two instructions into one instruction
+ with two outputs. It's not profitable on PA2.0 machines because the
+ two outputs would take two slots in the reorder buffers.
+
+ This routine finds instructions which can be combined and combines
+ them. We only support some of the potential combinations, and we
+ only try common ways to find suitable instructions.
+
+ * addb can add two registers or a register and a small integer
+ and jump to a nearby (+-8k) location. Normally the jump to the
+ nearby location is conditional on the result of the add, but by
+ using the "true" condition we can make the jump unconditional.
+ Thus addb can perform two independent operations in one insn.
+
+ * movb is similar to addb in that it can perform a reg->reg
+ or small immediate->reg copy and jump to a nearby (+-8k) location.
+
+ * fmpyadd and fmpysub can perform a FP multiply and either an
+ FP add or FP sub if the operands of the multiply and add/sub are
+ independent (there are other minor restrictions). Note both
+ the fmpy and fadd/fsub can in theory move to better spots according
+ to data dependencies, but for now we require the fmpy stay at a
+ fixed location.
+
+ * Many of the memory operations can perform pre & post updates
+ of index registers. GCC's pre/post increment/decrement addressing
+ is far too simple to take advantage of all the possibilities. This
+ pass may not be suitable since those insns may not be independent.
+
+ * comclr can compare two registers, or a register and a small
+ integer, nullify the following instruction and zero some other
+ register. This is more difficult to use as it's harder to find
+ an insn which will generate a comclr than to find something like
+ an unconditional branch. (Conditional moves & long branches
+ create comclr insns.)
+
+ * Most arithmetic operations can conditionally skip the next
+ instruction. They can be viewed as "perform this operation
+ and conditionally jump to this nearby location" (where nearby
+ is only an insn away). These are difficult to use due to the
+ branch length restrictions. */
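+
+/* Illustrative sketch of the simplest case handled below (register names
+ are purely for the example): an fmpy insn acting as the floater, say
+
+ (set (reg F6) (mult (reg F4) (reg F5)))
+
+ paired with an independent fadd insn acting as the anchor, say
+
+ (set (reg F9) (plus (reg F7) (reg F8)))
+
+ is rewritten as a single two-set PARALLEL; if that PARALLEL is
+ recognized and its constraints are satisfied, the pair is emitted as
+ one instruction and the original insns are removed. */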
+
+static void
+pa_combine_instructions (insns)
+ rtx insns ATTRIBUTE_UNUSED;
+{
+ rtx anchor, new;
+
+ /* This can get expensive since the basic algorithm is on the
+ order of O(n^2) (or worse). Only do it for -O2 or higher
+ levels of optimization. */
+ if (optimize < 2)
+ return;
+
+ /* Walk down the list of insns looking for "anchor" insns which
+ may be combined with "floating" insns. As the name implies,
+ "anchor" instructions don't move, while "floating" insns may
+ move around. */
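+
+ /* The dummy PARALLEL created just below is used purely as scratch:
+ pa_can_combine_p copies candidate patterns into it and asks the
+ recognizer whether the combination matches; the insn actually
+ emitted is constructed separately once a match is found. */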
+ new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
+ new = make_insn_raw (new);
+
+ for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
+ {
+ enum attr_pa_combine_type anchor_attr;
+ enum attr_pa_combine_type floater_attr;
+
+ /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
+ Also ignore any special USE or CLOBBER insns and any
+ branch-table (ADDR_VEC/ADDR_DIFF_VEC) patterns. */
+ if ((GET_CODE (anchor) != INSN
+ && GET_CODE (anchor) != JUMP_INSN
+ && GET_CODE (anchor) != CALL_INSN)
+ || GET_CODE (PATTERN (anchor)) == USE
+ || GET_CODE (PATTERN (anchor)) == CLOBBER
+ || GET_CODE (PATTERN (anchor)) == ADDR_VEC
+ || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
+ continue;
+
+ anchor_attr = get_attr_pa_combine_type (anchor);
+ /* See if anchor is an insn suitable for combination. */
+ if (anchor_attr == PA_COMBINE_TYPE_FMPY
+ || anchor_attr == PA_COMBINE_TYPE_FADDSUB
+ || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
+ && ! forward_branch_p (anchor)))
+ {
+ rtx floater;
+
+ for (floater = PREV_INSN (anchor);
+ floater;
+ floater = PREV_INSN (floater))
+ {
+ if (GET_CODE (floater) == NOTE
+ || (GET_CODE (floater) == INSN
+ && (GET_CODE (PATTERN (floater)) == USE
+ || GET_CODE (PATTERN (floater)) == CLOBBER)))
+ continue;
+
+ /* Anything except a regular INSN will stop our search. */
+ if (GET_CODE (floater) != INSN
+ || GET_CODE (PATTERN (floater)) == ADDR_VEC
+ || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
+ {
+ floater = NULL_RTX;
+ break;
+ }
+
+ /* See if FLOATER is suitable for combination with the
+ anchor. */
+ floater_attr = get_attr_pa_combine_type (floater);
+ if ((anchor_attr == PA_COMBINE_TYPE_FMPY
+ && floater_attr == PA_COMBINE_TYPE_FADDSUB)
+ || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+ && floater_attr == PA_COMBINE_TYPE_FMPY))
+ {
+ /* If ANCHOR and FLOATER can be combined, then we're
+ done with this pass. */
+ if (pa_can_combine_p (new, anchor, floater, 0,
+ SET_DEST (PATTERN (floater)),
+ XEXP (SET_SRC (PATTERN (floater)), 0),
+ XEXP (SET_SRC (PATTERN (floater)), 1)))
+ break;
+ }
+
+ else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
+ && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
+ {
+ if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
+ {
+ if (pa_can_combine_p (new, anchor, floater, 0,
+ SET_DEST (PATTERN (floater)),
+ XEXP (SET_SRC (PATTERN (floater)), 0),
+ XEXP (SET_SRC (PATTERN (floater)), 1)))
+ break;
+ }
+ else
+ {
+ if (pa_can_combine_p (new, anchor, floater, 0,
+ SET_DEST (PATTERN (floater)),
+ SET_SRC (PATTERN (floater)),
+ SET_SRC (PATTERN (floater))))
+ break;
+ }
+ }
+ }
+
+ /* If we didn't find anything on the backward scan, try forward. */
+ if (!floater
+ && (anchor_attr == PA_COMBINE_TYPE_FMPY
+ || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
+ {
+ for (floater = anchor; floater; floater = NEXT_INSN (floater))
+ {
+ if (GET_CODE (floater) == NOTE
+ || (GET_CODE (floater) == INSN
+ && (GET_CODE (PATTERN (floater)) == USE
+ || GET_CODE (PATTERN (floater)) == CLOBBER)))
+ continue;
+
+ /* Anything except a regular INSN will stop our search. */
+ if (GET_CODE (floater) != INSN
+ || GET_CODE (PATTERN (floater)) == ADDR_VEC
+ || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
+ {
+ floater = NULL_RTX;
+ break;
+ }
+
+ /* See if FLOATER is suitable for combination with the
+ anchor. */
+ floater_attr = get_attr_pa_combine_type (floater);
+ if ((anchor_attr == PA_COMBINE_TYPE_FMPY
+ && floater_attr == PA_COMBINE_TYPE_FADDSUB)
+ || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+ && floater_attr == PA_COMBINE_TYPE_FMPY))
+ {
+ /* If ANCHOR and FLOATER can be combined, then we're
+ done with this pass. */
+ if (pa_can_combine_p (new, anchor, floater, 1,
+ SET_DEST (PATTERN (floater)),
+ XEXP (SET_SRC (PATTERN (floater)), 0),
+ XEXP (SET_SRC (PATTERN (floater)), 1)))
+ break;
+ }
+ }
+ }
+
+ /* FLOATER will be nonzero if we found a suitable floating
+ insn for combination with ANCHOR. */
+ if (floater
+ && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+ || anchor_attr == PA_COMBINE_TYPE_FMPY))
+ {
+ /* Emit the new instruction and delete the old anchor. */
+ emit_insn_before (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2,
+ PATTERN (anchor),
+ PATTERN (floater))),
+ anchor);
+ PUT_CODE (anchor, NOTE);
+ NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (anchor) = 0;
+
+ /* Emit a special USE insn for FLOATER, then delete
+ the floating insn. */
+ emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
+ delete_insn (floater);
+
+ continue;
+ }
+ else if (floater
+ && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
+ {
+ rtx temp;
+ /* Emit the new jump instruction and delete the old anchor. */
+ temp = emit_jump_insn_before (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, PATTERN (anchor),
+ PATTERN (floater))),
+ anchor);
+ JUMP_LABEL (temp) = JUMP_LABEL (anchor);
+ PUT_CODE (anchor, NOTE);
+ NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (anchor) = 0;
+
+ /* Emit a special USE insn for FLOATER, then delete
+ the floating insn. */
+ emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
+ delete_insn (floater);
+ continue;
+ }
+ }
+ }
+}
+
+int
+pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
+ rtx new, anchor, floater;
+ int reversed;
+ rtx dest, src1, src2;
+{
+ int insn_code_number;
+ rtx start, end;
+
+ /* Create a PARALLEL with the patterns of ANCHOR and
+ FLOATER, try to recognize it, then test constraints
+ for the resulting pattern.
+
+ If the pattern doesn't match or the constraints
+ aren't met keep searching for a suitable floater
+ insn. */
+ XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
+ XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
+ INSN_CODE (new) = -1;
+ insn_code_number = recog_memoized (new);
+ if (insn_code_number < 0
+ || !constrain_operands (insn_code_number, 1))
+ return 0;
+
+ if (reversed)
+ {
+ start = anchor;
+ end = floater;
+ }
+ else
+ {
+ start = floater;
+ end = anchor;
+ }
+
+ /* There are up to three operands to consider: one
+ output and two inputs.
+
+ The output must not be used between FLOATER & ANCHOR
+ exclusive. The inputs must not be set between
+ FLOATER and ANCHOR exclusive. */
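+
+ /* For example (an illustrative case): when an fmpy FLOATER found on
+ the backward scan is combined with a later fadd ANCHOR, the fmpy's
+ destination must not be read, and its two source registers must not
+ be written, by any insn lying strictly between the two. */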
+
+ if (reg_used_between_p (dest, start, end))
+ return 0;
+
+ if (reg_set_between_p (src1, start, end))
+ return 0;
+
+ if (reg_set_between_p (src2, start, end))
+ return 0;
+
+ /* If we get here, then everything is good. */
+ return 1;
+}
+
+/* Return nonzero if sets and references for INSN are delayed.
+
+ Millicode insns are actually function calls with some special
+ constraints on arguments and register usage.
+
+ Millicode calls always expect their arguments in the integer argument
+ registers, and always return their result in %r29 (ret1). They
+ are expected to clobber their arguments, %r1, %r29, and %r31 and
+ nothing else.
+
+ By treating these effects as delayed, reorg can put insns which set
+ the argument registers into the delay slot of the millicode call --
+ thus they act more like traditional CALL_INSNs.
+
+ get_attr_type will try to recognize the given insn, so make sure to
+ filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
+ in particular. */
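+
+/* For example (illustrative only): because a millicode call such as $$mulI
+ is treated this way, reorg may move an insn that loads %r26 (the first
+ argument register) into the delay slot of the branch to the millicode
+ routine. */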
+int
+insn_sets_and_refs_are_delayed (insn)
+ rtx insn;
+{
+ return ((GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) != SEQUENCE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER
+ && get_attr_type (insn) == TYPE_MILLI));
+}