diff options
Diffstat (limited to 'gcc/config/pa/pa.c')
-rwxr-xr-x | gcc/config/pa/pa.c | 6491 |
1 files changed, 0 insertions, 6491 deletions
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c deleted file mode 100755 index de7f698..0000000 --- a/gcc/config/pa/pa.c +++ /dev/null @@ -1,6491 +0,0 @@ -/* Subroutines for insn-output.c for HPPA. - Copyright (C) 1992, 93-98, 1999 Free Software Foundation, Inc. - Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c - -This file is part of GNU CC. - -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -#include "config.h" -#include "system.h" - -#include "rtl.h" -#include "regs.h" -#include "hard-reg-set.h" -#include "real.h" -#include "insn-config.h" -#include "conditions.h" -#include "insn-flags.h" -#include "output.h" -#include "insn-attr.h" -#include "flags.h" -#include "tree.h" -#include "reload.h" -#include "c-tree.h" -#include "expr.h" -#include "obstack.h" -#include "toplev.h" - -static void restore_unscaled_index_insn_codes PROTO((rtx)); -static void record_unscaled_index_insn_codes PROTO((rtx)); -static void pa_combine_instructions PROTO((rtx)); -static int pa_can_combine_p PROTO((rtx, rtx, rtx, int, rtx, rtx, rtx)); -static int forward_branch_p PROTO((rtx)); -static int shadd_constant_p PROTO((int)); - -/* Save the operands last given to a compare for use when we - generate a scc or bcc insn. */ - -rtx hppa_compare_op0, hppa_compare_op1; -enum cmp_type hppa_branch_type; - -/* Which cpu we are scheduling for. */ -enum processor_type pa_cpu; - -/* String to hold which cpu we are scheduling for. */ -char *pa_cpu_string; - -/* Set by the FUNCTION_PROFILER macro. */ -int hp_profile_labelno; - -/* Counts for the number of callee-saved general and floating point - registers which were saved by the current function's prologue. */ -static int gr_saved, fr_saved; - -/* Whether or not the current function uses an out-of-line prologue - and epilogue. */ -static int out_of_line_prologue_epilogue; - -static rtx find_addr_reg (); - -/* Keep track of the number of bytes we have output in the CODE subspaces - during this compilation so we'll know when to emit inline long-calls. */ - -unsigned int total_code_bytes; - -/* Variables to handle plabels that we discover are necessary at assembly - output time. They are output after the current function. */ - -struct deferred_plabel -{ - rtx internal_label; - char *name; -} *deferred_plabels = 0; -int n_deferred_plabels = 0; - -/* Array indexed by INSN_UIDs holding the INSN_CODE of an insn which - uses an unscaled indexed address before delay slot scheduling. */ -static int *unscaled_index_insn_codes; - -/* Upper bound for the array. */ -static int max_unscaled_index_insn_codes_uid; - -void -override_options () -{ - /* Default to 7100 scheduling. If the 7100LC scheduling ever - gets reasonably tuned, it should be the default since that - what most PAs sold now are. */ - if (pa_cpu_string == NULL - || ! strcmp (pa_cpu_string, "7100")) - { - pa_cpu_string = "7100"; - pa_cpu = PROCESSOR_7100; - } - else if (! strcmp (pa_cpu_string, "700")) - { - pa_cpu_string = "700"; - pa_cpu = PROCESSOR_700; - } - else if (! strcmp (pa_cpu_string, "7100LC")) - { - pa_cpu_string = "7100LC"; - pa_cpu = PROCESSOR_7100LC; - } - else if (! strcmp (pa_cpu_string, "7200")) - { - pa_cpu_string = "7200"; - pa_cpu = PROCESSOR_7200; - } - /* CYGNUS LOCAL PA8000/law */ - else if (! strcmp (pa_cpu_string, "8000")) - { - pa_cpu_string = "8000"; - pa_cpu = PROCESSOR_8000; - } - else - { - warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC, 7200 and 8000\n", pa_cpu_string); - } - /* END CYGNUS LOCAL */ - - if (flag_pic && TARGET_PORTABLE_RUNTIME) - { - warning ("PIC code generation is not supported in the portable runtime model\n"); - } - - if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS)) - { - warning ("PIC code generation is not compatible with fast indirect calls\n"); - } - - if (flag_pic && profile_flag) - { - warning ("PIC code generation is not compatible with profiling\n"); - } - - if (TARGET_SPACE && (flag_pic || profile_flag)) - { - warning ("Out of line entry/exit sequences are not compatible\n"); - warning ("with PIC or profiling\n"); - } - - if (! TARGET_GAS && write_symbols != NO_DEBUG) - { - warning ("-g is only supported when using GAS on this processor,"); - warning ("-g option disabled."); - write_symbols = NO_DEBUG; - } -} - - -/* Return non-zero only if OP is a register of mode MODE, - or CONST0_RTX. */ -int -reg_or_0_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - return (op == CONST0_RTX (mode) || register_operand (op, mode)); -} - -/* Return non-zero if OP is suitable for use in a call to a named - function. - - (???) For 2.5 try to eliminate either call_operand_address or - function_label_operand, they perform very similar functions. */ -int -call_operand_address (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME); -} - -/* Return 1 if X contains a symbolic expression. We know these - expressions will have one of a few well defined forms, so - we need only check those forms. */ -int -symbolic_expression_p (x) - register rtx x; -{ - - /* Strip off any HIGH. */ - if (GET_CODE (x) == HIGH) - x = XEXP (x, 0); - - return (symbolic_operand (x, VOIDmode)); -} - -int -symbolic_operand (op, mode) - register rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - switch (GET_CODE (op)) - { - case SYMBOL_REF: - case LABEL_REF: - return 1; - case CONST: - op = XEXP (op, 0); - return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF - || GET_CODE (XEXP (op, 0)) == LABEL_REF) - && GET_CODE (XEXP (op, 1)) == CONST_INT); - default: - return 0; - } -} - -/* Return truth value of statement that OP is a symbolic memory - operand of mode MODE. */ - -int -symbolic_memory_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - if (GET_CODE (op) == SUBREG) - op = SUBREG_REG (op); - if (GET_CODE (op) != MEM) - return 0; - op = XEXP (op, 0); - return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST - || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF); -} - -/* Return 1 if the operand is either a register or a memory operand that is - not symbolic. */ - -int -reg_or_nonsymb_mem_operand (op, mode) - register rtx op; - enum machine_mode mode; -{ - if (register_operand (op, mode)) - return 1; - - if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode)) - return 1; - - return 0; -} - -/* Return 1 if the operand is either a register, zero, or a memory operand - that is not symbolic. */ - -int -reg_or_0_or_nonsymb_mem_operand (op, mode) - register rtx op; - enum machine_mode mode; -{ - if (register_operand (op, mode)) - return 1; - - if (op == CONST0_RTX (mode)) - return 1; - - if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode)) - return 1; - - return 0; -} - -/* Accept any constant that can be moved in one instructions into a - general register. */ -int -cint_ok_for_move (intval) - HOST_WIDE_INT intval; -{ - /* OK if ldo, ldil, or zdepi, can be used. */ - return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0 - || zdepi_cint_p (intval)); -} - -/* Accept anything that can be moved in one instruction into a general - register. */ -int -move_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - if (register_operand (op, mode)) - return 1; - - if (GET_CODE (op) == CONSTANT_P_RTX) - return 1; - - if (GET_CODE (op) == CONST_INT) - return cint_ok_for_move (INTVAL (op)); - - if (GET_CODE (op) == SUBREG) - op = SUBREG_REG (op); - if (GET_CODE (op) != MEM) - return 0; - - op = XEXP (op, 0); - if (GET_CODE (op) == LO_SUM) - return (register_operand (XEXP (op, 0), Pmode) - && CONSTANT_P (XEXP (op, 1))); - - /* Since move_operand is only used for source operands, we can always - allow scaled indexing! */ - if (! TARGET_DISABLE_INDEXING - && GET_CODE (op) == PLUS - && ((GET_CODE (XEXP (op, 0)) == MULT - && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG - && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT - && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode) - && GET_CODE (XEXP (op, 1)) == REG) - || (GET_CODE (XEXP (op, 1)) == MULT - &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG - && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT - && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode) - && GET_CODE (XEXP (op, 0)) == REG))) - return 1; - - return memory_address_p (mode, op); -} - -/* Accept REG and any CONST_INT that can be moved in one instruction into a - general register. */ -int -reg_or_cint_move_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - if (register_operand (op, mode)) - return 1; - - if (GET_CODE (op) == CONST_INT) - return cint_ok_for_move (INTVAL (op)); - - return 0; -} - -int -pic_label_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - if (!flag_pic) - return 0; - - switch (GET_CODE (op)) - { - case LABEL_REF: - return 1; - case CONST: - op = XEXP (op, 0); - return (GET_CODE (XEXP (op, 0)) == LABEL_REF - && GET_CODE (XEXP (op, 1)) == CONST_INT); - default: - return 0; - } -} - -int -fp_reg_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return reg_renumber && FP_REG_P (op); -} - - - -/* Return truth value of whether OP can be used as an operand in a - three operand arithmetic insn that accepts registers of mode MODE - or 14-bit signed integers. */ -int -arith_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - return (register_operand (op, mode) - || (GET_CODE (op) == CONST_INT && INT_14_BITS (op))); -} - -/* Return truth value of whether OP can be used as an operand in a - three operand arithmetic insn that accepts registers of mode MODE - or 11-bit signed integers. */ -int -arith11_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - return (register_operand (op, mode) - || (GET_CODE (op) == CONST_INT && INT_11_BITS (op))); -} - -/* A constant integer suitable for use in a PRE_MODIFY memory - reference. */ -int -pre_cint_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == CONST_INT - && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10); -} - -/* A constant integer suitable for use in a POST_MODIFY memory - reference. */ -int -post_cint_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == CONST_INT - && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10); -} - -int -arith_double_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - return (register_operand (op, mode) - || (GET_CODE (op) == CONST_DOUBLE - && GET_MODE (op) == mode - && VAL_14_BITS_P (CONST_DOUBLE_LOW (op)) - && ((CONST_DOUBLE_HIGH (op) >= 0) - == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0)))); -} - -/* Return truth value of whether OP is a integer which fits the - range constraining immediate operands in three-address insns, or - is an integer register. */ - -int -ireg_or_int5_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op)) - || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32)); -} - -/* Return truth value of whether OP is a integer which fits the - range constraining immediate operands in three-address insns. */ - -int -int5_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == CONST_INT && INT_5_BITS (op)); -} - -int -uint5_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op)); -} - -int -int11_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == CONST_INT && INT_11_BITS (op)); -} - -int -uint32_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ -#if HOST_BITS_PER_WIDE_INT > 32 - /* All allowed constants will fit a CONST_INT. */ - return (GET_CODE (op) == CONST_INT - && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L)); -#else - return (GET_CODE (op) == CONST_INT - || (GET_CODE (op) == CONST_DOUBLE - && CONST_DOUBLE_HIGH (op) == 0)); -#endif -} - -int -arith5_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - return register_operand (op, mode) || int5_operand (op, mode); -} - -/* True iff zdepi can be used to generate this CONST_INT. */ -int -zdepi_cint_p (x) - unsigned HOST_WIDE_INT x; -{ - unsigned HOST_WIDE_INT lsb_mask, t; - - /* This might not be obvious, but it's at least fast. - This function is critical; we don't have the time loops would take. */ - lsb_mask = x & -x; - t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1); - /* Return true iff t is a power of two. */ - return ((t & (t - 1)) == 0); -} - -/* True iff depi or extru can be used to compute (reg & mask). - Accept bit pattern like these: - 0....01....1 - 1....10....0 - 1..10..01..1 */ -int -and_mask_p (mask) - unsigned HOST_WIDE_INT mask; -{ - mask = ~mask; - mask += mask & -mask; - return (mask & (mask - 1)) == 0; -} - -/* True iff depi or extru can be used to compute (reg & OP). */ -int -and_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - return (register_operand (op, mode) - || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op)))); -} - -/* True iff depi can be used to compute (reg | MASK). */ -int -ior_mask_p (mask) - unsigned HOST_WIDE_INT mask; -{ - mask += mask & -mask; - return (mask & (mask - 1)) == 0; -} - -/* True iff depi can be used to compute (reg | OP). */ -int -ior_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op))); -} - -int -lhs_lshift_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode); -} - -/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx. - Such values can be the left hand side x in (x << r), using the zvdepi - instruction. */ -int -lhs_lshift_cint_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - unsigned HOST_WIDE_INT x; - if (GET_CODE (op) != CONST_INT) - return 0; - x = INTVAL (op) >> 4; - return (x & (x + 1)) == 0; -} - -int -arith32_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - return register_operand (op, mode) || GET_CODE (op) == CONST_INT; -} - -int -pc_or_label_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF); -} - -/* Legitimize PIC addresses. If the address is already - position-independent, we return ORIG. Newly generated - position-independent addresses go to REG. If we need more - than one register, we lose. */ - -rtx -legitimize_pic_address (orig, mode, reg) - rtx orig, reg; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - rtx pic_ref = orig; - - /* Labels need special handling. */ - if (pic_label_operand (orig)) - { - emit_insn (gen_pic_load_label (reg, orig)); - current_function_uses_pic_offset_table = 1; - return reg; - } - if (GET_CODE (orig) == SYMBOL_REF) - { - if (reg == 0) - abort (); - - if (flag_pic == 2) - { - emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig)); - pic_ref - = gen_rtx_MEM (Pmode, - gen_rtx_LO_SUM (Pmode, reg, - gen_rtx_UNSPEC (SImode, - gen_rtvec (1, orig), - 0))); - } - else - pic_ref = gen_rtx_MEM (Pmode, - gen_rtx_PLUS (Pmode, - pic_offset_table_rtx, orig)); - current_function_uses_pic_offset_table = 1; - RTX_UNCHANGING_P (pic_ref) = 1; - emit_move_insn (reg, pic_ref); - return reg; - } - else if (GET_CODE (orig) == CONST) - { - rtx base; - - if (GET_CODE (XEXP (orig, 0)) == PLUS - && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) - return orig; - - if (reg == 0) - abort (); - - if (GET_CODE (XEXP (orig, 0)) == PLUS) - { - base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); - orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, - base == reg ? 0 : reg); - } - else abort (); - if (GET_CODE (orig) == CONST_INT) - { - if (INT_14_BITS (orig)) - return plus_constant_for_output (base, INTVAL (orig)); - orig = force_reg (Pmode, orig); - } - pic_ref = gen_rtx_PLUS (Pmode, base, orig); - /* Likewise, should we set special REG_NOTEs here? */ - } - return pic_ref; -} - -/* Try machine-dependent ways of modifying an illegitimate address - to be legitimate. If we find one, return the new, valid address. - This macro is used in only one place: `memory_address' in explow.c. - - OLDX is the address as it was before break_out_memory_refs was called. - In some cases it is useful to look at this to decide what needs to be done. - - MODE and WIN are passed so that this macro can use - GO_IF_LEGITIMATE_ADDRESS. - - It is always safe for this macro to do nothing. It exists to recognize - opportunities to optimize the output. - - For the PA, transform: - - memory(X + <large int>) - - into: - - if (<large int> & mask) >= 16 - Y = (<large int> & ~mask) + mask + 1 Round up. - else - Y = (<large int> & ~mask) Round down. - Z = X + Y - memory (Z + (<large int> - Y)); - - This is for CSE to find several similar references, and only use one Z. - - X can either be a SYMBOL_REF or REG, but because combine can not - perform a 4->2 combination we do nothing for SYMBOL_REF + D where - D will not fit in 14 bits. - - MODE_FLOAT references allow displacements which fit in 5 bits, so use - 0x1f as the mask. - - MODE_INT references allow displacements which fit in 14 bits, so use - 0x3fff as the mask. - - This relies on the fact that most mode MODE_FLOAT references will use FP - registers and most mode MODE_INT references will use integer registers. - (In the rare case of an FP register used in an integer MODE, we depend - on secondary reloads to clean things up.) - - - It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special - manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed - addressing modes to be used). - - Put X and Z into registers. Then put the entire expression into - a register. */ - -rtx -hppa_legitimize_address (x, oldx, mode) - rtx x, oldx ATTRIBUTE_UNUSED; - enum machine_mode mode; -{ - rtx orig = x; - - if (flag_pic) - return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode)); - - /* Strip off CONST. */ - if (GET_CODE (x) == CONST) - x = XEXP (x, 0); - - /* Special case. Get the SYMBOL_REF into a register and use indexing. - That should always be safe. */ - if (GET_CODE (x) == PLUS - && GET_CODE (XEXP (x, 0)) == REG - && GET_CODE (XEXP (x, 1)) == SYMBOL_REF) - { - rtx reg = force_reg (SImode, XEXP (x, 1)); - return force_reg (SImode, gen_rtx_PLUS (SImode, reg, XEXP (x, 0))); - } - - /* Note we must reject symbols which represent function addresses - since the assembler/linker can't handle arithmetic on plabels. */ - if (GET_CODE (x) == PLUS - && GET_CODE (XEXP (x, 1)) == CONST_INT - && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF - && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0))) - || GET_CODE (XEXP (x, 0)) == REG)) - { - rtx int_part, ptr_reg; - int newoffset; - int offset = INTVAL (XEXP (x, 1)); - int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff; - - /* CYGNUS LOCAL pa8000/law */ - mask = (GET_MODE_CLASS (mode) == MODE_FLOAT - ? (TARGET_PARISC_2_0 ? 0x3fff : 0x1f) : 0x3fff); - /* END CYGNUS LOCAL */ - - /* Choose which way to round the offset. Round up if we - are >= halfway to the next boundary. */ - if ((offset & mask) >= ((mask + 1) / 2)) - newoffset = (offset & ~ mask) + mask + 1; - else - newoffset = (offset & ~ mask); - - /* If the newoffset will not fit in 14 bits (ldo), then - handling this would take 4 or 5 instructions (2 to load - the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to - add the new offset and the SYMBOL_REF.) Combine can - not handle 4->2 or 5->2 combinations, so do not create - them. */ - if (! VAL_14_BITS_P (newoffset) - && GET_CODE (XEXP (x, 0)) == SYMBOL_REF) - { - rtx const_part - = gen_rtx_CONST (VOIDmode, gen_rtx_PLUS (Pmode, - XEXP (x, 0), - GEN_INT (newoffset))); - rtx tmp_reg - = force_reg (Pmode, - gen_rtx_HIGH (Pmode, const_part)); - ptr_reg - = force_reg (Pmode, - gen_rtx_LO_SUM (Pmode, tmp_reg, const_part)); - } - else - { - if (! VAL_14_BITS_P (newoffset)) - int_part = force_reg (Pmode, GEN_INT (newoffset)); - else - int_part = GEN_INT (newoffset); - - ptr_reg = force_reg (Pmode, - gen_rtx_PLUS (Pmode, - force_reg (Pmode, XEXP (x, 0)), - int_part)); - } - return plus_constant (ptr_reg, offset - newoffset); - } - - /* Handle (plus (mult (a) (shadd_constant)) (b)). */ - - if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT - && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))) - && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o' - || GET_CODE (XEXP (x, 1)) == SUBREG) - && GET_CODE (XEXP (x, 1)) != CONST) - { - int val = INTVAL (XEXP (XEXP (x, 0), 1)); - rtx reg1, reg2; - - reg1 = XEXP (x, 1); - if (GET_CODE (reg1) != REG) - reg1 = force_reg (Pmode, force_operand (reg1, 0)); - - reg2 = XEXP (XEXP (x, 0), 0); - if (GET_CODE (reg2) != REG) - reg2 = force_reg (Pmode, force_operand (reg2, 0)); - - return force_reg (Pmode, gen_rtx_PLUS (Pmode, - gen_rtx_MULT (Pmode, reg2, - GEN_INT (val)), - reg1)); - } - - /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)). - - Only do so for floating point modes since this is more speculative - and we lose if it's an integer store. */ - if (GET_CODE (x) == PLUS - && GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT - && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT - && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1))) - && (mode == SFmode || mode == DFmode)) - { - - /* First, try and figure out what to use as a base register. */ - rtx reg1, reg2, base, idx, orig_base; - - reg1 = XEXP (XEXP (x, 0), 1); - reg2 = XEXP (x, 1); - base = NULL_RTX; - idx = NULL_RTX; - - /* Make sure they're both regs. If one was a SYMBOL_REF [+ const], - then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll - know it's a base register below. */ - if (GET_CODE (reg1) != REG) - reg1 = force_reg (Pmode, force_operand (reg1, 0)); - - if (GET_CODE (reg2) != REG) - reg2 = force_reg (Pmode, force_operand (reg2, 0)); - - /* Figure out what the base and index are. */ - - if (GET_CODE (reg1) == REG - && REGNO_POINTER_FLAG (REGNO (reg1))) - { - base = reg1; - orig_base = XEXP (XEXP (x, 0), 1); - idx = gen_rtx_PLUS (Pmode, - gen_rtx_MULT (Pmode, - XEXP (XEXP (XEXP (x, 0), 0), 0), - XEXP (XEXP (XEXP (x, 0), 0), 1)), - XEXP (x, 1)); - } - else if (GET_CODE (reg2) == REG - && REGNO_POINTER_FLAG (REGNO (reg2))) - { - base = reg2; - orig_base = XEXP (x, 1); - idx = XEXP (x, 0); - } - - if (base == 0) - return orig; - - /* If the index adds a large constant, try to scale the - constant so that it can be loaded with only one insn. */ - if (GET_CODE (XEXP (idx, 1)) == CONST_INT - && VAL_14_BITS_P (INTVAL (XEXP (idx, 1)) - / INTVAL (XEXP (XEXP (idx, 0), 1))) - && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0) - { - /* Divide the CONST_INT by the scale factor, then add it to A. */ - int val = INTVAL (XEXP (idx, 1)); - - val /= INTVAL (XEXP (XEXP (idx, 0), 1)); - reg1 = XEXP (XEXP (idx, 0), 0); - if (GET_CODE (reg1) != REG) - reg1 = force_reg (Pmode, force_operand (reg1, 0)); - - reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val))); - - /* We can now generate a simple scaled indexed address. */ - return force_reg (Pmode, - gen_rtx_PLUS (Pmode, - gen_rtx_MULT (Pmode, reg1, - XEXP (XEXP (idx, 0), 1)), - base)); - } - - /* If B + C is still a valid base register, then add them. */ - if (GET_CODE (XEXP (idx, 1)) == CONST_INT - && INTVAL (XEXP (idx, 1)) <= 4096 - && INTVAL (XEXP (idx, 1)) >= -4096) - { - int val = INTVAL (XEXP (XEXP (idx, 0), 1)); - rtx reg1, reg2; - - reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1))); - - reg2 = XEXP (XEXP (idx, 0), 0); - if (GET_CODE (reg2) != CONST_INT) - reg2 = force_reg (Pmode, force_operand (reg2, 0)); - - return force_reg (Pmode, gen_rtx_PLUS (Pmode, - gen_rtx_MULT (Pmode, reg2, - GEN_INT (val)), - reg1)); - } - - /* Get the index into a register, then add the base + index and - return a register holding the result. */ - - /* First get A into a register. */ - reg1 = XEXP (XEXP (idx, 0), 0); - if (GET_CODE (reg1) != REG) - reg1 = force_reg (Pmode, force_operand (reg1, 0)); - - /* And get B into a register. */ - reg2 = XEXP (idx, 1); - if (GET_CODE (reg2) != REG) - reg2 = force_reg (Pmode, force_operand (reg2, 0)); - - reg1 = force_reg (Pmode, - gen_rtx_PLUS (Pmode, - gen_rtx_MULT (Pmode, reg1, - XEXP (XEXP (idx, 0), 1)), - reg2)); - - /* Add the result to our base register and return. */ - return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1)); - - } - - /* Uh-oh. We might have an address for x[n-100000]. This needs - special handling to avoid creating an indexed memory address - with x-100000 as the base. - - If the constant part is small enough, then it's still safe because - there is a guard page at the beginning and end of the data segment. - - Scaled references are common enough that we want to try and rearrange the - terms so that we can use indexing for these addresses too. Only - do the optimization for floatint point modes. */ - - if (GET_CODE (x) == PLUS - && symbolic_expression_p (XEXP (x, 1))) - { - /* Ugly. We modify things here so that the address offset specified - by the index expression is computed first, then added to x to form - the entire address. */ - - rtx regx1, regx2, regy1, regy2, y; - - /* Strip off any CONST. */ - y = XEXP (x, 1); - if (GET_CODE (y) == CONST) - y = XEXP (y, 0); - - if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS) - { - /* See if this looks like - (plus (mult (reg) (shadd_const)) - (const (plus (symbol_ref) (const_int)))) - - Where const_int is small. In that case the const - expression is a valid pointer for indexing. - - If const_int is big, but can be divided evenly by shadd_const - and added to (reg). This allows more scaled indexed addresses. */ - if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF - && GET_CODE (XEXP (x, 0)) == MULT - && GET_CODE (XEXP (y, 1)) == CONST_INT - && INTVAL (XEXP (y, 1)) >= -4096 - && INTVAL (XEXP (y, 1)) <= 4095 - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT - && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))) - { - int val = INTVAL (XEXP (XEXP (x, 0), 1)); - rtx reg1, reg2; - - reg1 = XEXP (x, 1); - if (GET_CODE (reg1) != REG) - reg1 = force_reg (Pmode, force_operand (reg1, 0)); - - reg2 = XEXP (XEXP (x, 0), 0); - if (GET_CODE (reg2) != REG) - reg2 = force_reg (Pmode, force_operand (reg2, 0)); - - return force_reg (Pmode, - gen_rtx_PLUS (Pmode, - gen_rtx_MULT (Pmode, reg2, - GEN_INT (val)), - reg1)); - } - else if ((mode == DFmode || mode == SFmode) - && GET_CODE (XEXP (y, 0)) == SYMBOL_REF - && GET_CODE (XEXP (x, 0)) == MULT - && GET_CODE (XEXP (y, 1)) == CONST_INT - && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0 - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT - && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))) - { - regx1 - = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1)) - / INTVAL (XEXP (XEXP (x, 0), 1)))); - regx2 = XEXP (XEXP (x, 0), 0); - if (GET_CODE (regx2) != REG) - regx2 = force_reg (Pmode, force_operand (regx2, 0)); - regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode, - regx2, regx1)); - return force_reg (Pmode, - gen_rtx_PLUS (Pmode, - gen_rtx_MULT (Pmode, regx2, - XEXP (XEXP (x, 0), - 1)), - force_reg (Pmode, XEXP (y, 0)))); - } - else if (GET_CODE (XEXP (y, 1)) == CONST_INT - && INTVAL (XEXP (y, 1)) >= -4096 - && INTVAL (XEXP (y, 1)) <= 4095) - { - /* This is safe because of the guard page at the - beginning and end of the data space. Just - return the original address. */ - return orig; - } - else - { - /* Doesn't look like one we can optimize. */ - regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0)); - regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0)); - regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0)); - regx1 = force_reg (Pmode, - gen_rtx_fmt_ee (GET_CODE (y), Pmode, - regx1, regy2)); - return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1)); - } - } - } - - return orig; -} - -/* For the HPPA, REG and REG+CONST is cost 0 - and addresses involving symbolic constants are cost 2. - - PIC addresses are very expensive. - - It is no coincidence that this has the same structure - as GO_IF_LEGITIMATE_ADDRESS. */ -int -hppa_address_cost (X) - rtx X; -{ - if (GET_CODE (X) == PLUS) - return 1; - else if (GET_CODE (X) == LO_SUM) - return 1; - else if (GET_CODE (X) == HIGH) - return 2; - return 4; -} - -/* Emit insns to move operands[1] into operands[0]. - - Return 1 if we have written out everything that needs to be done to - do the move. Otherwise, return 0 and the caller will emit the move - normally. */ - -int -emit_move_sequence (operands, mode, scratch_reg) - rtx *operands; - enum machine_mode mode; - rtx scratch_reg; -{ - register rtx operand0 = operands[0]; - register rtx operand1 = operands[1]; - register rtx tem; - - if (scratch_reg - && reload_in_progress && GET_CODE (operand0) == REG - && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) - operand0 = reg_equiv_mem[REGNO (operand0)]; - else if (scratch_reg - && reload_in_progress && GET_CODE (operand0) == SUBREG - && GET_CODE (SUBREG_REG (operand0)) == REG - && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER) - { - SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))]; - operand0 = alter_subreg (operand0); - } - - if (scratch_reg - && reload_in_progress && GET_CODE (operand1) == REG - && REGNO (operand1) >= FIRST_PSEUDO_REGISTER) - operand1 = reg_equiv_mem[REGNO (operand1)]; - else if (scratch_reg - && reload_in_progress && GET_CODE (operand1) == SUBREG - && GET_CODE (SUBREG_REG (operand1)) == REG - && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER) - { - SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))]; - operand1 = alter_subreg (operand1); - } - - if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM - && ((tem = find_replacement (&XEXP (operand0, 0))) - != XEXP (operand0, 0))) - operand0 = gen_rtx_MEM (GET_MODE (operand0), tem); - if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM - && ((tem = find_replacement (&XEXP (operand1, 0))) - != XEXP (operand1, 0))) - operand1 = gen_rtx_MEM (GET_MODE (operand1), tem); - - /* Handle secondary reloads for loads/stores of FP registers from - REG+D addresses where D does not fit in 5 bits, including - (subreg (mem (addr))) cases. */ - if (fp_reg_operand (operand0, mode) - && ((GET_CODE (operand1) == MEM - && ! memory_address_p (DFmode, XEXP (operand1, 0))) - || ((GET_CODE (operand1) == SUBREG - && GET_CODE (XEXP (operand1, 0)) == MEM - && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0))))) - && scratch_reg) - { - if (GET_CODE (operand1) == SUBREG) - operand1 = XEXP (operand1, 0); - - scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg)); - - /* D might not fit in 14 bits either; for such cases load D into - scratch reg. */ - if (!memory_address_p (SImode, XEXP (operand1, 0))) - { - emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); - emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)), - SImode, - XEXP (XEXP (operand1, 0), 0), - scratch_reg)); - } - else - emit_move_insn (scratch_reg, XEXP (operand1, 0)); - emit_insn (gen_rtx_SET (VOIDmode, operand0, gen_rtx_MEM (mode, - scratch_reg))); - return 1; - } - else if (fp_reg_operand (operand1, mode) - && ((GET_CODE (operand0) == MEM - && ! memory_address_p (DFmode, XEXP (operand0, 0))) - || ((GET_CODE (operand0) == SUBREG) - && GET_CODE (XEXP (operand0, 0)) == MEM - && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0)))) - && scratch_reg) - { - if (GET_CODE (operand0) == SUBREG) - operand0 = XEXP (operand0, 0); - - scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg)); - /* D might not fit in 14 bits either; for such cases load D into - scratch reg. */ - if (!memory_address_p (SImode, XEXP (operand0, 0))) - { - emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1)); - emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, - 0)), - SImode, - XEXP (XEXP (operand0, 0), - 0), - scratch_reg)); - } - else - emit_move_insn (scratch_reg, XEXP (operand0, 0)); - emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg), - operand1)); - return 1; - } - /* Handle secondary reloads for loads of FP registers from constant - expressions by forcing the constant into memory. - - use scratch_reg to hold the address of the memory location. - - ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return - NO_REGS when presented with a const_int and an register class - containing only FP registers. Doing so unfortunately creates - more problems than it solves. Fix this for 2.5. */ - else if (fp_reg_operand (operand0, mode) - && CONSTANT_P (operand1) - && scratch_reg) - { - rtx xoperands[2]; - - /* Force the constant into memory and put the address of the - memory location into scratch_reg. */ - xoperands[0] = scratch_reg; - xoperands[1] = XEXP (force_const_mem (mode, operand1), 0); - emit_move_sequence (xoperands, Pmode, 0); - - /* Now load the destination register. */ - emit_insn (gen_rtx_SET (mode, operand0, gen_rtx_MEM (mode, scratch_reg))); - return 1; - } - /* Handle secondary reloads for SAR. These occur when trying to load - the SAR from memory a FP register, or with a constant. */ - else if (GET_CODE (operand0) == REG - && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS - && (GET_CODE (operand1) == MEM - || GET_CODE (operand1) == CONST_INT - || (GET_CODE (operand1) == REG - && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))) - && scratch_reg) - { - /* D might not fit in 14 bits either; for such cases load D into - scratch reg. */ - if (GET_CODE (operand1) == MEM - && !memory_address_p (SImode, XEXP (operand1, 0))) - { - emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); - emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, - 0)), - SImode, - XEXP (XEXP (operand1, 0), - 0), - scratch_reg)); - emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand1), - scratch_reg)); - } - else - emit_move_insn (scratch_reg, operand1); - emit_move_insn (operand0, scratch_reg); - return 1; - } - /* Handle most common case: storing into a register. */ - else if (register_operand (operand0, mode)) - { - if (register_operand (operand1, mode) - || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1)) - || (operand1 == CONST0_RTX (mode)) - || (GET_CODE (operand1) == HIGH - && !symbolic_operand (XEXP (operand1, 0), VOIDmode)) - /* Only `general_operands' can come here, so MEM is ok. */ - || GET_CODE (operand1) == MEM) - { - /* Run this case quickly. */ - emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); - return 1; - } - } - else if (GET_CODE (operand0) == MEM) - { - if (mode == DFmode && operand1 == CONST0_RTX (mode) - && !(reload_in_progress || reload_completed)) - { - rtx temp = gen_reg_rtx (DFmode); - - emit_insn (gen_rtx_SET (VOIDmode, temp, operand1)); - emit_insn (gen_rtx_SET (VOIDmode, operand0, temp)); - return 1; - } - if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode)) - { - /* Run this case quickly. */ - emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); - return 1; - } - if (! (reload_in_progress || reload_completed)) - { - operands[0] = validize_mem (operand0); - operands[1] = operand1 = force_reg (mode, operand1); - } - } - - /* Simplify the source if we need to. - Note we do have to handle function labels here, even though we do - not consider them legitimate constants. Loop optimizations can - call the emit_move_xxx with one as a source. */ - if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode)) - || function_label_operand (operand1, mode) - || (GET_CODE (operand1) == HIGH - && symbolic_operand (XEXP (operand1, 0), mode))) - { - int ishighonly = 0; - - if (GET_CODE (operand1) == HIGH) - { - ishighonly = 1; - operand1 = XEXP (operand1, 0); - } - if (symbolic_operand (operand1, mode)) - { - /* Argh. The assembler and linker can't handle arithmetic - involving plabels. - - So we force the plabel into memory, load operand0 from - the memory location, then add in the constant part. */ - if ((GET_CODE (operand1) == CONST - && GET_CODE (XEXP (operand1, 0)) == PLUS - && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode)) - || function_label_operand (operand1, mode)) - { - rtx temp, const_part; - - /* Figure out what (if any) scratch register to use. */ - if (reload_in_progress || reload_completed) - scratch_reg = scratch_reg ? scratch_reg : operand0; - else if (flag_pic) - scratch_reg = gen_reg_rtx (Pmode); - - if (GET_CODE (operand1) == CONST) - { - /* Save away the constant part of the expression. */ - const_part = XEXP (XEXP (operand1, 0), 1); - if (GET_CODE (const_part) != CONST_INT) - abort (); - - /* Force the function label into memory. */ - temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0)); - } - else - { - /* No constant part. */ - const_part = NULL_RTX; - - /* Force the function label into memory. */ - temp = force_const_mem (mode, operand1); - } - - - /* Get the address of the memory location. PIC-ify it if - necessary. */ - temp = XEXP (temp, 0); - if (flag_pic) - temp = legitimize_pic_address (temp, mode, scratch_reg); - - /* Put the address of the memory location into our destination - register. */ - operands[1] = temp; - emit_move_sequence (operands, mode, scratch_reg); - - /* Now load from the memory location into our destination - register. */ - operands[1] = gen_rtx_MEM (Pmode, operands[0]); - emit_move_sequence (operands, mode, scratch_reg); - - /* And add back in the constant part. */ - if (const_part != NULL_RTX) - expand_inc (operand0, const_part); - - return 1; - } - - if (flag_pic) - { - rtx temp; - - if (reload_in_progress || reload_completed) - temp = scratch_reg ? scratch_reg : operand0; - else - temp = gen_reg_rtx (Pmode); - - /* (const (plus (symbol) (const_int))) must be forced to - memory during/after reload if the const_int will not fit - in 14 bits. */ - if (GET_CODE (operand1) == CONST - && GET_CODE (XEXP (operand1, 0)) == PLUS - && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT - && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)) - && (reload_completed || reload_in_progress) - && flag_pic) - { - operands[1] = force_const_mem (mode, operand1); - operands[1] = legitimize_pic_address (XEXP (operands[1], 0), - mode, temp); - emit_move_sequence (operands, mode, temp); - } - else - { - operands[1] = legitimize_pic_address (operand1, mode, temp); - emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1])); - } - } - /* On the HPPA, references to data space are supposed to use dp, - register 27, but showing it in the RTL inhibits various cse - and loop optimizations. */ - else - { - rtx temp, set; - - if (reload_in_progress || reload_completed) - temp = scratch_reg ? scratch_reg : operand0; - else - temp = gen_reg_rtx (mode); - - /* Loading a SYMBOL_REF into a register makes that register - safe to be used as the base in an indexed address. - - Don't mark hard registers though. That loses. */ - if (GET_CODE (operand0) == REG - && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) - REGNO_POINTER_FLAG (REGNO (operand0)) = 1; - if (REGNO (temp) >= FIRST_PSEUDO_REGISTER) - REGNO_POINTER_FLAG (REGNO (temp)) = 1; - if (ishighonly) - set = gen_rtx_SET (mode, operand0, temp); - else - set = gen_rtx_SET (VOIDmode, operand0, - gen_rtx_LO_SUM (mode, temp, operand1)); - - emit_insn (gen_rtx_SET (VOIDmode, - temp, - gen_rtx_HIGH (mode, operand1))); - emit_insn (set); - - } - return 1; - } - else if (GET_CODE (operand1) != CONST_INT - || ! cint_ok_for_move (INTVAL (operand1))) - { - rtx temp; - - if (reload_in_progress || reload_completed) - temp = operand0; - else - temp = gen_reg_rtx (mode); - - emit_insn (gen_rtx_SET (VOIDmode, temp, - gen_rtx_HIGH (mode, operand1))); - operands[1] = gen_rtx_LO_SUM (mode, temp, operand1); - } - } - /* Now have insn-emit do whatever it normally does. */ - return 0; -} - -/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning - it will need a link/runtime reloc). */ - -int -reloc_needed (exp) - tree exp; -{ - int reloc = 0; - - switch (TREE_CODE (exp)) - { - case ADDR_EXPR: - return 1; - - case PLUS_EXPR: - case MINUS_EXPR: - reloc = reloc_needed (TREE_OPERAND (exp, 0)); - reloc |= reloc_needed (TREE_OPERAND (exp, 1)); - break; - - case NOP_EXPR: - case CONVERT_EXPR: - case NON_LVALUE_EXPR: - reloc = reloc_needed (TREE_OPERAND (exp, 0)); - break; - - case CONSTRUCTOR: - { - register tree link; - for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link)) - if (TREE_VALUE (link) != 0) - reloc |= reloc_needed (TREE_VALUE (link)); - } - break; - - case ERROR_MARK: - break; - - default: - break; - } - return reloc; -} - -/* Does operand (which is a symbolic_operand) live in text space? If - so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */ - -int -read_only_operand (operand) - rtx operand; -{ - if (GET_CODE (operand) == CONST) - operand = XEXP (XEXP (operand, 0), 0); - if (flag_pic) - { - if (GET_CODE (operand) == SYMBOL_REF) - return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand); - } - else - { - if (GET_CODE (operand) == SYMBOL_REF) - return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand); - } - return 1; -} - - -/* Return the best assembler insn template - for moving operands[1] into operands[0] as a fullword. */ -char * -singlemove_string (operands) - rtx *operands; -{ - HOST_WIDE_INT intval; - - if (GET_CODE (operands[0]) == MEM) - return "stw %r1,%0"; - if (GET_CODE (operands[1]) == MEM) - return "ldw %1,%0"; - if (GET_CODE (operands[1]) == CONST_DOUBLE) - { - long i; - REAL_VALUE_TYPE d; - - if (GET_MODE (operands[1]) != SFmode) - abort (); - - /* Translate the CONST_DOUBLE to a CONST_INT with the same target - bit pattern. */ - REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]); - REAL_VALUE_TO_TARGET_SINGLE (d, i); - - operands[1] = GEN_INT (i); - /* Fall through to CONST_INT case. */ - } - if (GET_CODE (operands[1]) == CONST_INT) - { - intval = INTVAL (operands[1]); - - if (VAL_14_BITS_P (intval)) - return "ldi %1,%0"; - else if ((intval & 0x7ff) == 0) - return "ldil L'%1,%0"; - else if (zdepi_cint_p (intval)) - return "zdepi %Z1,%0"; - else - return "ldil L'%1,%0\n\tldo R'%1(%0),%0"; - } - return "copy %1,%0"; -} - - -/* Compute position (in OP[1]) and width (in OP[2]) - useful for copying IMM to a register using the zdepi - instructions. Store the immediate value to insert in OP[0]. */ -void -compute_zdepi_operands (imm, op) - unsigned HOST_WIDE_INT imm; - unsigned *op; -{ - int lsb, len; - - /* Find the least significant set bit in IMM. */ - for (lsb = 0; lsb < 32; lsb++) - { - if ((imm & 1) != 0) - break; - imm >>= 1; - } - - /* Choose variants based on *sign* of the 5-bit field. */ - if ((imm & 0x10) == 0) - len = (lsb <= 28) ? 4 : 32 - lsb; - else - { - /* Find the width of the bitstring in IMM. */ - for (len = 5; len < 32; len++) - { - if ((imm & (1 << len)) == 0) - break; - } - - /* Sign extend IMM as a 5-bit value. */ - imm = (imm & 0xf) - 0x10; - } - - op[0] = imm; - op[1] = 31 - lsb; - op[2] = len; -} - -/* Output assembler code to perform a doubleword move insn - with operands OPERANDS. */ - -char * -output_move_double (operands) - rtx *operands; -{ - enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1; - rtx latehalf[2]; - rtx addreg0 = 0, addreg1 = 0; - - /* First classify both operands. */ - - if (REG_P (operands[0])) - optype0 = REGOP; - else if (offsettable_memref_p (operands[0])) - optype0 = OFFSOP; - else if (GET_CODE (operands[0]) == MEM) - optype0 = MEMOP; - else - optype0 = RNDOP; - - if (REG_P (operands[1])) - optype1 = REGOP; - else if (CONSTANT_P (operands[1])) - optype1 = CNSTOP; - else if (offsettable_memref_p (operands[1])) - optype1 = OFFSOP; - else if (GET_CODE (operands[1]) == MEM) - optype1 = MEMOP; - else - optype1 = RNDOP; - - /* Check for the cases that the operand constraints are not - supposed to allow to happen. Abort if we get one, - because generating code for these cases is painful. */ - - if (optype0 != REGOP && optype1 != REGOP) - abort (); - - /* Handle auto decrementing and incrementing loads and stores - specifically, since the structure of the function doesn't work - for them without major modification. Do it better when we learn - this port about the general inc/dec addressing of PA. - (This was written by tege. Chide him if it doesn't work.) */ - - if (optype0 == MEMOP) - { - /* We have to output the address syntax ourselves, since print_operand - doesn't deal with the addresses we want to use. Fix this later. */ - - rtx addr = XEXP (operands[0], 0); - if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) - { - rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); - - operands[0] = XEXP (addr, 0); - if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG) - abort (); - - if (!reg_overlap_mentioned_p (high_reg, addr)) - { - /* No overlap between high target register and address - register. (We do this in a non-obvious way to - save a register file writeback) */ - if (GET_CODE (addr) == POST_INC) - return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)"; - return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)"; - } - else - abort(); - } - else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) - { - rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); - - operands[0] = XEXP (addr, 0); - if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG) - abort (); - - if (!reg_overlap_mentioned_p (high_reg, addr)) - { - /* No overlap between high target register and address - register. (We do this in a non-obvious way to - save a register file writeback) */ - if (GET_CODE (addr) == PRE_INC) - return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)"; - return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)"; - } - else - abort(); - } - } - if (optype1 == MEMOP) - { - /* We have to output the address syntax ourselves, since print_operand - doesn't deal with the addresses we want to use. Fix this later. */ - - rtx addr = XEXP (operands[1], 0); - if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) - { - rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); - - operands[1] = XEXP (addr, 0); - if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG) - abort (); - - if (!reg_overlap_mentioned_p (high_reg, addr)) - { - /* No overlap between high target register and address - register. (We do this in a non-obvious way to - save a register file writeback) */ - if (GET_CODE (addr) == POST_INC) - return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0"; - return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0"; - } - else - { - /* This is an undefined situation. We should load into the - address register *and* update that register. Probably - we don't need to handle this at all. */ - if (GET_CODE (addr) == POST_INC) - return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0"; - return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0"; - } - } - else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) - { - rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); - - operands[1] = XEXP (addr, 0); - if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG) - abort (); - - if (!reg_overlap_mentioned_p (high_reg, addr)) - { - /* No overlap between high target register and address - register. (We do this in a non-obvious way to - save a register file writeback) */ - if (GET_CODE (addr) == PRE_INC) - return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0"; - return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0"; - } - else - { - /* This is an undefined situation. We should load into the - address register *and* update that register. Probably - we don't need to handle this at all. */ - if (GET_CODE (addr) == PRE_INC) - return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0"; - return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0"; - } - } - else if (GET_CODE (addr) == PLUS - && GET_CODE (XEXP (addr, 0)) == MULT) - { - rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); - - if (!reg_overlap_mentioned_p (high_reg, addr)) - { - rtx xoperands[3]; - - xoperands[0] = high_reg; - xoperands[1] = XEXP (addr, 1); - xoperands[2] = XEXP (XEXP (addr, 0), 0); - xoperands[3] = XEXP (XEXP (addr, 0), 1); - output_asm_insn ("sh%O3addl %2,%1,%0", xoperands); - return "ldw 4(0,%0),%R0\n\tldw 0(0,%0),%0"; - } - else - { - rtx xoperands[3]; - - xoperands[0] = high_reg; - xoperands[1] = XEXP (addr, 1); - xoperands[2] = XEXP (XEXP (addr, 0), 0); - xoperands[3] = XEXP (XEXP (addr, 0), 1); - output_asm_insn ("sh%O3addl %2,%1,%R0", xoperands); - return "ldw 0(0,%R0),%0\n\tldw 4(0,%R0),%R0"; - } - - } - } - - /* If an operand is an unoffsettable memory ref, find a register - we can increment temporarily to make it refer to the second word. */ - - if (optype0 == MEMOP) - addreg0 = find_addr_reg (XEXP (operands[0], 0)); - - if (optype1 == MEMOP) - addreg1 = find_addr_reg (XEXP (operands[1], 0)); - - /* Ok, we can do one word at a time. - Normally we do the low-numbered word first. - - In either case, set up in LATEHALF the operands to use - for the high-numbered word and in some cases alter the - operands in OPERANDS to be suitable for the low-numbered word. */ - - if (optype0 == REGOP) - latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); - else if (optype0 == OFFSOP) - latehalf[0] = adj_offsettable_operand (operands[0], 4); - else - latehalf[0] = operands[0]; - - if (optype1 == REGOP) - latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); - else if (optype1 == OFFSOP) - latehalf[1] = adj_offsettable_operand (operands[1], 4); - else if (optype1 == CNSTOP) - split_double (operands[1], &operands[1], &latehalf[1]); - else - latehalf[1] = operands[1]; - - /* If the first move would clobber the source of the second one, - do them in the other order. - - This can happen in two cases: - - mem -> register where the first half of the destination register - is the same register used in the memory's address. Reload - can create such insns. - - mem in this case will be either register indirect or register - indirect plus a valid offset. - - register -> register move where REGNO(dst) == REGNO(src + 1) - someone (Tim/Tege?) claimed this can happen for parameter loads. - - Handle mem -> register case first. */ - if (optype0 == REGOP - && (optype1 == MEMOP || optype1 == OFFSOP) - && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1, - operands[1], 0)) - { - /* Do the late half first. */ - if (addreg1) - output_asm_insn ("ldo 4(%0),%0", &addreg1); - output_asm_insn (singlemove_string (latehalf), latehalf); - - /* Then clobber. */ - if (addreg1) - output_asm_insn ("ldo -4(%0),%0", &addreg1); - return singlemove_string (operands); - } - - /* Now handle register -> register case. */ - if (optype0 == REGOP && optype1 == REGOP - && REGNO (operands[0]) == REGNO (operands[1]) + 1) - { - output_asm_insn (singlemove_string (latehalf), latehalf); - return singlemove_string (operands); - } - - /* Normal case: do the two words, low-numbered first. */ - - output_asm_insn (singlemove_string (operands), operands); - - /* Make any unoffsettable addresses point at high-numbered word. */ - if (addreg0) - output_asm_insn ("ldo 4(%0),%0", &addreg0); - if (addreg1) - output_asm_insn ("ldo 4(%0),%0", &addreg1); - - /* Do that word. */ - output_asm_insn (singlemove_string (latehalf), latehalf); - - /* Undo the adds we just did. */ - if (addreg0) - output_asm_insn ("ldo -4(%0),%0", &addreg0); - if (addreg1) - output_asm_insn ("ldo -4(%0),%0", &addreg1); - - return ""; -} - -char * -output_fp_move_double (operands) - rtx *operands; -{ - if (FP_REG_P (operands[0])) - { - if (FP_REG_P (operands[1]) - || operands[1] == CONST0_RTX (GET_MODE (operands[0]))) - output_asm_insn ("fcpy,dbl %r1,%0", operands); - else - output_asm_insn ("fldd%F1 %1,%0", operands); - } - else if (FP_REG_P (operands[1])) - { - output_asm_insn ("fstd%F0 %1,%0", operands); - } - else if (operands[1] == CONST0_RTX (GET_MODE (operands[0]))) - { - if (GET_CODE (operands[0]) == REG) - { - rtx xoperands[2]; - xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); - xoperands[0] = operands[0]; - output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands); - } - /* This is a pain. You have to be prepared to deal with an - arbitrary address here including pre/post increment/decrement. - - so avoid this in the MD. */ - else - abort (); - } - else abort (); - return ""; -} - -/* Return a REG that occurs in ADDR with coefficient 1. - ADDR can be effectively incremented by incrementing REG. */ - -static rtx -find_addr_reg (addr) - rtx addr; -{ - while (GET_CODE (addr) == PLUS) - { - if (GET_CODE (XEXP (addr, 0)) == REG) - addr = XEXP (addr, 0); - else if (GET_CODE (XEXP (addr, 1)) == REG) - addr = XEXP (addr, 1); - else if (CONSTANT_P (XEXP (addr, 0))) - addr = XEXP (addr, 1); - else if (CONSTANT_P (XEXP (addr, 1))) - addr = XEXP (addr, 0); - else - abort (); - } - if (GET_CODE (addr) == REG) - return addr; - abort (); -} - -/* Emit code to perform a block move. - - OPERANDS[0] is the destination pointer as a REG, clobbered. - OPERANDS[1] is the source pointer as a REG, clobbered. - OPERANDS[2] is a register for temporary storage. - OPERANDS[4] is the size as a CONST_INT - OPERANDS[3] is a register for temporary storage. - OPERANDS[5] is the alignment safe to use, as a CONST_INT. - OPERANDS[6] is another temporary register. */ - -char * -output_block_move (operands, size_is_constant) - rtx *operands; - int size_is_constant ATTRIBUTE_UNUSED; -{ - int align = INTVAL (operands[5]); - unsigned long n_bytes = INTVAL (operands[4]); - - /* We can't move more than four bytes at a time because the PA - has no longer integer move insns. (Could use fp mem ops?) */ - if (align > 4) - align = 4; - - /* Note that we know each loop below will execute at least twice - (else we would have open-coded the copy). */ - switch (align) - { - case 4: - /* Pre-adjust the loop counter. */ - operands[4] = GEN_INT (n_bytes - 8); - output_asm_insn ("ldi %4,%2", operands); - - /* Copying loop. */ - output_asm_insn ("ldws,ma 4(0,%1),%3", operands); - output_asm_insn ("ldws,ma 4(0,%1),%6", operands); - output_asm_insn ("stws,ma %3,4(0,%0)", operands); - output_asm_insn ("addib,>= -8,%2,.-12", operands); - output_asm_insn ("stws,ma %6,4(0,%0)", operands); - - /* Handle the residual. There could be up to 7 bytes of - residual to copy! */ - if (n_bytes % 8 != 0) - { - operands[4] = GEN_INT (n_bytes % 4); - if (n_bytes % 8 >= 4) - output_asm_insn ("ldws,ma 4(0,%1),%3", operands); - if (n_bytes % 4 != 0) - output_asm_insn ("ldw 0(0,%1),%6", operands); - if (n_bytes % 8 >= 4) - output_asm_insn ("stws,ma %3,4(0,%0)", operands); - if (n_bytes % 4 != 0) - output_asm_insn ("stbys,e %6,%4(0,%0)", operands); - } - return ""; - - case 2: - /* Pre-adjust the loop counter. */ - operands[4] = GEN_INT (n_bytes - 4); - output_asm_insn ("ldi %4,%2", operands); - - /* Copying loop. */ - output_asm_insn ("ldhs,ma 2(0,%1),%3", operands); - output_asm_insn ("ldhs,ma 2(0,%1),%6", operands); - output_asm_insn ("sths,ma %3,2(0,%0)", operands); - output_asm_insn ("addib,>= -4,%2,.-12", operands); - output_asm_insn ("sths,ma %6,2(0,%0)", operands); - - /* Handle the residual. */ - if (n_bytes % 4 != 0) - { - if (n_bytes % 4 >= 2) - output_asm_insn ("ldhs,ma 2(0,%1),%3", operands); - if (n_bytes % 2 != 0) - output_asm_insn ("ldb 0(0,%1),%6", operands); - if (n_bytes % 4 >= 2) - output_asm_insn ("sths,ma %3,2(0,%0)", operands); - if (n_bytes % 2 != 0) - output_asm_insn ("stb %6,0(0,%0)", operands); - } - return ""; - - case 1: - /* Pre-adjust the loop counter. */ - operands[4] = GEN_INT (n_bytes - 2); - output_asm_insn ("ldi %4,%2", operands); - - /* Copying loop. */ - output_asm_insn ("ldbs,ma 1(0,%1),%3", operands); - output_asm_insn ("ldbs,ma 1(0,%1),%6", operands); - output_asm_insn ("stbs,ma %3,1(0,%0)", operands); - output_asm_insn ("addib,>= -2,%2,.-12", operands); - output_asm_insn ("stbs,ma %6,1(0,%0)", operands); - - /* Handle the residual. */ - if (n_bytes % 2 != 0) - { - output_asm_insn ("ldb 0(0,%1),%3", operands); - output_asm_insn ("stb %3,0(0,%0)", operands); - } - return ""; - - default: - abort (); - } -} - -/* Count the number of insns necessary to handle this block move. - - Basic structure is the same as emit_block_move, except that we - count insns rather than emit them. */ - -int -compute_movstrsi_length (insn) - rtx insn; -{ - rtx pat = PATTERN (insn); - int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0)); - unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0)); - unsigned int n_insns = 0; - - /* We can't move more than four bytes at a time because the PA - has no longer integer move insns. (Could use fp mem ops?) */ - if (align > 4) - align = 4; - - /* The basic copying loop. */ - n_insns = 6; - - /* Residuals. */ - if (n_bytes % (2 * align) != 0) - { - if ((n_bytes % (2 * align)) >= align) - n_insns += 2; - - if ((n_bytes % align) != 0) - n_insns += 2; - } - - /* Lengths are expressed in bytes now; each insn is 4 bytes. */ - return n_insns * 4; -} - - -char * -output_and (operands) - rtx *operands; -{ - if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) - { - unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); - int ls0, ls1, ms0, p, len; - - for (ls0 = 0; ls0 < 32; ls0++) - if ((mask & (1 << ls0)) == 0) - break; - - for (ls1 = ls0; ls1 < 32; ls1++) - if ((mask & (1 << ls1)) != 0) - break; - - for (ms0 = ls1; ms0 < 32; ms0++) - if ((mask & (1 << ms0)) == 0) - break; - - if (ms0 != 32) - abort(); - - if (ls1 == 32) - { - len = ls0; - - if (len == 0) - abort (); - - operands[2] = GEN_INT (len); - return "extru %1,31,%2,%0"; - } - else - { - /* We could use this `depi' for the case above as well, but `depi' - requires one more register file access than an `extru'. */ - - p = 31 - ls0; - len = ls1 - ls0; - - operands[2] = GEN_INT (p); - operands[3] = GEN_INT (len); - return "depi 0,%2,%3,%0"; - } - } - else - return "and %1,%2,%0"; -} - -char * -output_ior (operands) - rtx *operands; -{ - unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); - int bs0, bs1, p, len; - - if (INTVAL (operands[2]) == 0) - return "copy %1,%0"; - - for (bs0 = 0; bs0 < 32; bs0++) - if ((mask & (1 << bs0)) != 0) - break; - - for (bs1 = bs0; bs1 < 32; bs1++) - if ((mask & (1 << bs1)) == 0) - break; - - if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask) - abort(); - - p = 31 - bs0; - len = bs1 - bs0; - - operands[2] = GEN_INT (p); - operands[3] = GEN_INT (len); - return "depi -1,%2,%3,%0"; -} - -/* Output an ascii string. */ -void -output_ascii (file, p, size) - FILE *file; - unsigned char *p; - int size; -{ - int i; - int chars_output; - unsigned char partial_output[16]; /* Max space 4 chars can occupy. */ - - /* The HP assembler can only take strings of 256 characters at one - time. This is a limitation on input line length, *not* the - length of the string. Sigh. Even worse, it seems that the - restriction is in number of input characters (see \xnn & - \whatever). So we have to do this very carefully. */ - - fputs ("\t.STRING \"", file); - - chars_output = 0; - for (i = 0; i < size; i += 4) - { - int co = 0; - int io = 0; - for (io = 0, co = 0; io < MIN (4, size - i); io++) - { - register unsigned int c = p[i + io]; - - if (c == '\"' || c == '\\') - partial_output[co++] = '\\'; - if (c >= ' ' && c < 0177) - partial_output[co++] = c; - else - { - unsigned int hexd; - partial_output[co++] = '\\'; - partial_output[co++] = 'x'; - hexd = c / 16 - 0 + '0'; - if (hexd > '9') - hexd -= '9' - 'a' + 1; - partial_output[co++] = hexd; - hexd = c % 16 - 0 + '0'; - if (hexd > '9') - hexd -= '9' - 'a' + 1; - partial_output[co++] = hexd; - } - } - if (chars_output + co > 243) - { - fputs ("\"\n\t.STRING \"", file); - chars_output = 0; - } - fwrite (partial_output, 1, co, file); - chars_output += co; - co = 0; - } - fputs ("\"\n", file); -} - -/* Try to rewrite floating point comparisons & branches to avoid - useless add,tr insns. - - CHECK_NOTES is nonzero if we should examine REG_DEAD notes - to see if FPCC is dead. CHECK_NOTES is nonzero for the - first attempt to remove useless add,tr insns. It is zero - for the second pass as reorg sometimes leaves bogus REG_DEAD - notes lying around. - - When CHECK_NOTES is zero we can only eliminate add,tr insns - when there's a 1:1 correspondence between fcmp and ftest/fbranch - instructions. */ -void -remove_useless_addtr_insns (insns, check_notes) - rtx insns; - int check_notes; -{ - rtx insn; - static int pass = 0; - - /* This is fairly cheap, so always run it when optimizing. */ - if (optimize > 0) - { - int fcmp_count = 0; - int fbranch_count = 0; - - /* Walk all the insns in this function looking for fcmp & fbranch - instructions. Keep track of how many of each we find. */ - insns = get_insns (); - for (insn = insns; insn; insn = next_insn (insn)) - { - rtx tmp; - - /* Ignore anything that isn't an INSN or a JUMP_INSN. */ - if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN) - continue; - - tmp = PATTERN (insn); - - /* It must be a set. */ - if (GET_CODE (tmp) != SET) - continue; - - /* If the destination is CCFP, then we've found an fcmp insn. */ - tmp = SET_DEST (tmp); - if (GET_CODE (tmp) == REG && REGNO (tmp) == 0) - { - fcmp_count++; - continue; - } - - tmp = PATTERN (insn); - /* If this is an fbranch instruction, bump the fbranch counter. */ - if (GET_CODE (tmp) == SET - && SET_DEST (tmp) == pc_rtx - && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE - && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE - && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG - && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0) - { - fbranch_count++; - continue; - } - } - - - /* Find all floating point compare + branch insns. If possible, - reverse the comparison & the branch to avoid add,tr insns. */ - for (insn = insns; insn; insn = next_insn (insn)) - { - rtx tmp, next; - - /* Ignore anything that isn't an INSN. */ - if (GET_CODE (insn) != INSN) - continue; - - tmp = PATTERN (insn); - - /* It must be a set. */ - if (GET_CODE (tmp) != SET) - continue; - - /* The destination must be CCFP, which is register zero. */ - tmp = SET_DEST (tmp); - if (GET_CODE (tmp) != REG || REGNO (tmp) != 0) - continue; - - /* INSN should be a set of CCFP. - - See if the result of this insn is used in a reversed FP - conditional branch. If so, reverse our condition and - the branch. Doing so avoids useless add,tr insns. */ - next = next_insn (insn); - while (next) - { - /* Jumps, calls and labels stop our search. */ - if (GET_CODE (next) == JUMP_INSN - || GET_CODE (next) == CALL_INSN - || GET_CODE (next) == CODE_LABEL) - break; - - /* As does another fcmp insn. */ - if (GET_CODE (next) == INSN - && GET_CODE (PATTERN (next)) == SET - && GET_CODE (SET_DEST (PATTERN (next))) == REG - && REGNO (SET_DEST (PATTERN (next))) == 0) - break; - - next = next_insn (next); - } - - /* Is NEXT_INSN a branch? */ - if (next - && GET_CODE (next) == JUMP_INSN) - { - rtx pattern = PATTERN (next); - - /* If it a reversed fp conditional branch (eg uses add,tr) - and CCFP dies, then reverse our conditional and the branch - to avoid the add,tr. */ - if (GET_CODE (pattern) == SET - && SET_DEST (pattern) == pc_rtx - && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE - && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE - && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG - && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 - && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC - && (fcmp_count == fbranch_count - || (check_notes - && find_regno_note (next, REG_DEAD, 0)))) - { - /* Reverse the branch. */ - tmp = XEXP (SET_SRC (pattern), 1); - XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); - XEXP (SET_SRC (pattern), 2) = tmp; - INSN_CODE (next) = -1; - - /* Reverse our condition. */ - tmp = PATTERN (insn); - PUT_CODE (XEXP (tmp, 1), - reverse_condition (GET_CODE (XEXP (tmp, 1)))); - } - } - } - } - - pass = !pass; - -} - -/* You may have trouble believing this, but this is the HP-PA stack - layout. Wow. - - Offset Contents - - Variable arguments (optional; any number may be allocated) - - SP-(4*(N+9)) arg word N - : : - SP-56 arg word 5 - SP-52 arg word 4 - - Fixed arguments (must be allocated; may remain unused) - - SP-48 arg word 3 - SP-44 arg word 2 - SP-40 arg word 1 - SP-36 arg word 0 - - Frame Marker - - SP-32 External Data Pointer (DP) - SP-28 External sr4 - SP-24 External/stub RP (RP') - SP-20 Current RP - SP-16 Static Link - SP-12 Clean up - SP-8 Calling Stub RP (RP'') - SP-4 Previous SP - - Top of Frame - - SP-0 Stack Pointer (points to next available address) - -*/ - -/* This function saves registers as follows. Registers marked with ' are - this function's registers (as opposed to the previous function's). - If a frame_pointer isn't needed, r4 is saved as a general register; - the space for the frame pointer is still allocated, though, to keep - things simple. - - - Top of Frame - - SP (FP') Previous FP - SP + 4 Alignment filler (sigh) - SP + 8 Space for locals reserved here. - . - . - . - SP + n All call saved register used. - . - . - . - SP + o All call saved fp registers used. - . - . - . - SP + p (SP') points to next available address. - -*/ - -/* Emit RTL to store REG at the memory location specified by BASE+DISP. - Handle case where DISP > 8k by using the add_high_const pattern. - - Note in DISP > 8k case, we will leave the high part of the address - in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ -static void -store_reg (reg, disp, base) - int reg, disp, base; -{ - if (VAL_14_BITS_P (disp)) - { - emit_move_insn (gen_rtx_MEM (SImode, - gen_rtx_PLUS (SImode, - gen_rtx_REG (SImode, base), - GEN_INT (disp))), - gen_rtx_REG (SImode, reg)); - } - else - { - emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1), - gen_rtx_REG (SImode, base), - GEN_INT (disp))); - emit_move_insn (gen_rtx_MEM (SImode, - gen_rtx_LO_SUM (SImode, - gen_rtx_REG (SImode, 1), - GEN_INT (disp))), - gen_rtx_REG (SImode, reg)); - } -} - -/* Emit RTL to load REG from the memory location specified by BASE+DISP. - Handle case where DISP > 8k by using the add_high_const pattern. - - Note in DISP > 8k case, we will leave the high part of the address - in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ -static void -load_reg (reg, disp, base) - int reg, disp, base; -{ - if (VAL_14_BITS_P (disp)) - { - emit_move_insn (gen_rtx_REG (SImode, reg), - gen_rtx_MEM (SImode, - gen_rtx_PLUS (SImode, - gen_rtx_REG (SImode, base), - GEN_INT (disp)))); - } - else - { - emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1), - gen_rtx_REG (SImode, base), - GEN_INT (disp))); - emit_move_insn (gen_rtx_REG (SImode, reg), - gen_rtx_MEM (SImode, - gen_rtx_LO_SUM (SImode, - gen_rtx_REG (SImode, 1), - GEN_INT (disp)))); - } -} - -/* Emit RTL to set REG to the value specified by BASE+DISP. - Handle case where DISP > 8k by using the add_high_const pattern. - - Note in DISP > 8k case, we will leave the high part of the address - in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ -static void -set_reg_plus_d(reg, base, disp) - int reg, base, disp; -{ - if (VAL_14_BITS_P (disp)) - { - emit_move_insn (gen_rtx_REG (SImode, reg), - gen_rtx_PLUS (SImode, - gen_rtx_REG (SImode, base), - GEN_INT (disp))); - } - else - { - emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1), - gen_rtx_REG (SImode, base), - GEN_INT (disp))); - emit_move_insn (gen_rtx_REG (SImode, reg), - gen_rtx_LO_SUM (SImode, - gen_rtx_REG (SImode, 1), - GEN_INT (disp))); - } -} - -/* Global variables set by FUNCTION_PROLOGUE. */ -/* Size of frame. Need to know this to emit return insns from - leaf procedures. */ -static int actual_fsize; -static int local_fsize, save_fregs; - -int -compute_frame_size (size, fregs_live) - int size; - int *fregs_live; -{ - extern int current_function_outgoing_args_size; - int i, fsize; - - /* 8 is space for frame pointer + filler. If any frame is allocated - we need to add this in because of STARTING_FRAME_OFFSET. */ - fsize = size + (size || frame_pointer_needed ? 8 : 0); - - /* We must leave enough space for all the callee saved registers - from 3 .. highest used callee save register since we don't - know if we're going to have an inline or out of line prologue - and epilogue. */ - for (i = 18; i >= 3; i--) - if (regs_ever_live[i]) - { - fsize += 4 * (i - 2); - break; - } - - /* Round the stack. */ - fsize = (fsize + 7) & ~7; - - /* We must leave enough space for all the callee saved registers - from 3 .. highest used callee save register since we don't - know if we're going to have an inline or out of line prologue - and epilogue. */ - for (i = 66; i >= 48; i -= 2) - if (regs_ever_live[i] || regs_ever_live[i + 1]) - { - if (fregs_live) - *fregs_live = 1; - - fsize += 4 * (i - 46); - break; - } - - fsize += current_function_outgoing_args_size; - if (! leaf_function_p () || fsize) - fsize += 32; - return (fsize + 63) & ~63; -} - -rtx hp_profile_label_rtx; -static char hp_profile_label_name[8]; -void -output_function_prologue (file, size) - FILE *file; - int size ATTRIBUTE_UNUSED; -{ - /* The function's label and associated .PROC must never be - separated and must be output *after* any profiling declarations - to avoid changing spaces/subspaces within a procedure. */ - ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); - fputs ("\t.PROC\n", file); - - /* hppa_expand_prologue does the dirty work now. We just need - to output the assembler directives which denote the start - of a function. */ - fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize); - if (regs_ever_live[2] || profile_flag) - fputs (",CALLS,SAVE_RP", file); - else - fputs (",NO_CALLS", file); - - if (frame_pointer_needed) - fputs (",SAVE_SP", file); - - /* Pass on information about the number of callee register saves - performed in the prologue. - - The compiler is supposed to pass the highest register number - saved, the assembler then has to adjust that number before - entering it into the unwind descriptor (to account for any - caller saved registers with lower register numbers than the - first callee saved register). */ - if (gr_saved) - fprintf (file, ",ENTRY_GR=%d", gr_saved + 2); - - if (fr_saved) - fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); - - fputs ("\n\t.ENTRY\n", file); - - /* Horrid hack. emit_function_prologue will modify this RTL in - place to get the expected results. */ - if (profile_flag) - ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP", - hp_profile_labelno); - - /* If we're using GAS and not using the portable runtime model, then - we don't need to accumulate the total number of code bytes. */ - if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME) - total_code_bytes = 0; - else if (insn_addresses) - { - unsigned int old_total = total_code_bytes; - - total_code_bytes += insn_addresses[INSN_UID (get_last_insn())]; - total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT; - - /* Be prepared to handle overflows. */ - total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes; - } - else - total_code_bytes = -1; - - remove_useless_addtr_insns (get_insns (), 0); - - /* Restore INSN_CODEs for insn which use unscaled indexed addresses. */ - restore_unscaled_index_insn_codes (get_insns ()); -} - -void -hppa_expand_prologue() -{ - extern char call_used_regs[]; - int size = get_frame_size (); - int merge_sp_adjust_with_store = 0; - int i, offset; - rtx tmpreg, size_rtx; - - gr_saved = 0; - fr_saved = 0; - save_fregs = 0; - local_fsize = size + (size || frame_pointer_needed ? 8 : 0); - actual_fsize = compute_frame_size (size, &save_fregs); - - /* Compute a few things we will use often. */ - tmpreg = gen_rtx_REG (SImode, 1); - size_rtx = GEN_INT (actual_fsize); - - /* Handle out of line prologues and epilogues. */ - if (TARGET_SPACE) - { - rtx operands[2]; - int saves = 0; - int outline_insn_count = 0; - int inline_insn_count = 0; - - /* Count the number of insns for the inline and out of line - variants so we can choose one appropriately. - - No need to screw with counting actual_fsize operations -- they're - done for both inline and out of line prologues. */ - if (regs_ever_live[2]) - inline_insn_count += 1; - - if (! cint_ok_for_move (local_fsize)) - outline_insn_count += 2; - else - outline_insn_count += 1; - - /* Put the register save info into %r22. */ - for (i = 18; i >= 3; i--) - if (regs_ever_live[i] && ! call_used_regs[i]) - { - /* -1 because the stack adjustment is normally done in - the same insn as a register save. */ - inline_insn_count += (i - 2) - 1; - saves = i; - break; - } - - for (i = 66; i >= 48; i -= 2) - if (regs_ever_live[i] || regs_ever_live[i + 1]) - { - /* +1 needed as we load %r1 with the start of the freg - save area. */ - inline_insn_count += (i/2 - 23) + 1; - saves |= ((i/2 - 12 ) << 16); - break; - } - - if (frame_pointer_needed) - inline_insn_count += 3; - - if (! cint_ok_for_move (saves)) - outline_insn_count += 2; - else - outline_insn_count += 1; - - if (TARGET_PORTABLE_RUNTIME) - outline_insn_count += 2; - else - outline_insn_count += 1; - - /* If there's a lot of insns in the prologue, then do it as - an out-of-line sequence. */ - if (inline_insn_count > outline_insn_count) - { - /* Put the local_fisze into %r19. */ - operands[0] = gen_rtx_REG (SImode, 19); - operands[1] = GEN_INT (local_fsize); - emit_move_insn (operands[0], operands[1]); - - /* Put the stack size into %r21. */ - operands[0] = gen_rtx_REG (SImode, 21); - operands[1] = size_rtx; - emit_move_insn (operands[0], operands[1]); - - operands[0] = gen_rtx_REG (SImode, 22); - operands[1] = GEN_INT (saves); - emit_move_insn (operands[0], operands[1]); - - /* Now call the out-of-line prologue. */ - emit_insn (gen_outline_prologue_call ()); - emit_insn (gen_blockage ()); - - /* Note that we're using an out-of-line prologue. */ - out_of_line_prologue_epilogue = 1; - return; - } - } - - out_of_line_prologue_epilogue = 0; - - /* Save RP first. The calling conventions manual states RP will - always be stored into the caller's frame at sp-20. */ - if (regs_ever_live[2] || profile_flag) - store_reg (2, -20, STACK_POINTER_REGNUM); - - /* Allocate the local frame and set up the frame pointer if needed. */ - if (actual_fsize) - { - if (frame_pointer_needed) - { - /* Copy the old frame pointer temporarily into %r1. Set up the - new stack pointer, then store away the saved old frame pointer - into the stack at sp+actual_fsize and at the same time update - the stack pointer by actual_fsize bytes. Two versions, first - handles small (<8k) frames. The second handles large (>8k) - frames. */ - emit_move_insn (tmpreg, frame_pointer_rtx); - emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); - if (VAL_14_BITS_P (actual_fsize)) - emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx)); - else - { - /* It is incorrect to store the saved frame pointer at *sp, - then increment sp (writes beyond the current stack boundary). - - So instead use stwm to store at *sp and post-increment the - stack pointer as an atomic operation. Then increment sp to - finish allocating the new frame. */ - emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64))); - set_reg_plus_d (STACK_POINTER_REGNUM, - STACK_POINTER_REGNUM, - actual_fsize - 64); - } - } - /* no frame pointer needed. */ - else - { - /* In some cases we can perform the first callee register save - and allocating the stack frame at the same time. If so, just - make a note of it and defer allocating the frame until saving - the callee registers. */ - if (VAL_14_BITS_P (-actual_fsize) - && local_fsize == 0 - && ! profile_flag - && ! flag_pic) - merge_sp_adjust_with_store = 1; - /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */ - else if (actual_fsize != 0) - set_reg_plus_d (STACK_POINTER_REGNUM, - STACK_POINTER_REGNUM, - actual_fsize); - } - } - - /* The hppa calling conventions say that %r19, the pic offset - register, is saved at sp - 32 (in this function's frame) when - generating PIC code. FIXME: What is the correct thing to do - for functions which make no calls and allocate no frame? Do - we need to allocate a frame, or can we just omit the save? For - now we'll just omit the save. */ - if (actual_fsize != 0 && flag_pic) - store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM); - - /* Profiling code. - - Instead of taking one argument, the counter label, as most normal - mcounts do, _mcount appears to behave differently on the HPPA. It - takes the return address of the caller, the address of this routine, - and the address of the label. Also, it isn't magic, so - argument registers have to be preserved. */ - if (profile_flag) - { - int pc_offset, i, arg_offset, basereg, offsetadj; - - pc_offset = 4 + (frame_pointer_needed - ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20) - : (VAL_14_BITS_P (actual_fsize) ? 4 : 8)); - - /* When the function has a frame pointer, use it as the base - register for saving/restore registers. Else use the stack - pointer. Adjust the offset according to the frame size if - this function does not have a frame pointer. */ - - basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM - : STACK_POINTER_REGNUM; - offsetadj = frame_pointer_needed ? 0 : actual_fsize; - - /* Horrid hack. emit_function_prologue will modify this RTL in - place to get the expected results. sprintf here is just to - put something in the name. */ - sprintf(hp_profile_label_name, "LP$%04d", -1); - hp_profile_label_rtx = gen_rtx_SYMBOL_REF (SImode, hp_profile_label_name); - if (current_function_returns_struct) - store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg); - - for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4) - if (regs_ever_live [i]) - { - store_reg (i, arg_offset, basereg); - /* Deal with arg_offset not fitting in 14 bits. */ - pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8; - } - - emit_move_insn (gen_rtx_REG (SImode, 26), gen_rtx_REG (SImode, 2)); - emit_move_insn (tmpreg, gen_rtx_HIGH (SImode, hp_profile_label_rtx)); - emit_move_insn (gen_rtx_REG (SImode, 24), - gen_rtx_LO_SUM (SImode, tmpreg, hp_profile_label_rtx)); - /* %r25 is set from within the output pattern. */ - emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20))); - - /* Restore argument registers. */ - for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4) - if (regs_ever_live [i]) - load_reg (i, arg_offset, basereg); - - if (current_function_returns_struct) - load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg); - - } - - /* Normal register save. - - Do not save the frame pointer in the frame_pointer_needed case. It - was done earlier. */ - if (frame_pointer_needed) - { - for (i = 18, offset = local_fsize; i >= 4; i--) - if (regs_ever_live[i] && ! call_used_regs[i]) - { - store_reg (i, offset, FRAME_POINTER_REGNUM); - offset += 4; - gr_saved++; - } - /* Account for %r3 which is saved in a special place. */ - gr_saved++; - } - /* No frame pointer needed. */ - else - { - for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--) - if (regs_ever_live[i] && ! call_used_regs[i]) - { - /* If merge_sp_adjust_with_store is nonzero, then we can - optimize the first GR save. */ - if (merge_sp_adjust_with_store) - { - merge_sp_adjust_with_store = 0; - emit_insn (gen_post_stwm (stack_pointer_rtx, - gen_rtx_REG (SImode, i), - GEN_INT (-offset))); - } - else - store_reg (i, offset, STACK_POINTER_REGNUM); - offset += 4; - gr_saved++; - } - - /* If we wanted to merge the SP adjustment with a GR save, but we never - did any GR saves, then just emit the adjustment here. */ - if (merge_sp_adjust_with_store) - set_reg_plus_d (STACK_POINTER_REGNUM, - STACK_POINTER_REGNUM, - actual_fsize); - } - - /* Align pointer properly (doubleword boundary). */ - offset = (offset + 7) & ~7; - - /* Floating point register store. */ - if (save_fregs) - { - /* First get the frame or stack pointer to the start of the FP register - save area. */ - if (frame_pointer_needed) - set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset); - else - set_reg_plus_d (1, STACK_POINTER_REGNUM, offset); - - /* Now actually save the FP registers. */ - for (i = 66; i >= 48; i -= 2) - { - if (regs_ever_live[i] || regs_ever_live[i + 1]) - { - emit_move_insn (gen_rtx_MEM (DFmode, - gen_rtx_POST_INC (DFmode, tmpreg)), - gen_rtx_REG (DFmode, i)); - fr_saved++; - } - } - } - - /* When generating PIC code it is necessary to save/restore the - PIC register around each function call. We used to do this - in the call patterns themselves, but that implementation - made incorrect assumptions about using global variables to hold - per-function rtl code generated in the backend. - - So instead, we copy the PIC register into a reserved callee saved - register in the prologue. Then after each call we reload the PIC - register from the callee saved register. We also reload the PIC - register from the callee saved register in the epilogue ensure the - PIC register is valid at function exit. - - This may (depending on the exact characteristics of the function) - even be more efficient. - - Avoid this if the callee saved register wasn't used (these are - leaf functions). */ - if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED]) - emit_move_insn (gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM_SAVED), - gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM)); -} - - -void -output_function_epilogue (file, size) - FILE *file; - int size ATTRIBUTE_UNUSED; -{ - rtx insn = get_last_insn (); - - /* hppa_expand_epilogue does the dirty work now. We just need - to output the assembler directives which denote the end - of a function. - - To make debuggers happy, emit a nop if the epilogue was completely - eliminated due to a volatile call as the last insn in the - current function. That way the return address (in %r2) will - always point to a valid instruction in the current function. */ - - /* Get the last real insn. */ - if (GET_CODE (insn) == NOTE) - insn = prev_real_insn (insn); - - /* If it is a sequence, then look inside. */ - if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) - insn = XVECEXP (PATTERN (insn), 0, 0); - - /* If insn is a CALL_INSN, then it must be a call to a volatile - function (otherwise there would be epilogue insns). */ - if (insn && GET_CODE (insn) == CALL_INSN) - fputs ("\tnop\n", file); - - fputs ("\t.EXIT\n\t.PROCEND\n", file); - - /* Free up stuff we don't need anymore. */ - if (unscaled_index_insn_codes) - free (unscaled_index_insn_codes); - max_unscaled_index_insn_codes_uid = 0; -} - -void -hppa_expand_epilogue () -{ - rtx tmpreg; - int offset,i; - int merge_sp_adjust_with_load = 0; - - /* Handle out of line prologues and epilogues. */ - if (TARGET_SPACE && out_of_line_prologue_epilogue) - { - int saves = 0; - rtx operands[2]; - - /* Put the register save info into %r22. */ - for (i = 18; i >= 3; i--) - if (regs_ever_live[i] && ! call_used_regs[i]) - { - saves = i; - break; - } - - for (i = 66; i >= 48; i -= 2) - if (regs_ever_live[i] || regs_ever_live[i + 1]) - { - saves |= ((i/2 - 12 ) << 16); - break; - } - - emit_insn (gen_blockage ()); - - /* Put the local_fisze into %r19. */ - operands[0] = gen_rtx_REG (SImode, 19); - operands[1] = GEN_INT (local_fsize); - emit_move_insn (operands[0], operands[1]); - - /* Put the stack size into %r21. */ - operands[0] = gen_rtx_REG (SImode, 21); - operands[1] = GEN_INT (actual_fsize); - emit_move_insn (operands[0], operands[1]); - - operands[0] = gen_rtx_REG (SImode, 22); - operands[1] = GEN_INT (saves); - emit_move_insn (operands[0], operands[1]); - - /* Now call the out-of-line epilogue. */ - emit_insn (gen_outline_epilogue_call ()); - return; - } - - /* We will use this often. */ - tmpreg = gen_rtx_REG (SImode, 1); - - /* Try to restore RP early to avoid load/use interlocks when - RP gets used in the return (bv) instruction. This appears to still - be necessary even when we schedule the prologue and epilogue. */ - if (frame_pointer_needed - && (regs_ever_live [2] || profile_flag)) - load_reg (2, -20, FRAME_POINTER_REGNUM); - - /* No frame pointer, and stack is smaller than 8k. */ - else if (! frame_pointer_needed - && VAL_14_BITS_P (actual_fsize + 20) - && (regs_ever_live[2] || profile_flag)) - load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM); - - /* General register restores. */ - if (frame_pointer_needed) - { - for (i = 18, offset = local_fsize; i >= 4; i--) - if (regs_ever_live[i] && ! call_used_regs[i]) - { - load_reg (i, offset, FRAME_POINTER_REGNUM); - offset += 4; - } - } - else - { - for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--) - { - if (regs_ever_live[i] && ! call_used_regs[i]) - { - /* Only for the first load. - merge_sp_adjust_with_load holds the register load - with which we will merge the sp adjustment. */ - if (VAL_14_BITS_P (actual_fsize + 20) - && local_fsize == 0 - && ! merge_sp_adjust_with_load) - merge_sp_adjust_with_load = i; - else - load_reg (i, offset, STACK_POINTER_REGNUM); - offset += 4; - } - } - } - - /* Align pointer properly (doubleword boundary). */ - offset = (offset + 7) & ~7; - - /* FP register restores. */ - if (save_fregs) - { - /* Adjust the register to index off of. */ - if (frame_pointer_needed) - set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset); - else - set_reg_plus_d (1, STACK_POINTER_REGNUM, offset); - - /* Actually do the restores now. */ - for (i = 66; i >= 48; i -= 2) - { - if (regs_ever_live[i] || regs_ever_live[i + 1]) - { - emit_move_insn (gen_rtx_REG (DFmode, i), - gen_rtx_MEM (DFmode, - gen_rtx_POST_INC (DFmode, tmpreg))); - } - } - } - - /* Emit a blockage insn here to keep these insns from being moved to - an earlier spot in the epilogue, or into the main instruction stream. - - This is necessary as we must not cut the stack back before all the - restores are finished. */ - emit_insn (gen_blockage ()); - /* No frame pointer, but we have a stack greater than 8k. We restore - %r2 very late in this case. (All other cases are restored as early - as possible.) */ - if (! frame_pointer_needed - && ! VAL_14_BITS_P (actual_fsize + 20) - && (regs_ever_live[2] || profile_flag)) - { - set_reg_plus_d (STACK_POINTER_REGNUM, - STACK_POINTER_REGNUM, - - actual_fsize); - - /* This used to try and be clever by not depending on the value in - %r30 and instead use the value held in %r1 (so that the 2nd insn - which sets %r30 could be put in the delay slot of the return insn). - - That won't work since if the stack is exactly 8k set_reg_plus_d - doesn't set %r1, just %r30. */ - load_reg (2, - 20, STACK_POINTER_REGNUM); - } - - /* Reset stack pointer (and possibly frame pointer). The stack - pointer is initially set to fp + 64 to avoid a race condition. */ - else if (frame_pointer_needed) - { - set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64); - emit_insn (gen_pre_ldwm (frame_pointer_rtx, - stack_pointer_rtx, - GEN_INT (-64))); - } - /* If we were deferring a callee register restore, do it now. */ - else if (! frame_pointer_needed && merge_sp_adjust_with_load) - emit_insn (gen_pre_ldwm (gen_rtx_REG (SImode, merge_sp_adjust_with_load), - stack_pointer_rtx, - GEN_INT (- actual_fsize))); - else if (actual_fsize != 0) - set_reg_plus_d (STACK_POINTER_REGNUM, - STACK_POINTER_REGNUM, - - actual_fsize); -} - -/* Fetch the return address for the frame COUNT steps up from - the current frame, after the prologue. FRAMEADDR is the - frame pointer of the COUNT frame. - - We want to ignore any export stub remnants here. - - The value returned is used in two different ways: - - 1. To find a function's caller. - - 2. To change the return address for a function. - - This function handles most instances of case 1; however, it will - fail if there are two levels of stubs to execute on the return - path. The only way I believe that can happen is if the return value - needs a parameter relocation, which never happens for C code. - - This function handles most instances of case 2; however, it will - fail if we did not originally have stub code on the return path - but will need code on the new return path. This can happen if - the caller & callee are both in the main program, but the new - return location is in a shared library. - - To handle this correctly we need to set the return pointer at - frame-20 to point to a return stub frame-24 to point to the - location we wish to return to. */ - -rtx -return_addr_rtx (count, frameaddr) - int count ATTRIBUTE_UNUSED; - rtx frameaddr; -{ - rtx label; - rtx saved_rp; - rtx ins; - - saved_rp = gen_reg_rtx (Pmode); - - /* First, we start off with the normal return address pointer from - -20[frameaddr]. */ - - emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD)); - - /* Get pointer to the instruction stream. We have to mask out the - privilege level from the two low order bits of the return address - pointer here so that ins will point to the start of the first - instruction that would have been executed if we returned. */ - ins = copy_to_reg (gen_rtx_AND (Pmode, - copy_to_reg (gen_rtx_MEM (Pmode, saved_rp)), - MASK_RETURN_ADDR)); - label = gen_label_rtx (); - - /* Check the instruction stream at the normal return address for the - export stub: - - 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp - 0x004010a1 | stub+12: ldsid (sr0,rp),r1 - 0x00011820 | stub+16: mtsp r1,sr0 - 0xe0400002 | stub+20: be,n 0(sr0,rp) - - If it is an export stub, than our return address is really in - -24[frameaddr]. */ - - emit_cmp_insn (gen_rtx_MEM (SImode, ins), - GEN_INT (0x4bc23fd1), - NE, NULL_RTX, SImode, 1, 0); - emit_jump_insn (gen_bne (label)); - - emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)), - GEN_INT (0x004010a1), - NE, NULL_RTX, SImode, 1, 0); - emit_jump_insn (gen_bne (label)); - - emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)), - GEN_INT (0x00011820), - NE, NULL_RTX, SImode, 1, 0); - emit_jump_insn (gen_bne (label)); - - emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)), - GEN_INT (0xe0400002), - NE, NULL_RTX, SImode, 1, 0); - - /* If there is no export stub then just use our initial guess of - -20[frameaddr]. */ - - emit_jump_insn (gen_bne (label)); - - /* Here we know that our return address pointer points to an export - stub. We don't want to return the address of the export stub, - but rather the return address that leads back into user code. - That return address is stored at -24[frameaddr]. */ - - emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD)); - - emit_label (label); - return gen_rtx_MEM (Pmode, memory_address (Pmode, saved_rp)); -} - -/* This is only valid once reload has completed because it depends on - knowing exactly how much (if any) frame there is and... - - It's only valid if there is no frame marker to de-allocate and... - - It's only valid if %r2 hasn't been saved into the caller's frame - (we're not profiling and %r2 isn't live anywhere). */ -int -hppa_can_use_return_insn_p () -{ - return (reload_completed - && (compute_frame_size (get_frame_size (), 0) ? 0 : 1) - && ! profile_flag - && ! regs_ever_live[2] - && ! frame_pointer_needed); -} - -void -emit_bcond_fp (code, operand0) - enum rtx_code code; - rtx operand0; -{ - emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, - gen_rtx_IF_THEN_ELSE (VOIDmode, - gen_rtx_fmt_ee (code, - VOIDmode, - gen_rtx_REG (CCFPmode, 0), - const0_rtx), - gen_rtx_LABEL_REF (VOIDmode, operand0), - pc_rtx))); - -} - -rtx -gen_cmp_fp (code, operand0, operand1) - enum rtx_code code; - rtx operand0, operand1; -{ - return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0), - gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)); -} - -/* Adjust the cost of a scheduling dependency. Return the new cost of - a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ - -int -pa_adjust_cost (insn, link, dep_insn, cost) - rtx insn; - rtx link; - rtx dep_insn; - int cost; -{ - enum attr_type attr_type; - - if (! recog_memoized (insn)) - return 0; - - /* CYGNUS LOCAL PA8000/law */ - /* No cost adjustments are needed for the PA8000 */ - if (pa_cpu == PROCESSOR_8000) - return 0; - /* END CYGNUS LOCAL */ - - attr_type = get_attr_type (insn); - - if (REG_NOTE_KIND (link) == 0) - { - /* Data dependency; DEP_INSN writes a register that INSN reads some - cycles later. */ - - if (attr_type == TYPE_FPSTORE) - { - rtx pat = PATTERN (insn); - rtx dep_pat = PATTERN (dep_insn); - if (GET_CODE (pat) == PARALLEL) - { - /* This happens for the fstXs,mb patterns. */ - pat = XVECEXP (pat, 0, 0); - } - if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) - /* If this happens, we have to extend this to schedule - optimally. Return 0 for now. */ - return 0; - - if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) - { - if (! recog_memoized (dep_insn)) - return 0; - /* DEP_INSN is writing its result to the register - being stored in the fpstore INSN. */ - switch (get_attr_type (dep_insn)) - { - case TYPE_FPLOAD: - /* This cost 3 cycles, not 2 as the md says for the - 700 and 7100. */ - return cost + 1; - - case TYPE_FPALU: - case TYPE_FPMULSGL: - case TYPE_FPMULDBL: - case TYPE_FPDIVSGL: - case TYPE_FPDIVDBL: - case TYPE_FPSQRTSGL: - case TYPE_FPSQRTDBL: - /* In these important cases, we save one cycle compared to - when flop instruction feed each other. */ - return cost - 1; - - default: - return cost; - } - } - } - - /* For other data dependencies, the default cost specified in the - md is correct. */ - return cost; - } - else if (REG_NOTE_KIND (link) == REG_DEP_ANTI) - { - /* Anti dependency; DEP_INSN reads a register that INSN writes some - cycles later. */ - - if (attr_type == TYPE_FPLOAD) - { - rtx pat = PATTERN (insn); - rtx dep_pat = PATTERN (dep_insn); - if (GET_CODE (pat) == PARALLEL) - { - /* This happens for the fldXs,mb patterns. */ - pat = XVECEXP (pat, 0, 0); - } - if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) - /* If this happens, we have to extend this to schedule - optimally. Return 0 for now. */ - return 0; - - if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) - { - if (! recog_memoized (dep_insn)) - return 0; - switch (get_attr_type (dep_insn)) - { - case TYPE_FPALU: - case TYPE_FPMULSGL: - case TYPE_FPMULDBL: - case TYPE_FPDIVSGL: - case TYPE_FPDIVDBL: - case TYPE_FPSQRTSGL: - case TYPE_FPSQRTDBL: - /* A fpload can't be issued until one cycle before a - preceding arithmetic operation has finished if - the target of the fpload is any of the sources - (or destination) of the arithmetic operation. */ - return cost - 1; - - default: - return 0; - } - } - } - else if (attr_type == TYPE_FPALU) - { - rtx pat = PATTERN (insn); - rtx dep_pat = PATTERN (dep_insn); - if (GET_CODE (pat) == PARALLEL) - { - /* This happens for the fldXs,mb patterns. */ - pat = XVECEXP (pat, 0, 0); - } - if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) - /* If this happens, we have to extend this to schedule - optimally. Return 0 for now. */ - return 0; - - if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) - { - if (! recog_memoized (dep_insn)) - return 0; - switch (get_attr_type (dep_insn)) - { - case TYPE_FPDIVSGL: - case TYPE_FPDIVDBL: - case TYPE_FPSQRTSGL: - case TYPE_FPSQRTDBL: - /* An ALU flop can't be issued until two cycles before a - preceding divide or sqrt operation has finished if - the target of the ALU flop is any of the sources - (or destination) of the divide or sqrt operation. */ - return cost - 2; - - default: - return 0; - } - } - } - - /* For other anti dependencies, the cost is 0. */ - return 0; - } - else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) - { - /* Output dependency; DEP_INSN writes a register that INSN writes some - cycles later. */ - if (attr_type == TYPE_FPLOAD) - { - rtx pat = PATTERN (insn); - rtx dep_pat = PATTERN (dep_insn); - if (GET_CODE (pat) == PARALLEL) - { - /* This happens for the fldXs,mb patterns. */ - pat = XVECEXP (pat, 0, 0); - } - if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) - /* If this happens, we have to extend this to schedule - optimally. Return 0 for now. */ - return 0; - - if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) - { - if (! recog_memoized (dep_insn)) - return 0; - switch (get_attr_type (dep_insn)) - { - case TYPE_FPALU: - case TYPE_FPMULSGL: - case TYPE_FPMULDBL: - case TYPE_FPDIVSGL: - case TYPE_FPDIVDBL: - case TYPE_FPSQRTSGL: - case TYPE_FPSQRTDBL: - /* A fpload can't be issued until one cycle before a - preceding arithmetic operation has finished if - the target of the fpload is the destination of the - arithmetic operation. */ - return cost - 1; - - default: - return 0; - } - } - } - else if (attr_type == TYPE_FPALU) - { - rtx pat = PATTERN (insn); - rtx dep_pat = PATTERN (dep_insn); - if (GET_CODE (pat) == PARALLEL) - { - /* This happens for the fldXs,mb patterns. */ - pat = XVECEXP (pat, 0, 0); - } - if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) - /* If this happens, we have to extend this to schedule - optimally. Return 0 for now. */ - return 0; - - if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) - { - if (! recog_memoized (dep_insn)) - return 0; - switch (get_attr_type (dep_insn)) - { - case TYPE_FPDIVSGL: - case TYPE_FPDIVDBL: - case TYPE_FPSQRTSGL: - case TYPE_FPSQRTDBL: - /* An ALU flop can't be issued until two cycles before a - preceding divide or sqrt operation has finished if - the target of the ALU flop is also the target of - the divide or sqrt operation. */ - return cost - 2; - - default: - return 0; - } - } - } - - /* For other output dependencies, the cost is 0. */ - return 0; - } - else - abort (); -} - -/* Return any length adjustment needed by INSN which already has its length - computed as LENGTH. Return zero if no adjustment is necessary. - - For the PA: function calls, millicode calls, and backwards short - conditional branches with unfilled delay slots need an adjustment by +1 - (to account for the NOP which will be inserted into the instruction stream). - - Also compute the length of an inline block move here as it is too - complicated to express as a length attribute in pa.md. */ -int -pa_adjust_insn_length (insn, length) - rtx insn; - int length; -{ - rtx pat = PATTERN (insn); - - /* Call insns which are *not* indirect and have unfilled delay slots. */ - if (GET_CODE (insn) == CALL_INSN) - { - - if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL - && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF) - return 4; - else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET - && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0)) - == SYMBOL_REF) - return 4; - else - return 0; - } - /* Jumps inside switch tables which have unfilled delay slots - also need adjustment. */ - else if (GET_CODE (insn) == JUMP_INSN - && simplejump_p (insn) - && GET_MODE (insn) == SImode) - return 4; - /* Millicode insn with an unfilled delay slot. */ - else if (GET_CODE (insn) == INSN - && GET_CODE (pat) != SEQUENCE - && GET_CODE (pat) != USE - && GET_CODE (pat) != CLOBBER - && get_attr_type (insn) == TYPE_MILLI) - return 4; - /* Block move pattern. */ - else if (GET_CODE (insn) == INSN - && GET_CODE (pat) == PARALLEL - && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM - && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM - && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode - && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) - return compute_movstrsi_length (insn) - 4; - /* Conditional branch with an unfilled delay slot. */ - else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn)) - { - /* Adjust a short backwards conditional with an unfilled delay slot. */ - if (GET_CODE (pat) == SET - && length == 4 - && ! forward_branch_p (insn)) - return 4; - else if (GET_CODE (pat) == PARALLEL - && get_attr_type (insn) == TYPE_PARALLEL_BRANCH - && length == 4) - return 4; - /* Adjust dbra insn with short backwards conditional branch with - unfilled delay slot -- only for case where counter is in a - general register register. */ - else if (GET_CODE (pat) == PARALLEL - && GET_CODE (XVECEXP (pat, 0, 1)) == SET - && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG - && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0)) - && length == 4 - && ! forward_branch_p (insn)) - return 4; - else - return 0; - } - return 0; -} - -/* Print operand X (an rtx) in assembler syntax to file FILE. - CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. - For `%' followed by punctuation, CODE is the punctuation and X is null. */ - -void -print_operand (file, x, code) - FILE *file; - rtx x; - int code; -{ - switch (code) - { - case '#': - /* Output a 'nop' if there's nothing for the delay slot. */ - if (dbr_sequence_length () == 0) - fputs ("\n\tnop", file); - return; - case '*': - /* Output an nullification completer if there's nothing for the */ - /* delay slot or nullification is requested. */ - if (dbr_sequence_length () == 0 || - (final_sequence && - INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))) - fputs (",n", file); - return; - case 'R': - /* Print out the second register name of a register pair. - I.e., R (6) => 7. */ - fputs (reg_names[REGNO (x)+1], file); - return; - case 'r': - /* A register or zero. */ - if (x == const0_rtx - || (x == CONST0_RTX (DFmode)) - || (x == CONST0_RTX (SFmode))) - { - fputs ("0", file); - return; - } - else - break; - case 'C': /* Plain (C)ondition */ - case 'X': - switch (GET_CODE (x)) - { - case EQ: - fputs ("=", file); break; - case NE: - fputs ("<>", file); break; - case GT: - fputs (">", file); break; - case GE: - fputs (">=", file); break; - case GEU: - fputs (">>=", file); break; - case GTU: - fputs (">>", file); break; - case LT: - fputs ("<", file); break; - case LE: - fputs ("<=", file); break; - case LEU: - fputs ("<<=", file); break; - case LTU: - fputs ("<<", file); break; - default: - abort (); - } - return; - case 'N': /* Condition, (N)egated */ - switch (GET_CODE (x)) - { - case EQ: - fputs ("<>", file); break; - case NE: - fputs ("=", file); break; - case GT: - fputs ("<=", file); break; - case GE: - fputs ("<", file); break; - case GEU: - fputs ("<<", file); break; - case GTU: - fputs ("<<=", file); break; - case LT: - fputs (">=", file); break; - case LE: - fputs (">", file); break; - case LEU: - fputs (">>", file); break; - case LTU: - fputs (">>=", file); break; - default: - abort (); - } - return; - /* For floating point comparisons. Need special conditions to deal - with NaNs properly. */ - case 'Y': - switch (GET_CODE (x)) - { - case EQ: - fputs ("!=", file); break; - case NE: - fputs ("=", file); break; - case GT: - fputs ("<=", file); break; - case GE: - fputs ("<", file); break; - case LT: - fputs (">=", file); break; - case LE: - fputs (">", file); break; - default: - abort (); - } - return; - case 'S': /* Condition, operands are (S)wapped. */ - switch (GET_CODE (x)) - { - case EQ: - fputs ("=", file); break; - case NE: - fputs ("<>", file); break; - case GT: - fputs ("<", file); break; - case GE: - fputs ("<=", file); break; - case GEU: - fputs ("<<=", file); break; - case GTU: - fputs ("<<", file); break; - case LT: - fputs (">", file); break; - case LE: - fputs (">=", file); break; - case LEU: - fputs (">>=", file); break; - case LTU: - fputs (">>", file); break; - default: - abort (); - } - return; - case 'B': /* Condition, (B)oth swapped and negate. */ - switch (GET_CODE (x)) - { - case EQ: - fputs ("<>", file); break; - case NE: - fputs ("=", file); break; - case GT: - fputs (">=", file); break; - case GE: - fputs (">", file); break; - case GEU: - fputs (">>", file); break; - case GTU: - fputs (">>=", file); break; - case LT: - fputs ("<=", file); break; - case LE: - fputs ("<", file); break; - case LEU: - fputs ("<<", file); break; - case LTU: - fputs ("<<=", file); break; - default: - abort (); - } - return; - case 'k': - if (GET_CODE (x) == CONST_INT) - { - fprintf (file, "%d", ~INTVAL (x)); - return; - } - abort(); - case 'L': - if (GET_CODE (x) == CONST_INT) - { - fprintf (file, "%d", 32 - (INTVAL (x) & 31)); - return; - } - abort(); - case 'O': - if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0) - { - fprintf (file, "%d", exact_log2 (INTVAL (x))); - return; - } - abort(); - case 'P': - if (GET_CODE (x) == CONST_INT) - { - fprintf (file, "%d", 31 - (INTVAL (x) & 31)); - return; - } - abort(); - case 'I': - if (GET_CODE (x) == CONST_INT) - fputs ("i", file); - return; - case 'M': - case 'F': - switch (GET_CODE (XEXP (x, 0))) - { - case PRE_DEC: - case PRE_INC: - fputs ("s,mb", file); - break; - case POST_DEC: - case POST_INC: - fputs ("s,ma", file); - break; - case PLUS: - if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT - || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) - fputs ("x,s", file); - else if (code == 'F') - fputs ("s", file); - break; - default: - if (code == 'F') - fputs ("s", file); - break; - } - return; - case 'G': - output_global_address (file, x, 0); - return; - case 'H': - output_global_address (file, x, 1); - return; - case 0: /* Don't do anything special */ - break; - case 'Z': - { - unsigned op[3]; - compute_zdepi_operands (INTVAL (x), op); - fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); - return; - } - default: - abort (); - } - if (GET_CODE (x) == REG) - { - fputs (reg_names [REGNO (x)], file); - if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0) - fputs ("L", file); - } - else if (GET_CODE (x) == MEM) - { - int size = GET_MODE_SIZE (GET_MODE (x)); - rtx base = XEXP (XEXP (x, 0), 0); - switch (GET_CODE (XEXP (x, 0))) - { - case PRE_DEC: - case POST_DEC: - fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]); - break; - case PRE_INC: - case POST_INC: - fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]); - break; - default: - if (GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) - fprintf (file, "%s(0,%s)", - reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))], - reg_names [REGNO (XEXP (XEXP (x, 0), 1))]); - else if (GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) - fprintf (file, "%s(0,%s)", - reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))], - reg_names [REGNO (XEXP (XEXP (x, 0), 0))]); - else - output_address (XEXP (x, 0)); - break; - } - } - else - output_addr_const (file, x); -} - -/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */ - -void -output_global_address (file, x, round_constant) - FILE *file; - rtx x; - int round_constant; -{ - - /* Imagine (high (const (plus ...))). */ - if (GET_CODE (x) == HIGH) - x = XEXP (x, 0); - - if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x)) - assemble_name (file, XSTR (x, 0)); - else if (GET_CODE (x) == SYMBOL_REF && !flag_pic) - { - assemble_name (file, XSTR (x, 0)); - fputs ("-$global$", file); - } - else if (GET_CODE (x) == CONST) - { - char *sep = ""; - int offset = 0; /* assembler wants -$global$ at end */ - rtx base = NULL_RTX; - - if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) - { - base = XEXP (XEXP (x, 0), 0); - output_addr_const (file, base); - } - else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT) - offset = INTVAL (XEXP (XEXP (x, 0), 0)); - else abort (); - - if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF) - { - base = XEXP (XEXP (x, 0), 1); - output_addr_const (file, base); - } - else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) - offset = INTVAL (XEXP (XEXP (x, 0),1)); - else abort (); - - /* How bogus. The compiler is apparently responsible for - rounding the constant if it uses an LR field selector. - - The linker and/or assembler seem a better place since - they have to do this kind of thing already. - - If we fail to do this, HP's optimizing linker may eliminate - an addil, but not update the ldw/stw/ldo instruction that - uses the result of the addil. */ - if (round_constant) - offset = ((offset + 0x1000) & ~0x1fff); - - if (GET_CODE (XEXP (x, 0)) == PLUS) - { - if (offset < 0) - { - offset = -offset; - sep = "-"; - } - else - sep = "+"; - } - else if (GET_CODE (XEXP (x, 0)) == MINUS - && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) - sep = "-"; - else abort (); - - if (!read_only_operand (base) && !flag_pic) - fputs ("-$global$", file); - if (offset) - fprintf (file,"%s%d", sep, offset); - } - else - output_addr_const (file, x); -} - -void -output_deferred_plabels (file) - FILE *file; -{ - int i; - /* If we have deferred plabels, then we need to switch into the data - section and align it to a 4 byte boundary before we output the - deferred plabels. */ - if (n_deferred_plabels) - { - data_section (); - ASM_OUTPUT_ALIGN (file, 2); - } - - /* Now output the deferred plabels. */ - for (i = 0; i < n_deferred_plabels; i++) - { - ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label)); - assemble_integer (gen_rtx_SYMBOL_REF (VOIDmode, - deferred_plabels[i].name), 4, 1); - } -} - -/* HP's millicode routines mean something special to the assembler. - Keep track of which ones we have used. */ - -enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 }; -static char imported[(int)end1000]; -static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"}; -static char import_string[] = ".IMPORT $$....,MILLICODE"; -#define MILLI_START 10 - -static void -import_milli (code) - enum millicodes code; -{ - char str[sizeof (import_string)]; - - if (!imported[(int)code]) - { - imported[(int)code] = 1; - strcpy (str, import_string); - strncpy (str + MILLI_START, milli_names[(int)code], 4); - output_asm_insn (str, 0); - } -} - -/* The register constraints have put the operands and return value in - the proper registers. */ - -char * -output_mul_insn (unsignedp, insn) - int unsignedp ATTRIBUTE_UNUSED; - rtx insn; -{ - import_milli (mulI); - return output_millicode_call (insn, gen_rtx_SYMBOL_REF (SImode, "$$mulI")); -} - -/* Emit the rtl for doing a division by a constant. */ - -/* Do magic division millicodes exist for this value? */ -static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, - 1, 1}; - -/* We'll use an array to keep track of the magic millicodes and - whether or not we've used them already. [n][0] is signed, [n][1] is - unsigned. */ - -static int div_milli[16][2]; - -int -div_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - return (mode == SImode - && ((GET_CODE (op) == REG && REGNO (op) == 25) - || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0 - && INTVAL (op) < 16 && magic_milli[INTVAL (op)]))); -} - -int -emit_hpdiv_const (operands, unsignedp) - rtx *operands; - int unsignedp; -{ - if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) > 0 - && INTVAL (operands[2]) < 16 - && magic_milli[INTVAL (operands[2])]) - { - emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]); - emit - (gen_rtx - (PARALLEL, VOIDmode, - gen_rtvec (5, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29), - gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, - SImode, - gen_rtx_REG (SImode, 26), - operands[2])), - gen_rtx_CLOBBER (VOIDmode, operands[3]), - gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)), - gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)), - gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 31))))); - emit_move_insn (operands[0], gen_rtx_REG (SImode, 29)); - return 1; - } - return 0; -} - -char * -output_div_insn (operands, unsignedp, insn) - rtx *operands; - int unsignedp; - rtx insn; -{ - int divisor; - - /* If the divisor is a constant, try to use one of the special - opcodes .*/ - if (GET_CODE (operands[0]) == CONST_INT) - { - static char buf[100]; - divisor = INTVAL (operands[0]); - if (!div_milli[divisor][unsignedp]) - { - div_milli[divisor][unsignedp] = 1; - if (unsignedp) - output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands); - else - output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands); - } - if (unsignedp) - { - sprintf (buf, "$$divU_%d", INTVAL (operands[0])); - return output_millicode_call (insn, - gen_rtx_SYMBOL_REF (SImode, buf)); - } - else - { - sprintf (buf, "$$divI_%d", INTVAL (operands[0])); - return output_millicode_call (insn, - gen_rtx_SYMBOL_REF (SImode, buf)); - } - } - /* Divisor isn't a special constant. */ - else - { - if (unsignedp) - { - import_milli (divU); - return output_millicode_call (insn, - gen_rtx_SYMBOL_REF (SImode, "$$divU")); - } - else - { - import_milli (divI); - return output_millicode_call (insn, - gen_rtx_SYMBOL_REF (SImode, "$$divI")); - } - } -} - -/* Output a $$rem millicode to do mod. */ - -char * -output_mod_insn (unsignedp, insn) - int unsignedp; - rtx insn; -{ - if (unsignedp) - { - import_milli (remU); - return output_millicode_call (insn, - gen_rtx_SYMBOL_REF (SImode, "$$remU")); - } - else - { - import_milli (remI); - return output_millicode_call (insn, - gen_rtx_SYMBOL_REF (SImode, "$$remI")); - } -} - -void -output_arg_descriptor (call_insn) - rtx call_insn; -{ - char *arg_regs[4]; - enum machine_mode arg_mode; - rtx link; - int i, output_flag = 0; - int regno; - - for (i = 0; i < 4; i++) - arg_regs[i] = 0; - - /* Specify explicitly that no argument relocations should take place - if using the portable runtime calling conventions. */ - if (TARGET_PORTABLE_RUNTIME) - { - fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n", - asm_out_file); - return; - } - - if (GET_CODE (call_insn) != CALL_INSN) - abort (); - for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1)) - { - rtx use = XEXP (link, 0); - - if (! (GET_CODE (use) == USE - && GET_CODE (XEXP (use, 0)) == REG - && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) - continue; - - arg_mode = GET_MODE (XEXP (use, 0)); - regno = REGNO (XEXP (use, 0)); - if (regno >= 23 && regno <= 26) - { - arg_regs[26 - regno] = "GR"; - if (arg_mode == DImode) - arg_regs[25 - regno] = "GR"; - } - else if (regno >= 32 && regno <= 39) - { - if (arg_mode == SFmode) - arg_regs[(regno - 32) / 2] = "FR"; - else - { -#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED - arg_regs[(regno - 34) / 2] = "FR"; - arg_regs[(regno - 34) / 2 + 1] = "FU"; -#else - arg_regs[(regno - 34) / 2] = "FU"; - arg_regs[(regno - 34) / 2 + 1] = "FR"; -#endif - } - } - } - fputs ("\t.CALL ", asm_out_file); - for (i = 0; i < 4; i++) - { - if (arg_regs[i]) - { - if (output_flag++) - fputc (',', asm_out_file); - fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]); - } - } - fputc ('\n', asm_out_file); -} - -/* Return the class of any secondary reload register that is needed to - move IN into a register in class CLASS using mode MODE. - - Profiling has showed this routine and its descendants account for - a significant amount of compile time (~7%). So it has been - optimized to reduce redundant computations and eliminate useless - function calls. - - It might be worthwhile to try and make this a leaf function too. */ - -enum reg_class -secondary_reload_class (class, mode, in) - enum reg_class class; - enum machine_mode mode; - rtx in; -{ - int regno, is_symbolic; - - /* Trying to load a constant into a FP register during PIC code - generation will require %r1 as a scratch register. */ - if (flag_pic == 2 - && GET_MODE_CLASS (mode) == MODE_INT - && FP_REG_CLASS_P (class) - && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE)) - return R1_REGS; - - /* Profiling showed the PA port spends about 1.3% of its compilation - time in true_regnum from calls inside secondary_reload_class. */ - - if (GET_CODE (in) == REG) - { - regno = REGNO (in); - if (regno >= FIRST_PSEUDO_REGISTER) - regno = true_regnum (in); - } - else if (GET_CODE (in) == SUBREG) - regno = true_regnum (in); - else - regno = -1; - - /* If we have something like (mem (mem (...)), we can safely assume the - inner MEM will end up in a general register after reloading, so there's - no need for a secondary reload. */ - if (GET_CODE (in) == MEM - && GET_CODE (XEXP (in, 0)) == MEM) - return NO_REGS; - - /* Handle out of range displacement for integer mode loads/stores of - FP registers. */ - if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1) - && GET_MODE_CLASS (mode) == MODE_INT - && FP_REG_CLASS_P (class)) - || (class == SHIFT_REGS && (regno <= 0 || regno >= 32))) - return GENERAL_REGS; - - if (GET_CODE (in) == HIGH) - in = XEXP (in, 0); - - /* Profiling has showed GCC spends about 2.6% of its compilation - time in symbolic_operand from calls inside secondary_reload_class. - - We use an inline copy and only compute its return value once to avoid - useless work. */ - switch (GET_CODE (in)) - { - rtx tmp; - - case SYMBOL_REF: - case LABEL_REF: - is_symbolic = 1; - break; - case CONST: - tmp = XEXP (in, 0); - is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF - || GET_CODE (XEXP (tmp, 0)) == LABEL_REF) - && GET_CODE (XEXP (tmp, 1)) == CONST_INT); - break; - - default: - is_symbolic = 0; - break; - } - - if (!flag_pic - && is_symbolic - && read_only_operand (in)) - return NO_REGS; - - if (class != R1_REGS && is_symbolic) - return R1_REGS; - - return NO_REGS; -} - -enum direction -function_arg_padding (mode, type) - enum machine_mode mode; - tree type; -{ - int size; - - if (mode == BLKmode) - { - if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) - size = int_size_in_bytes (type) * BITS_PER_UNIT; - else - return upward; /* Don't know if this is right, but */ - /* same as old definition. */ - } - else - size = GET_MODE_BITSIZE (mode); - if (size < PARM_BOUNDARY) - return downward; - else if (size % PARM_BOUNDARY) - return upward; - else - return none; -} - - -/* Do what is necessary for `va_start'. The argument is ignored; - We look at the current function to determine if stdargs or varargs - is used and fill in an initial va_list. A pointer to this constructor - is returned. */ - -struct rtx_def * -hppa_builtin_saveregs (arglist) - tree arglist ATTRIBUTE_UNUSED; -{ - rtx offset, dest; - tree fntype = TREE_TYPE (current_function_decl); - int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0 - && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype))) - != void_type_node))) - ? UNITS_PER_WORD : 0); - - if (argadj) - offset = plus_constant (current_function_arg_offset_rtx, argadj); - else - offset = current_function_arg_offset_rtx; - - /* Store general registers on the stack. */ - dest = gen_rtx_MEM (BLKmode, - plus_constant (current_function_internal_arg_pointer, - -16)); - move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD); - - /* move_block_from_reg will emit code to store the argument registers - individually as scalar stores. - - However, other insns may later load from the same addresses for - a structure load (passing a struct to a varargs routine). - - The alias code assumes that such aliasing can never happen, so we - have to keep memory referencing insns from moving up beyond the - last argument register store. So we emit a blockage insn here. */ - emit_insn (gen_blockage ()); - - if (current_function_check_memory_usage) - emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3, - dest, ptr_mode, - GEN_INT (4 * UNITS_PER_WORD), TYPE_MODE (sizetype), - GEN_INT (MEMORY_USE_RW), - TYPE_MODE (integer_type_node)); - - return copy_to_reg (expand_binop (Pmode, add_optab, - current_function_internal_arg_pointer, - offset, 0, 0, OPTAB_LIB_WIDEN)); -} - -/* This routine handles all the normal conditional branch sequences we - might need to generate. It handles compare immediate vs compare - register, nullification of delay slots, varying length branches, - negated branches, and all combinations of the above. It returns the - output appropriate to emit the branch corresponding to all given - parameters. */ - -char * -output_cbranch (operands, nullify, length, negated, insn) - rtx *operands; - int nullify, length, negated; - rtx insn; -{ - static char buf[100]; - int useskip = 0; - - /* A conditional branch to the following instruction (eg the delay slot) is - asking for a disaster. This can happen when not optimizing. - - In such cases it is safe to emit nothing. */ - - if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) - return ""; - - /* If this is a long branch with its delay slot unfilled, set `nullify' - as it can nullify the delay slot and save a nop. */ - if (length == 8 && dbr_sequence_length () == 0) - nullify = 1; - - /* If this is a short forward conditional branch which did not get - its delay slot filled, the delay slot can still be nullified. */ - if (! nullify && length == 4 && dbr_sequence_length () == 0) - nullify = forward_branch_p (insn); - - /* A forward branch over a single nullified insn can be done with a - comclr instruction. This avoids a single cycle penalty due to - mis-predicted branch if we fall through (branch not taken). */ - if (length == 4 - && next_real_insn (insn) != 0 - && get_attr_length (next_real_insn (insn)) == 4 - && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn)) - && nullify) - useskip = 1; - - switch (length) - { - /* All short conditional branches except backwards with an unfilled - delay slot. */ - case 4: - if (useskip) - strcpy (buf, "com%I2clr,"); - else - strcpy (buf, "com%I2b,"); - if (negated) - strcat (buf, "%B3"); - else - strcat (buf, "%S3"); - if (useskip) - strcat (buf, " %2,%r1,0"); - else if (nullify) - strcat (buf, ",n %2,%r1,%0"); - else - strcat (buf, " %2,%r1,%0"); - break; - - /* All long conditionals. Note an short backward branch with an - unfilled delay slot is treated just like a long backward branch - with an unfilled delay slot. */ - case 8: - /* Handle weird backwards branch with a filled delay slot - with is nullified. */ - if (dbr_sequence_length () != 0 - && ! forward_branch_p (insn) - && nullify) - { - strcpy (buf, "com%I2b,"); - if (negated) - strcat (buf, "%S3"); - else - strcat (buf, "%B3"); - strcat (buf, ",n %2,%r1,.+12\n\tbl %0,0"); - } - /* Handle short backwards branch with an unfilled delay slot. - Using a comb;nop rather than comiclr;bl saves 1 cycle for both - taken and untaken branches. */ - else if (dbr_sequence_length () == 0 - && ! forward_branch_p (insn) - && insn_addresses - && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] - - insn_addresses[INSN_UID (insn)] - 8)) - { - strcpy (buf, "com%I2b,"); - if (negated) - strcat (buf, "%B3 %2,%r1,%0%#"); - else - strcat (buf, "%S3 %2,%r1,%0%#"); - } - else - { - strcpy (buf, "com%I2clr,"); - if (negated) - strcat (buf, "%S3"); - else - strcat (buf, "%B3"); - if (nullify) - strcat (buf, " %2,%r1,0\n\tbl,n %0,0"); - else - strcat (buf, " %2,%r1,0\n\tbl %0,0"); - } - break; - - case 20: - /* Very long branch. Right now we only handle these when not - optimizing. See "jump" pattern in pa.md for details. */ - if (optimize) - abort (); - - /* Create a reversed conditional branch which branches around - the following insns. */ - if (negated) - strcpy (buf, "com%I2b,%S3,n %2,%r1,.+20"); - else - strcpy (buf, "com%I2b,%B3,n %2,%r1,.+20"); - output_asm_insn (buf, operands); - - /* Output an insn to save %r1. */ - output_asm_insn ("stw %%r1,-16(%%r30)", operands); - - /* Now output a very long branch to the original target. */ - output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands); - - /* Now restore the value of %r1 in the delay slot. We're not - optimizing so we know nothing else can be in the delay slot. */ - return "ldw -16(%%r30),%%r1"; - - case 28: - /* Very long branch when generating PIC code. Right now we only - handle these when not optimizing. See "jump" pattern in pa.md - for details. */ - if (optimize) - abort (); - - /* Create a reversed conditional branch which branches around - the following insns. */ - if (negated) - strcpy (buf, "com%I2b,%S3,n %2,%r1,.+28"); - else - strcpy (buf, "com%I2b,%B3,n %2,%r1,.+28"); - output_asm_insn (buf, operands); - - /* Output an insn to save %r1. */ - output_asm_insn ("stw %%r1,-16(%%r30)", operands); - - /* Now output a very long PIC branch to the original target. */ - { - rtx xoperands[5]; - - xoperands[0] = operands[0]; - xoperands[1] = operands[1]; - xoperands[2] = operands[2]; - xoperands[3] = operands[3]; - xoperands[4] = gen_label_rtx (); - - output_asm_insn ("bl .+8,%%r1\n\taddil L'%l0-%l4,%%r1", xoperands); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (xoperands[4])); - output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv 0(%%r1)", xoperands); - } - - /* Now restore the value of %r1 in the delay slot. We're not - optimizing so we know nothing else can be in the delay slot. */ - return "ldw -16(%%r30),%%r1"; - - default: - abort(); - } - return buf; -} - -/* This routine handles all the branch-on-bit conditional branch sequences we - might need to generate. It handles nullification of delay slots, - varying length branches, negated branches and all combinations of the - above. it returns the appropriate output template to emit the branch. */ - -char * -output_bb (operands, nullify, length, negated, insn, which) - rtx *operands ATTRIBUTE_UNUSED; - int nullify, length, negated; - rtx insn; - int which; -{ - static char buf[100]; - int useskip = 0; - - /* A conditional branch to the following instruction (eg the delay slot) is - asking for a disaster. I do not think this can happen as this pattern - is only used when optimizing; jump optimization should eliminate the - jump. But be prepared just in case. */ - - if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) - return ""; - - /* If this is a long branch with its delay slot unfilled, set `nullify' - as it can nullify the delay slot and save a nop. */ - if (length == 8 && dbr_sequence_length () == 0) - nullify = 1; - - /* If this is a short forward conditional branch which did not get - its delay slot filled, the delay slot can still be nullified. */ - if (! nullify && length == 4 && dbr_sequence_length () == 0) - nullify = forward_branch_p (insn); - - /* A forward branch over a single nullified insn can be done with a - extrs instruction. This avoids a single cycle penalty due to - mis-predicted branch if we fall through (branch not taken). */ - - if (length == 4 - && next_real_insn (insn) != 0 - && get_attr_length (next_real_insn (insn)) == 4 - && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn)) - && nullify) - useskip = 1; - - switch (length) - { - - /* All short conditional branches except backwards with an unfilled - delay slot. */ - case 4: - if (useskip) - strcpy (buf, "extrs,"); - else - strcpy (buf, "bb,"); - if ((which == 0 && negated) - || (which == 1 && ! negated)) - strcat (buf, ">="); - else - strcat (buf, "<"); - if (useskip) - strcat (buf, " %0,%1,1,0"); - else if (nullify && negated) - strcat (buf, ",n %0,%1,%3"); - else if (nullify && ! negated) - strcat (buf, ",n %0,%1,%2"); - else if (! nullify && negated) - strcat (buf, "%0,%1,%3"); - else if (! nullify && ! negated) - strcat (buf, " %0,%1,%2"); - break; - - /* All long conditionals. Note an short backward branch with an - unfilled delay slot is treated just like a long backward branch - with an unfilled delay slot. */ - case 8: - /* Handle weird backwards branch with a filled delay slot - with is nullified. */ - if (dbr_sequence_length () != 0 - && ! forward_branch_p (insn) - && nullify) - { - strcpy (buf, "bb,"); - if ((which == 0 && negated) - || (which == 1 && ! negated)) - strcat (buf, "<"); - else - strcat (buf, ">="); - if (negated) - strcat (buf, ",n %0,%1,.+12\n\tbl %3,0"); - else - strcat (buf, ",n %0,%1,.+12\n\tbl %2,0"); - } - /* Handle short backwards branch with an unfilled delay slot. - Using a bb;nop rather than extrs;bl saves 1 cycle for both - taken and untaken branches. */ - else if (dbr_sequence_length () == 0 - && ! forward_branch_p (insn) - && insn_addresses - && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] - - insn_addresses[INSN_UID (insn)] - 8)) - { - strcpy (buf, "bb,"); - if ((which == 0 && negated) - || (which == 1 && ! negated)) - strcat (buf, ">="); - else - strcat (buf, "<"); - if (negated) - strcat (buf, " %0,%1,%3%#"); - else - strcat (buf, " %0,%1,%2%#"); - } - else - { - strcpy (buf, "extrs,"); - if ((which == 0 && negated) - || (which == 1 && ! negated)) - strcat (buf, "<"); - else - strcat (buf, ">="); - if (nullify && negated) - strcat (buf, " %0,%1,1,0\n\tbl,n %3,0"); - else if (nullify && ! negated) - strcat (buf, " %0,%1,1,0\n\tbl,n %2,0"); - else if (negated) - strcat (buf, " %0,%1,1,0\n\tbl %3,0"); - else - strcat (buf, " %0,%1,1,0\n\tbl %2,0"); - } - break; - - default: - abort(); - } - return buf; -} - -/* This routine handles all the branch-on-variable-bit conditional branch - sequences we might need to generate. It handles nullification of delay - slots, varying length branches, negated branches and all combinations - of the above. it returns the appropriate output template to emit the - branch. */ - -char * -output_bvb (operands, nullify, length, negated, insn, which) - rtx *operands ATTRIBUTE_UNUSED; - int nullify, length, negated; - rtx insn; - int which; -{ - static char buf[100]; - int useskip = 0; - - /* A conditional branch to the following instruction (eg the delay slot) is - asking for a disaster. I do not think this can happen as this pattern - is only used when optimizing; jump optimization should eliminate the - jump. But be prepared just in case. */ - - if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) - return ""; - - /* If this is a long branch with its delay slot unfilled, set `nullify' - as it can nullify the delay slot and save a nop. */ - if (length == 8 && dbr_sequence_length () == 0) - nullify = 1; - - /* If this is a short forward conditional branch which did not get - its delay slot filled, the delay slot can still be nullified. */ - if (! nullify && length == 4 && dbr_sequence_length () == 0) - nullify = forward_branch_p (insn); - - /* A forward branch over a single nullified insn can be done with a - extrs instruction. This avoids a single cycle penalty due to - mis-predicted branch if we fall through (branch not taken). */ - - if (length == 4 - && next_real_insn (insn) != 0 - && get_attr_length (next_real_insn (insn)) == 4 - && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn)) - && nullify) - useskip = 1; - - switch (length) - { - - /* All short conditional branches except backwards with an unfilled - delay slot. */ - case 4: - if (useskip) - strcpy (buf, "vextrs,"); - else - strcpy (buf, "bvb,"); - if ((which == 0 && negated) - || (which == 1 && ! negated)) - strcat (buf, ">="); - else - strcat (buf, "<"); - if (useskip) - strcat (buf, " %0,1,0"); - else if (nullify && negated) - strcat (buf, ",n %0,%3"); - else if (nullify && ! negated) - strcat (buf, ",n %0,%2"); - else if (! nullify && negated) - strcat (buf, "%0,%3"); - else if (! nullify && ! negated) - strcat (buf, " %0,%2"); - break; - - /* All long conditionals. Note an short backward branch with an - unfilled delay slot is treated just like a long backward branch - with an unfilled delay slot. */ - case 8: - /* Handle weird backwards branch with a filled delay slot - with is nullified. */ - if (dbr_sequence_length () != 0 - && ! forward_branch_p (insn) - && nullify) - { - strcpy (buf, "bvb,"); - if ((which == 0 && negated) - || (which == 1 && ! negated)) - strcat (buf, "<"); - else - strcat (buf, ">="); - if (negated) - strcat (buf, ",n %0,.+12\n\tbl %3,0"); - else - strcat (buf, ",n %0,.+12\n\tbl %2,0"); - } - /* Handle short backwards branch with an unfilled delay slot. - Using a bb;nop rather than extrs;bl saves 1 cycle for both - taken and untaken branches. */ - else if (dbr_sequence_length () == 0 - && ! forward_branch_p (insn) - && insn_addresses - && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] - - insn_addresses[INSN_UID (insn)] - 8)) - { - strcpy (buf, "bvb,"); - if ((which == 0 && negated) - || (which == 1 && ! negated)) - strcat (buf, ">="); - else - strcat (buf, "<"); - if (negated) - strcat (buf, " %0,%3%#"); - else - strcat (buf, " %0,%2%#"); - } - else - { - strcpy (buf, "vextrs,"); - if ((which == 0 && negated) - || (which == 1 && ! negated)) - strcat (buf, "<"); - else - strcat (buf, ">="); - if (nullify && negated) - strcat (buf, " %0,1,0\n\tbl,n %3,0"); - else if (nullify && ! negated) - strcat (buf, " %0,1,0\n\tbl,n %2,0"); - else if (negated) - strcat (buf, " %0,1,0\n\tbl %3,0"); - else - strcat (buf, " %0,1,0\n\tbl %2,0"); - } - break; - - default: - abort(); - } - return buf; -} - -/* Return the output template for emitting a dbra type insn. - - Note it may perform some output operations on its own before - returning the final output string. */ -char * -output_dbra (operands, insn, which_alternative) - rtx *operands; - rtx insn; - int which_alternative; -{ - - /* A conditional branch to the following instruction (eg the delay slot) is - asking for a disaster. Be prepared! */ - - if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) - { - if (which_alternative == 0) - return "ldo %1(%0),%0"; - else if (which_alternative == 1) - { - output_asm_insn ("fstws %0,-16(0,%%r30)",operands); - output_asm_insn ("ldw -16(0,%%r30),%4",operands); - output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands); - return "fldws -16(0,%%r30),%0"; - } - else - { - output_asm_insn ("ldw %0,%4", operands); - return "ldo %1(%4),%4\n\tstw %4,%0"; - } - } - - if (which_alternative == 0) - { - int nullify = INSN_ANNULLED_BRANCH_P (insn); - int length = get_attr_length (insn); - - /* If this is a long branch with its delay slot unfilled, set `nullify' - as it can nullify the delay slot and save a nop. */ - if (length == 8 && dbr_sequence_length () == 0) - nullify = 1; - - /* If this is a short forward conditional branch which did not get - its delay slot filled, the delay slot can still be nullified. */ - if (! nullify && length == 4 && dbr_sequence_length () == 0) - nullify = forward_branch_p (insn); - - /* Handle short versions first. */ - if (length == 4 && nullify) - return "addib,%C2,n %1,%0,%3"; - else if (length == 4 && ! nullify) - return "addib,%C2 %1,%0,%3"; - else if (length == 8) - { - /* Handle weird backwards branch with a fulled delay slot - which is nullified. */ - if (dbr_sequence_length () != 0 - && ! forward_branch_p (insn) - && nullify) - return "addib,%N2,n %1,%0,.+12\n\tbl %3,0"; - /* Handle short backwards branch with an unfilled delay slot. - Using a addb;nop rather than addi;bl saves 1 cycle for both - taken and untaken branches. */ - else if (dbr_sequence_length () == 0 - && ! forward_branch_p (insn) - && insn_addresses - && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] - - insn_addresses[INSN_UID (insn)] - 8)) - return "addib,%C2 %1,%0,%3%#"; - - /* Handle normal cases. */ - if (nullify) - return "addi,%N2 %1,%0,%0\n\tbl,n %3,0"; - else - return "addi,%N2 %1,%0,%0\n\tbl %3,0"; - } - else - abort(); - } - /* Deal with gross reload from FP register case. */ - else if (which_alternative == 1) - { - /* Move loop counter from FP register to MEM then into a GR, - increment the GR, store the GR into MEM, and finally reload - the FP register from MEM from within the branch's delay slot. */ - output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands); - output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands); - if (get_attr_length (insn) == 24) - return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0"; - else - return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0"; - } - /* Deal with gross reload from memory case. */ - else - { - /* Reload loop counter from memory, the store back to memory - happens in the branch's delay slot. */ - output_asm_insn ("ldw %0,%4", operands); - if (get_attr_length (insn) == 12) - return "addib,%C2 %1,%4,%3\n\tstw %4,%0"; - else - return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0"; - } -} - -/* Return the output template for emitting a dbra type insn. - - Note it may perform some output operations on its own before - returning the final output string. */ -char * -output_movb (operands, insn, which_alternative, reverse_comparison) - rtx *operands; - rtx insn; - int which_alternative; - int reverse_comparison; -{ - - /* A conditional branch to the following instruction (eg the delay slot) is - asking for a disaster. Be prepared! */ - - if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) - { - if (which_alternative == 0) - return "copy %1,%0"; - else if (which_alternative == 1) - { - output_asm_insn ("stw %1,-16(0,%%r30)",operands); - return "fldws -16(0,%%r30),%0"; - } - else if (which_alternative == 2) - return "stw %1,%0"; - else - return "mtsar %r1"; - } - - /* Support the second variant. */ - if (reverse_comparison) - PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2]))); - - if (which_alternative == 0) - { - int nullify = INSN_ANNULLED_BRANCH_P (insn); - int length = get_attr_length (insn); - - /* If this is a long branch with its delay slot unfilled, set `nullify' - as it can nullify the delay slot and save a nop. */ - if (length == 8 && dbr_sequence_length () == 0) - nullify = 1; - - /* If this is a short forward conditional branch which did not get - its delay slot filled, the delay slot can still be nullified. */ - if (! nullify && length == 4 && dbr_sequence_length () == 0) - nullify = forward_branch_p (insn); - - /* Handle short versions first. */ - if (length == 4 && nullify) - return "movb,%C2,n %1,%0,%3"; - else if (length == 4 && ! nullify) - return "movb,%C2 %1,%0,%3"; - else if (length == 8) - { - /* Handle weird backwards branch with a filled delay slot - which is nullified. */ - if (dbr_sequence_length () != 0 - && ! forward_branch_p (insn) - && nullify) - return "movb,%N2,n %1,%0,.+12\n\tbl %3,0"; - - /* Handle short backwards branch with an unfilled delay slot. - Using a movb;nop rather than or;bl saves 1 cycle for both - taken and untaken branches. */ - else if (dbr_sequence_length () == 0 - && ! forward_branch_p (insn) - && insn_addresses - && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] - - insn_addresses[INSN_UID (insn)] - 8)) - return "movb,%C2 %1,%0,%3%#"; - /* Handle normal cases. */ - if (nullify) - return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0"; - else - return "or,%N2 %1,%%r0,%0\n\tbl %3,0"; - } - else - abort(); - } - /* Deal with gross reload from FP register case. */ - else if (which_alternative == 1) - { - /* Move loop counter from FP register to MEM then into a GR, - increment the GR, store the GR into MEM, and finally reload - the FP register from MEM from within the branch's delay slot. */ - output_asm_insn ("stw %1,-16(0,%%r30)",operands); - if (get_attr_length (insn) == 12) - return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0"; - else - return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0"; - } - /* Deal with gross reload from memory case. */ - else if (which_alternative == 2) - { - /* Reload loop counter from memory, the store back to memory - happens in the branch's delay slot. */ - if (get_attr_length (insn) == 8) - return "comb,%S2 0,%1,%3\n\tstw %1,%0"; - else - return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0"; - } - /* Handle SAR as a destination. */ - else - { - if (get_attr_length (insn) == 8) - return "comb,%S2 0,%1,%3\n\tmtsar %r1"; - else - return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1"; - } -} - - -/* INSN is a millicode call. It may have an unconditional jump in its delay - slot. - - CALL_DEST is the routine we are calling. */ - -char * -output_millicode_call (insn, call_dest) - rtx insn; - rtx call_dest; -{ - int distance; - rtx xoperands[4]; - rtx seq_insn; - - /* Handle common case -- empty delay slot or no jump in the delay slot, - and we're sure that the branch will reach the beginning of the $CODE$ - subspace. */ - if ((dbr_sequence_length () == 0 - && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28)) - || (dbr_sequence_length () != 0 - && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN - && get_attr_length (insn) == 4)) - { - xoperands[0] = call_dest; - output_asm_insn ("bl %0,%%r31%#", xoperands); - return ""; - } - - /* This call may not reach the beginning of the $CODE$ subspace. */ - if (get_attr_length (insn) > 4) - { - int delay_insn_deleted = 0; - rtx xoperands[2]; - - /* We need to emit an inline long-call branch. */ - if (dbr_sequence_length () != 0 - && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) - { - /* A non-jump insn in the delay slot. By definition we can - emit this insn before the call. */ - final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0); - - /* Now delete the delay insn. */ - PUT_CODE (NEXT_INSN (insn), NOTE); - NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; - delay_insn_deleted = 1; - } - - /* If we're allowed to use be/ble instructions, then this is the - best sequence to use for a long millicode call. */ - if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS - || ! (flag_pic || TARGET_PORTABLE_RUNTIME)) - { - xoperands[0] = call_dest; - output_asm_insn ("ldil L%%%0,%%r31", xoperands); - output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands); - output_asm_insn ("nop", xoperands); - } - /* Pure portable runtime doesn't allow be/ble; we also don't have - PIC support int he assembler/linker, so this sequence is needed. */ - else if (TARGET_PORTABLE_RUNTIME) - { - xoperands[0] = call_dest; - /* Get the address of our target into %r29. */ - output_asm_insn ("ldil L%%%0,%%r29", xoperands); - output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands); - - /* Get our return address into %r31. */ - output_asm_insn ("blr 0,%%r31", xoperands); - - /* Jump to our target address in %r29. */ - output_asm_insn ("bv,n 0(%%r29)", xoperands); - - /* Empty delay slot. Note this insn gets fetched twice and - executed once. To be safe we use a nop. */ - output_asm_insn ("nop", xoperands); - return ""; - } - /* PIC long millicode call sequence. */ - else - { - xoperands[0] = call_dest; - xoperands[1] = gen_label_rtx (); - /* Get our address + 8 into %r1. */ - output_asm_insn ("bl .+8,%%r1", xoperands); - - /* Add %r1 to the offset of our target from the next insn. */ - output_asm_insn ("addil L%%%0-%1,%%r1", xoperands); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (xoperands[1])); - output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands); - - /* Get the return address into %r31. */ - output_asm_insn ("blr 0,%%r31", xoperands); - - /* Branch to our target which is in %r1. */ - output_asm_insn ("bv,n 0(%%r1)", xoperands); - - /* Empty delay slot. Note this insn gets fetched twice and - executed once. To be safe we use a nop. */ - output_asm_insn ("nop", xoperands); - } - - /* If we had a jump in the call's delay slot, output it now. */ - if (dbr_sequence_length () != 0 - && !delay_insn_deleted) - { - xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); - output_asm_insn ("b,n %0", xoperands); - - /* Now delete the delay insn. */ - PUT_CODE (NEXT_INSN (insn), NOTE); - NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; - } - return ""; - } - - /* This call has an unconditional jump in its delay slot and the - call is known to reach its target or the beginning of the current - subspace. */ - - /* Use the containing sequence insn's address. */ - seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); - - distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))] - - insn_addresses[INSN_UID (seq_insn)] - 8; - - /* If the branch was too far away, emit a normal call followed - by a nop, followed by the unconditional branch. - - If the branch is close, then adjust %r2 from within the - call's delay slot. */ - - xoperands[0] = call_dest; - xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1); - if (! VAL_14_BITS_P (distance)) - output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands); - else - { - xoperands[3] = gen_label_rtx (); - output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (xoperands[3])); - } - - /* Delete the jump. */ - PUT_CODE (NEXT_INSN (insn), NOTE); - NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; - return ""; -} - -extern struct obstack permanent_obstack; -extern struct obstack *saveable_obstack; -extern struct obstack *rtl_obstack; -extern struct obstack *current_obstack; - -/* INSN is either a function call. It may have an unconditional jump - in its delay slot. - - CALL_DEST is the routine we are calling. */ - -char * -output_call (insn, call_dest) - rtx insn; - rtx call_dest; -{ - int distance; - rtx xoperands[4]; - rtx seq_insn; - - /* Handle common case -- empty delay slot or no jump in the delay slot, - and we're sure that the branch will reach the beginning of the $CODE$ - subspace. */ - if ((dbr_sequence_length () == 0 - && get_attr_length (insn) == 8) - || (dbr_sequence_length () != 0 - && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN - && get_attr_length (insn) == 4)) - { - xoperands[0] = call_dest; - output_asm_insn ("bl %0,%%r2%#", xoperands); - return ""; - } - - /* This call may not reach the beginning of the $CODE$ subspace. */ - if (get_attr_length (insn) > 8) - { - int delay_insn_deleted = 0; - rtx xoperands[2]; - rtx link; - - /* We need to emit an inline long-call branch. Furthermore, - because we're changing a named function call into an indirect - function call well after the parameters have been set up, we - need to make sure any FP args appear in both the integer - and FP registers. Also, we need move any delay slot insn - out of the delay slot. And finally, we can't rely on the linker - being able to fix the call to $$dyncall! -- Yuk!. */ - if (dbr_sequence_length () != 0 - && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) - { - /* A non-jump insn in the delay slot. By definition we can - emit this insn before the call (and in fact before argument - relocating. */ - final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0); - - /* Now delete the delay insn. */ - PUT_CODE (NEXT_INSN (insn), NOTE); - NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; - delay_insn_deleted = 1; - } - - /* Now copy any FP arguments into integer registers. */ - for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) - { - int arg_mode, regno; - rtx use = XEXP (link, 0); - if (! (GET_CODE (use) == USE - && GET_CODE (XEXP (use, 0)) == REG - && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) - continue; - - arg_mode = GET_MODE (XEXP (use, 0)); - regno = REGNO (XEXP (use, 0)); - /* Is it a floating point register? */ - if (regno >= 32 && regno <= 39) - { - /* Copy from the FP register into an integer register - (via memory). */ - if (arg_mode == SFmode) - { - xoperands[0] = XEXP (use, 0); - xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2); - output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands); - output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); - } - else - { - xoperands[0] = XEXP (use, 0); - xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2); - output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands); - output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands); - output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); - } - } - } - - /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since - we don't have any direct calls in that case. */ - { - int i; - char *name = XSTR (call_dest, 0); - - /* See if we have already put this function on the list - of deferred plabels. This list is generally small, - so a liner search is not too ugly. If it proves too - slow replace it with something faster. */ - for (i = 0; i < n_deferred_plabels; i++) - if (strcmp (name, deferred_plabels[i].name) == 0) - break; - - /* If the deferred plabel list is empty, or this entry was - not found on the list, create a new entry on the list. */ - if (deferred_plabels == NULL || i == n_deferred_plabels) - { - struct obstack *ambient_obstack = current_obstack; - struct obstack *ambient_rtl_obstack = rtl_obstack; - char *real_name; - - /* Any RTL we create here needs to live until the end of - the compilation unit and therefore must live on the - permanent obstack. */ - current_obstack = &permanent_obstack; - rtl_obstack = &permanent_obstack; - - if (deferred_plabels == 0) - deferred_plabels = (struct deferred_plabel *) - xmalloc (1 * sizeof (struct deferred_plabel)); - else - deferred_plabels = (struct deferred_plabel *) - xrealloc (deferred_plabels, - ((n_deferred_plabels + 1) - * sizeof (struct deferred_plabel))); - - i = n_deferred_plabels++; - deferred_plabels[i].internal_label = gen_label_rtx (); - deferred_plabels[i].name = obstack_alloc (&permanent_obstack, - strlen (name) + 1); - strcpy (deferred_plabels[i].name, name); - - /* Switch back to normal obstack allocation. */ - current_obstack = ambient_obstack; - rtl_obstack = ambient_rtl_obstack; - - /* Gross. We have just implicitly taken the address of this - function, mark it as such. */ - STRIP_NAME_ENCODING (real_name, name); - TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1; - } - - /* We have to load the address of the function using a procedure - label (plabel). Inline plabels can lose for PIC and other - cases, so avoid them by creating a 32bit plabel in the data - segment. */ - if (flag_pic) - { - xoperands[0] = deferred_plabels[i].internal_label; - xoperands[1] = gen_label_rtx (); - - output_asm_insn ("addil LT%%%0,%%r19", xoperands); - output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands); - output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands); - - /* Get our address + 8 into %r1. */ - output_asm_insn ("bl .+8,%%r1", xoperands); - - /* Add %r1 to the offset of dyncall from the next insn. */ - output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (xoperands[1])); - output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands); - - /* Get the return address into %r31. */ - output_asm_insn ("blr 0,%%r31", xoperands); - - /* Branch to our target which is in %r1. */ - output_asm_insn ("bv 0(%%r1)", xoperands); - - /* Copy the return address into %r2 also. */ - output_asm_insn ("copy %%r31,%%r2", xoperands); - } - else - { - xoperands[0] = deferred_plabels[i].internal_label; - - /* Get the address of our target into %r22. */ - output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands); - output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands); - - /* Get the high part of the address of $dyncall into %r2, then - add in the low part in the branch instruction. */ - output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands); - output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands); - - /* Copy the return pointer into both %r31 and %r2. */ - output_asm_insn ("copy %%r31,%%r2", xoperands); - } - } - - /* If we had a jump in the call's delay slot, output it now. */ - if (dbr_sequence_length () != 0 - && !delay_insn_deleted) - { - xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); - output_asm_insn ("b,n %0", xoperands); - - /* Now delete the delay insn. */ - PUT_CODE (NEXT_INSN (insn), NOTE); - NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; - } - return ""; - } - - /* This call has an unconditional jump in its delay slot and the - call is known to reach its target or the beginning of the current - subspace. */ - - /* Use the containing sequence insn's address. */ - seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); - - distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))] - - insn_addresses[INSN_UID (seq_insn)] - 8; - - /* If the branch was too far away, emit a normal call followed - by a nop, followed by the unconditional branch. - - If the branch is close, then adjust %r2 from within the - call's delay slot. */ - - xoperands[0] = call_dest; - xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1); - if (! VAL_14_BITS_P (distance)) - output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands); - else - { - xoperands[3] = gen_label_rtx (); - output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (xoperands[3])); - } - - /* Delete the jump. */ - PUT_CODE (NEXT_INSN (insn), NOTE); - NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; - return ""; -} - -/* In HPUX 8.0's shared library scheme, special relocations are needed - for function labels if they might be passed to a function - in a shared library (because shared libraries don't live in code - space), and special magic is needed to construct their address. - - For reasons too disgusting to describe storage for the new name - is allocated either on the saveable_obstack (released at function - exit) or on the permanent_obstack for things that can never change - (libcall names for example). */ - -void -hppa_encode_label (sym, permanent) - rtx sym; - int permanent; -{ - char *str = XSTR (sym, 0); - int len = strlen (str); - char *newstr; - - newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack), - len + 2); - - if (str[0] == '*') - *newstr++ = *str++; - strcpy (newstr + 1, str); - *newstr = '@'; - XSTR (sym,0) = newstr; -} - -int -function_label_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0)); -} - -/* Returns 1 if OP is a function label involved in a simple addition - with a constant. Used to keep certain patterns from matching - during instruction combination. */ -int -is_function_label_plus_const (op) - rtx op; -{ - /* Strip off any CONST. */ - if (GET_CODE (op) == CONST) - op = XEXP (op, 0); - - return (GET_CODE (op) == PLUS - && function_label_operand (XEXP (op, 0), Pmode) - && GET_CODE (XEXP (op, 1)) == CONST_INT); -} - -/* Returns 1 if the 6 operands specified in OPERANDS are suitable for - use in fmpyadd instructions. */ -int -fmpyaddoperands (operands) - rtx *operands; -{ - enum machine_mode mode = GET_MODE (operands[0]); - - /* Must be a floating point mode. */ - if (mode != SFmode && mode != DFmode) - return 0; - - /* All modes must be the same. */ - if (! (mode == GET_MODE (operands[1]) - && mode == GET_MODE (operands[2]) - && mode == GET_MODE (operands[3]) - && mode == GET_MODE (operands[4]) - && mode == GET_MODE (operands[5]))) - return 0; - - /* All operands must be registers. */ - if (! (GET_CODE (operands[1]) == REG - && GET_CODE (operands[2]) == REG - && GET_CODE (operands[3]) == REG - && GET_CODE (operands[4]) == REG - && GET_CODE (operands[5]) == REG)) - return 0; - - /* Only 2 real operands to the addition. One of the input operands must - be the same as the output operand. */ - if (! rtx_equal_p (operands[3], operands[4]) - && ! rtx_equal_p (operands[3], operands[5])) - return 0; - - /* Inout operand of add can not conflict with any operands from multiply. */ - if (rtx_equal_p (operands[3], operands[0]) - || rtx_equal_p (operands[3], operands[1]) - || rtx_equal_p (operands[3], operands[2])) - return 0; - - /* multiply can not feed into addition operands. */ - if (rtx_equal_p (operands[4], operands[0]) - || rtx_equal_p (operands[5], operands[0])) - return 0; - - /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ - if (mode == SFmode - && (REGNO (operands[0]) < 57 - || REGNO (operands[1]) < 57 - || REGNO (operands[2]) < 57 - || REGNO (operands[3]) < 57 - || REGNO (operands[4]) < 57 - || REGNO (operands[5]) < 57)) - return 0; - - /* Passed. Operands are suitable for fmpyadd. */ - return 1; -} - -/* Returns 1 if the 6 operands specified in OPERANDS are suitable for - use in fmpysub instructions. */ -int -fmpysuboperands (operands) - rtx *operands; -{ - enum machine_mode mode = GET_MODE (operands[0]); - - /* Must be a floating point mode. */ - if (mode != SFmode && mode != DFmode) - return 0; - - /* All modes must be the same. */ - if (! (mode == GET_MODE (operands[1]) - && mode == GET_MODE (operands[2]) - && mode == GET_MODE (operands[3]) - && mode == GET_MODE (operands[4]) - && mode == GET_MODE (operands[5]))) - return 0; - - /* All operands must be registers. */ - if (! (GET_CODE (operands[1]) == REG - && GET_CODE (operands[2]) == REG - && GET_CODE (operands[3]) == REG - && GET_CODE (operands[4]) == REG - && GET_CODE (operands[5]) == REG)) - return 0; - - /* Only 2 real operands to the subtraction. Subtraction is not a commutative - operation, so operands[4] must be the same as operand[3]. */ - if (! rtx_equal_p (operands[3], operands[4])) - return 0; - - /* multiply can not feed into subtraction. */ - if (rtx_equal_p (operands[5], operands[0])) - return 0; - - /* Inout operand of sub can not conflict with any operands from multiply. */ - if (rtx_equal_p (operands[3], operands[0]) - || rtx_equal_p (operands[3], operands[1]) - || rtx_equal_p (operands[3], operands[2])) - return 0; - - /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ - if (mode == SFmode - && (REGNO (operands[0]) < 57 - || REGNO (operands[1]) < 57 - || REGNO (operands[2]) < 57 - || REGNO (operands[3]) < 57 - || REGNO (operands[4]) < 57 - || REGNO (operands[5]) < 57)) - return 0; - - /* Passed. Operands are suitable for fmpysub. */ - return 1; -} - -int -plus_xor_ior_operator (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR - || GET_CODE (op) == IOR); -} - -/* Return 1 if the given constant is 2, 4, or 8. These are the valid - constants for shadd instructions. */ -static int -shadd_constant_p (val) - int val; -{ - if (val == 2 || val == 4 || val == 8) - return 1; - else - return 0; -} - -/* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are - the valid constant for shadd instructions. */ -int -shadd_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op))); -} - -/* Return 1 if OP is valid as a base register in a reg + reg address. */ - -int -basereg_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - /* cse will create some unscaled indexed addresses, however; it - generally isn't a win on the PA, so avoid creating unscaled - indexed addresses until after cse is finished. */ - if (!cse_not_expected) - return 0; - - /* Once reload has started everything is considered valid. Reload should - only create indexed addresses using the stack/frame pointer, and any - others were checked for validity when created by the combine pass. - - Also allow any register when TARGET_NO_SPACE_REGS is in effect since - we don't have to worry about the braindamaged implicit space register - selection using the basereg only (rather than effective address) - screwing us over. */ - if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed) - return (GET_CODE (op) == REG); - - /* Stack is always OK for indexing. */ - if (op == stack_pointer_rtx) - return 1; - - /* While it's always safe to index off the frame pointer, it's not - always profitable, particularly when the frame pointer is being - eliminated. */ - if (! flag_omit_frame_pointer && op == frame_pointer_rtx) - return 1; - - /* The only other valid OPs are pseudo registers with - REGNO_POINTER_FLAG set. */ - if (GET_CODE (op) != REG - || REGNO (op) < FIRST_PSEUDO_REGISTER - || ! register_operand (op, mode)) - return 0; - - return REGNO_POINTER_FLAG (REGNO (op)); -} - -/* Return 1 if this operand is anything other than a hard register. */ - -int -non_hard_reg_operand (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER); -} - -/* Return 1 if INSN branches forward. Should be using insn_addresses - to avoid walking through all the insns... */ -static int -forward_branch_p (insn) - rtx insn; -{ - rtx label = JUMP_LABEL (insn); - - while (insn) - { - if (insn == label) - break; - else - insn = NEXT_INSN (insn); - } - - return (insn == label); -} - -/* Return 1 if OP is an equality comparison, else return 0. */ -int -eq_neq_comparison_operator (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == EQ || GET_CODE (op) == NE); -} - -/* Return 1 if OP is an operator suitable for use in a movb instruction. */ -int -movb_comparison_operator (op, mode) - rtx op; - enum machine_mode mode ATTRIBUTE_UNUSED; -{ - return (GET_CODE (op) == EQ || GET_CODE (op) == NE - || GET_CODE (op) == LT || GET_CODE (op) == GE); -} - -/* Return 1 if INSN is in the delay slot of a call instruction. */ -int -jump_in_call_delay (insn) - rtx insn; -{ - - if (GET_CODE (insn) != JUMP_INSN) - return 0; - - if (PREV_INSN (insn) - && PREV_INSN (PREV_INSN (insn)) - && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN) - { - rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn))); - - return (GET_CODE (PATTERN (test_insn)) == SEQUENCE - && XVECEXP (PATTERN (test_insn), 0, 1) == insn); - - } - else - return 0; -} - -/* Output an unconditional move and branch insn. */ - -char * -output_parallel_movb (operands, length) - rtx *operands; - int length; -{ - /* These are the cases in which we win. */ - if (length == 4) - return "mov%I1b,tr %1,%0,%2"; - - /* None of these cases wins, but they don't lose either. */ - if (dbr_sequence_length () == 0) - { - /* Nothing in the delay slot, fake it by putting the combined - insn (the copy or add) in the delay slot of a bl. */ - if (GET_CODE (operands[1]) == CONST_INT) - return "bl %2,0\n\tldi %1,%0"; - else - return "bl %2,0\n\tcopy %1,%0"; - } - else - { - /* Something in the delay slot, but we've got a long branch. */ - if (GET_CODE (operands[1]) == CONST_INT) - return "ldi %1,%0\n\tbl %2,0"; - else - return "copy %1,%0\n\tbl %2,0"; - } -} - -/* Output an unconditional add and branch insn. */ - -char * -output_parallel_addb (operands, length) - rtx *operands; - int length; -{ - /* To make life easy we want operand0 to be the shared input/output - operand and operand1 to be the readonly operand. */ - if (operands[0] == operands[1]) - operands[1] = operands[2]; - - /* These are the cases in which we win. */ - if (length == 4) - return "add%I1b,tr %1,%0,%3"; - - /* None of these cases win, but they don't lose either. */ - if (dbr_sequence_length () == 0) - { - /* Nothing in the delay slot, fake it by putting the combined - insn (the copy or add) in the delay slot of a bl. */ - return "bl %3,0\n\tadd%I1 %1,%0,%0"; - } - else - { - /* Something in the delay slot, but we've got a long branch. */ - return "add%I1 %1,%0,%0\n\tbl %3,0"; - } -} - -/* Return nonzero if INSN (a jump insn) immediately follows a call to - a named function. This is used to discourage creating parallel movb/addb - insns since a jump which immediately follows a call can execute in the - delay slot of the call. - - It is also used to avoid filling the delay slot of a jump which - immediately follows a call since the jump can usually be eliminated - completely by modifying RP in the delay slot of the call. */ - -int -following_call (insn) - rtx insn; -{ - /* CYGNUS LOCAL PA8000/law */ - /* We do not parallel movb,addb or place jumps into call delay slots when - optimizing for the PA8000. */ - if (pa_cpu != PROCESSOR_8000) - return 0; - /* END CYGNUS LOCAL */ - - /* Find the previous real insn, skipping NOTEs. */ - insn = PREV_INSN (insn); - while (insn && GET_CODE (insn) == NOTE) - insn = PREV_INSN (insn); - - /* Check for CALL_INSNs and millicode calls. */ - if (insn - && ((GET_CODE (insn) == CALL_INSN - && get_attr_type (insn) != TYPE_DYNCALL) - || (GET_CODE (insn) == INSN - && GET_CODE (PATTERN (insn)) != SEQUENCE - && GET_CODE (PATTERN (insn)) != USE - && GET_CODE (PATTERN (insn)) != CLOBBER - && get_attr_type (insn) == TYPE_MILLI))) - return 1; - - return 0; -} - -/* Restore any INSN_CODEs for insns with unscaled indexed addresses since - the INSN_CODE might be clobberd by rerecognition triggered by reorg. */ - -static void -restore_unscaled_index_insn_codes (insns) - rtx insns; -{ - rtx insn; - - for (insn = insns; insn; insn = NEXT_INSN (insn)) - { - if (INSN_UID (insn) < max_unscaled_index_insn_codes_uid - && unscaled_index_insn_codes[INSN_UID (insn)] != -1) - INSN_CODE (insn) = unscaled_index_insn_codes[INSN_UID (insn)]; - } -} - -/* Severe braindamage: - - On the PA, address computations within MEM expressions are not - commutative because of the implicit space register selection - from the base register (instead of the entire effective address). - - Because of this mis-feature we have to know which register in a reg+reg - address is the base and which is the index. - - Before reload, the base can be identified by REGNO_POINTER_FLAG. We use - this to force base + index addresses to match a different insn than - index + base addresses. - - We assume that no pass during or after reload creates new unscaled indexed - addresses, so any unscaled indexed address we find after reload must have - at one time been recognized a base + index or index + base and we accept - any register as a base register. - - This scheme assumes that no pass during/after reload will rerecognize an - insn with an unscaled indexed address. This failed due to a reorg call - to rerecognize certain insns. - - So, we record if an insn uses an unscaled indexed address and which - register is the base (via recording of the INSN_CODE for such insns). - - Just before we output code for the function, we make sure all the insns - using unscaled indexed addresses have the same INSN_CODE as they did - immediately before delay slot scheduling. - - This is extremely gross. Long term, I'd like to be able to look at - REG_POINTER_FLAG to handle these kinds of problems. */ - -static void -record_unscaled_index_insn_codes (insns) - rtx insns; -{ - rtx insn; - - max_unscaled_index_insn_codes_uid = get_max_uid (); - unscaled_index_insn_codes - = (int *)xmalloc (max_unscaled_index_insn_codes_uid * sizeof (int)); - memset (unscaled_index_insn_codes, -1, - max_unscaled_index_insn_codes_uid * sizeof (int)); - - for (insn = insns; insn; insn = NEXT_INSN (insn)) - { - rtx set = single_set (insn); - rtx mem = NULL_RTX; - - /* Ignore anything that isn't a normal SET. */ - if (set == NULL_RTX) - continue; - - /* No insns can have more than one MEM. */ - if (GET_CODE (SET_SRC (set)) == MEM) - mem = SET_SRC (set); - - if (GET_CODE (SET_DEST (set)) == MEM) - mem = SET_DEST (set); - - /* If neither operand is a mem, then there's nothing to do. */ - if (mem == NULL_RTX) - continue; - - if (GET_CODE (XEXP (mem, 0)) != PLUS) - continue; - - /* If both are REGs (or SUBREGs), then record the insn code for - this insn. */ - if (REG_P (XEXP (XEXP (mem, 0), 0)) && REG_P (XEXP (XEXP (mem, 0), 1))) - unscaled_index_insn_codes[INSN_UID (insn)] = INSN_CODE (insn); - } -} - -/* We use this hook to perform a PA specific optimization which is difficult - to do in earlier passes. - - We want the delay slots of branches within jump tables to be filled. - None of the compiler passes at the moment even has the notion that a - PA jump table doesn't contain addresses, but instead contains actual - instructions! - - Because we actually jump into the table, the addresses of each entry - must stay constant in relation to the beginning of the table (which - itself must stay constant relative to the instruction to jump into - it). I don't believe we can guarantee earlier passes of the compiler - will adhere to those rules. - - So, late in the compilation process we find all the jump tables, and - expand them into real code -- eg each entry in the jump table vector - will get an appropriate label followed by a jump to the final target. - - Reorg and the final jump pass can then optimize these branches and - fill their delay slots. We end up with smaller, more efficient code. - - The jump instructions within the table are special; we must be able - to identify them during assembly output (if the jumps don't get filled - we need to emit a nop rather than nullifying the delay slot)). We - identify jumps in switch tables by marking the SET with DImode. - - We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB - insns. This serves two purposes, first it prevents jump.c from - noticing that the last N entries in the table jump to the instruction - immediately after the table and deleting the jumps. Second, those - insns mark where we should emit .begin_brtab and .end_brtab directives - when using GAS (allows for better link time optimizations). */ - -void -pa_reorg (insns) - rtx insns; -{ - rtx insn; - - /* Keep track of which insns have unscaled indexed addresses, and which - register is the base address in such insns. */ - record_unscaled_index_insn_codes (insns); - - remove_useless_addtr_insns (insns, 1); - - /* CYGNUS LOCAL PA8000/law */ - /* These optimizations hurt PA8000 performance. */ - if (pa_cpu != PROCESSOR_8000) - pa_combine_instructions (get_insns ()); - /* END CYGNUS LOCAL */ - - /* This is fairly cheap, so always run it if optimizing. */ - if (optimize > 0 && !TARGET_BIG_SWITCH) - { - /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */ - insns = get_insns (); - for (insn = insns; insn; insn = NEXT_INSN (insn)) - { - rtx pattern, tmp, location; - unsigned int length, i; - - /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */ - if (GET_CODE (insn) != JUMP_INSN - || (GET_CODE (PATTERN (insn)) != ADDR_VEC - && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) - continue; - - /* Emit marker for the beginning of the branch table. */ - emit_insn_before (gen_begin_brtab (), insn); - - pattern = PATTERN (insn); - location = PREV_INSN (insn); - length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC); - - for (i = 0; i < length; i++) - { - /* Emit a label before each jump to keep jump.c from - removing this code. */ - tmp = gen_label_rtx (); - LABEL_NUSES (tmp) = 1; - emit_label_after (tmp, location); - location = NEXT_INSN (location); - - if (GET_CODE (pattern) == ADDR_VEC) - { - /* Emit the jump itself. */ - tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0)); - tmp = emit_jump_insn_after (tmp, location); - JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0); - /* It is easy to rely on the branch table markers - during assembly output to trigger the correct code - for a switch table jump with an unfilled delay slot, - - However, that requires state and assumes that we look - at insns in order. - - We can't make such assumptions when computing the length - of instructions. Ugh. We could walk the insn chain to - determine if this instruction is in a branch table, but - that can get rather expensive, particularly during the - branch shortening phase of the compiler. - - So instead we mark this jump as being special. This is - far from ideal and knows that no code after this will - muck around with the mode of the JUMP_INSN itself. */ - PUT_MODE (tmp, SImode); - LABEL_NUSES (JUMP_LABEL (tmp))++; - location = NEXT_INSN (location); - } - else - { - /* Emit the jump itself. */ - tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0)); - tmp = emit_jump_insn_after (tmp, location); - JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0); - /* It is easy to rely on the branch table markers - during assembly output to trigger the correct code - for a switch table jump with an unfilled delay slot, - - However, that requires state and assumes that we look - at insns in order. - - We can't make such assumptions when computing the length - of instructions. Ugh. We could walk the insn chain to - determine if this instruction is in a branch table, but - that can get rather expensive, particularly during the - branch shortening phase of the compiler. - - So instead we mark this jump as being special. This is - far from ideal and knows that no code after this will - muck around with the mode of the JUMP_INSN itself. */ - PUT_MODE (tmp, SImode); - LABEL_NUSES (JUMP_LABEL (tmp))++; - location = NEXT_INSN (location); - } - - /* Emit a BARRIER after the jump. */ - emit_barrier_after (location); - location = NEXT_INSN (location); - } - - /* Emit marker for the end of the branch table. */ - emit_insn_before (gen_end_brtab (), location); - location = NEXT_INSN (location); - emit_barrier_after (location); - - /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */ - delete_insn (insn); - } - } - else - { - /* Sill need an end_brtab insn. */ - insns = get_insns (); - for (insn = insns; insn; insn = NEXT_INSN (insn)) - { - /* Find an ADDR_VEC insn. */ - if (GET_CODE (insn) != JUMP_INSN - || (GET_CODE (PATTERN (insn)) != ADDR_VEC - && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) - continue; - - /* Now generate markers for the beginning and end of the - branch table. */ - emit_insn_before (gen_begin_brtab (), insn); - emit_insn_after (gen_end_brtab (), insn); - } - } -} - -/* The PA has a number of odd instructions which can perform multiple - tasks at once. On first generation PA machines (PA1.0 and PA1.1) - it may be profitable to combine two instructions into one instruction - with two outputs. It's not profitable PA2.0 machines because the - two outputs would take two slots in the reorder buffers. - - This routine finds instructions which can be combined and combines - them. We only support some of the potential combinations, and we - only try common ways to find suitable instructions. - - * addb can add two registers or a register and a small integer - and jump to a nearby (+-8k) location. Normally the jump to the - nearby location is conditional on the result of the add, but by - using the "true" condition we can make the jump unconditional. - Thus addb can perform two independent operations in one insn. - - * movb is similar to addb in that it can perform a reg->reg - or small immediate->reg copy and jump to a nearby (+-8k location). - - * fmpyadd and fmpysub can perform a FP multiply and either an - FP add or FP sub if the operands of the multiply and add/sub are - independent (there are other minor restrictions). Note both - the fmpy and fadd/fsub can in theory move to better spots according - to data dependencies, but for now we require the fmpy stay at a - fixed location. - - * Many of the memory operations can perform pre & post updates - of index registers. GCC's pre/post increment/decrement addressing - is far too simple to take advantage of all the possibilities. This - pass may not be suitable since those insns may not be independent. - - * comclr can compare two ints or an int and a register, nullify - the following instruction and zero some other register. This - is more difficult to use as it's harder to find an insn which - will generate a comclr than finding something like an unconditional - branch. (conditional moves & long branches create comclr insns). - - * Most arithmetic operations can conditionally skip the next - instruction. They can be viewed as "perform this operation - and conditionally jump to this nearby location" (where nearby - is an insns away). These are difficult to use due to the - branch length restrictions. */ - -static void -pa_combine_instructions (insns) - rtx insns ATTRIBUTE_UNUSED; -{ - rtx anchor, new; - - /* This can get expensive since the basic algorithm is on the - order of O(n^2) (or worse). Only do it for -O2 or higher - levels of optimization. */ - if (optimize < 2) - return; - - /* Walk down the list of insns looking for "anchor" insns which - may be combined with "floating" insns. As the name implies, - "anchor" instructions don't move, while "floating" insns may - move around. */ - new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX)); - new = make_insn_raw (new); - - for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor)) - { - enum attr_pa_combine_type anchor_attr; - enum attr_pa_combine_type floater_attr; - - /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs. - Also ignore any special USE insns. */ - if ((GET_CODE (anchor) != INSN - && GET_CODE (anchor) != JUMP_INSN - && GET_CODE (anchor) != CALL_INSN) - || GET_CODE (PATTERN (anchor)) == USE - || GET_CODE (PATTERN (anchor)) == CLOBBER - || GET_CODE (PATTERN (anchor)) == ADDR_VEC - || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC) - continue; - - anchor_attr = get_attr_pa_combine_type (anchor); - /* See if anchor is an insn suitable for combination. */ - if (anchor_attr == PA_COMBINE_TYPE_FMPY - || anchor_attr == PA_COMBINE_TYPE_FADDSUB - || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH - && ! forward_branch_p (anchor))) - { - rtx floater; - - for (floater = PREV_INSN (anchor); - floater; - floater = PREV_INSN (floater)) - { - if (GET_CODE (floater) == NOTE - || (GET_CODE (floater) == INSN - && (GET_CODE (PATTERN (floater)) == USE - || GET_CODE (PATTERN (floater)) == CLOBBER))) - continue; - - /* Anything except a regular INSN will stop our search. */ - if (GET_CODE (floater) != INSN - || GET_CODE (PATTERN (floater)) == ADDR_VEC - || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) - { - floater = NULL_RTX; - break; - } - - /* See if FLOATER is suitable for combination with the - anchor. */ - floater_attr = get_attr_pa_combine_type (floater); - if ((anchor_attr == PA_COMBINE_TYPE_FMPY - && floater_attr == PA_COMBINE_TYPE_FADDSUB) - || (anchor_attr == PA_COMBINE_TYPE_FADDSUB - && floater_attr == PA_COMBINE_TYPE_FMPY)) - { - /* If ANCHOR and FLOATER can be combined, then we're - done with this pass. */ - if (pa_can_combine_p (new, anchor, floater, 0, - SET_DEST (PATTERN (floater)), - XEXP (SET_SRC (PATTERN (floater)), 0), - XEXP (SET_SRC (PATTERN (floater)), 1))) - break; - } - - else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH - && floater_attr == PA_COMBINE_TYPE_ADDMOVE) - { - if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS) - { - if (pa_can_combine_p (new, anchor, floater, 0, - SET_DEST (PATTERN (floater)), - XEXP (SET_SRC (PATTERN (floater)), 0), - XEXP (SET_SRC (PATTERN (floater)), 1))) - break; - } - else - { - if (pa_can_combine_p (new, anchor, floater, 0, - SET_DEST (PATTERN (floater)), - SET_SRC (PATTERN (floater)), - SET_SRC (PATTERN (floater)))) - break; - } - } - } - - /* If we didn't find anything on the backwards scan try forwards. */ - if (!floater - && (anchor_attr == PA_COMBINE_TYPE_FMPY - || anchor_attr == PA_COMBINE_TYPE_FADDSUB)) - { - for (floater = anchor; floater; floater = NEXT_INSN (floater)) - { - if (GET_CODE (floater) == NOTE - || (GET_CODE (floater) == INSN - && (GET_CODE (PATTERN (floater)) == USE - || GET_CODE (PATTERN (floater)) == CLOBBER))) - - continue; - - /* Anything except a regular INSN will stop our search. */ - if (GET_CODE (floater) != INSN - || GET_CODE (PATTERN (floater)) == ADDR_VEC - || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) - { - floater = NULL_RTX; - break; - } - - /* See if FLOATER is suitable for combination with the - anchor. */ - floater_attr = get_attr_pa_combine_type (floater); - if ((anchor_attr == PA_COMBINE_TYPE_FMPY - && floater_attr == PA_COMBINE_TYPE_FADDSUB) - || (anchor_attr == PA_COMBINE_TYPE_FADDSUB - && floater_attr == PA_COMBINE_TYPE_FMPY)) - { - /* If ANCHOR and FLOATER can be combined, then we're - done with this pass. */ - if (pa_can_combine_p (new, anchor, floater, 1, - SET_DEST (PATTERN (floater)), - XEXP (SET_SRC (PATTERN(floater)),0), - XEXP(SET_SRC(PATTERN(floater)),1))) - break; - } - } - } - - /* FLOATER will be nonzero if we found a suitable floating - insn for combination with ANCHOR. */ - if (floater - && (anchor_attr == PA_COMBINE_TYPE_FADDSUB - || anchor_attr == PA_COMBINE_TYPE_FMPY)) - { - /* Emit the new instruction and delete the old anchor. */ - emit_insn_before (gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (2, - PATTERN (anchor), - PATTERN (floater))), - anchor); - PUT_CODE (anchor, NOTE); - NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (anchor) = 0; - - /* Emit a special USE insn for FLOATER, then delete - the floating insn. */ - emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); - delete_insn (floater); - - continue; - } - else if (floater - && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH) - { - rtx temp; - /* Emit the new_jump instruction and delete the old anchor. */ - temp = emit_jump_insn_before (gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (2, PATTERN (anchor), - PATTERN (floater))), - anchor); - JUMP_LABEL (temp) = JUMP_LABEL (anchor); - PUT_CODE (anchor, NOTE); - NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (anchor) = 0; - - /* Emit a special USE insn for FLOATER, then delete - the floating insn. */ - emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); - delete_insn (floater); - continue; - } - } - } -} - -int -pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2) - rtx new, anchor, floater; - int reversed; - rtx dest, src1, src2; -{ - int insn_code_number; - rtx start, end; - - /* Create a PARALLEL with the patterns of ANCHOR and - FLOATER, try to recognize it, then test constraints - for the resulting pattern. - - If the pattern doesn't match or the constraints - aren't met keep searching for a suitable floater - insn. */ - XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor); - XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater); - INSN_CODE (new) = -1; - insn_code_number = recog_memoized (new); - if (insn_code_number < 0 - || !constrain_operands (insn_code_number, 1)) - return 0; - - if (reversed) - { - start = anchor; - end = floater; - } - else - { - start = floater; - end = anchor; - } - - /* There's up to three operands to consider. One - output and two inputs. - - The output must not be used between FLOATER & ANCHOR - exclusive. The inputs must not be set between - FLOATER and ANCHOR exclusive. */ - - if (reg_used_between_p (dest, start, end)) - return 0; - - if (reg_set_between_p (src1, start, end)) - return 0; - - if (reg_set_between_p (src2, start, end)) - return 0; - - /* If we get here, then everything is good. */ - return 1; -} - -/* Return nonzero if sets and references for INSN are delayed. - - Millicode insns are actually function calls with some special - constraints on arguments and register usage. - - Millicode calls always expect their arguments in the integer argument - registers, and always return their result in %r29 (ret1). They - are expected to clobber their arguments, %r1, %r29, and %r31 and - nothing else. - - By considering this effects delayed reorg reorg can put insns - which set the argument registers into the delay slot of the millicode - call -- thus they act more like traditional CALL_INSNs. - - get_attr_type will try to recognize the given insn, so make sure to - filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns - in particular. */ -int -insn_sets_and_refs_are_delayed (insn) - rtx insn; -{ - return ((GET_CODE (insn) == INSN - && GET_CODE (PATTERN (insn)) != SEQUENCE - && GET_CODE (PATTERN (insn)) != USE - && GET_CODE (PATTERN (insn)) != CLOBBER - && get_attr_type (insn) == TYPE_MILLI)); -} |