| author | YamaArashi <shadow962@live.com> | 2016-01-06 01:47:28 -0800 |
|---|---|---|
| committer | YamaArashi <shadow962@live.com> | 2016-01-06 01:47:28 -0800 |
| commit | be8b04496302184c6e8f04d6179f9c3afc50aeb6 (patch) | |
| tree | 726e2468c0c07add773c0dbd86ab6386844259ae | gcc/config/pa/pa.c |
initial commit
Diffstat (limited to 'gcc/config/pa/pa.c')
| mode | file | lines |
|---|---|---|
| -rwxr-xr-x | gcc/config/pa/pa.c | 6491 |
1 file changed, 6491 insertions, 0 deletions
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c new file mode 100755 index 0000000..de7f698 --- /dev/null +++ b/gcc/config/pa/pa.c @@ -0,0 +1,6491 @@ +/* Subroutines for insn-output.c for HPPA. + Copyright (C) 1992, 93-98, 1999 Free Software Foundation, Inc. + Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include "config.h" +#include "system.h" + +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "real.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "tree.h" +#include "reload.h" +#include "c-tree.h" +#include "expr.h" +#include "obstack.h" +#include "toplev.h" + +static void restore_unscaled_index_insn_codes PROTO((rtx)); +static void record_unscaled_index_insn_codes PROTO((rtx)); +static void pa_combine_instructions PROTO((rtx)); +static int pa_can_combine_p PROTO((rtx, rtx, rtx, int, rtx, rtx, rtx)); +static int forward_branch_p PROTO((rtx)); +static int shadd_constant_p PROTO((int)); + +/* Save the operands last given to a compare for use when we + generate a scc or bcc insn. */ + +rtx hppa_compare_op0, hppa_compare_op1; +enum cmp_type hppa_branch_type; + +/* Which cpu we are scheduling for. */ +enum processor_type pa_cpu; + +/* String to hold which cpu we are scheduling for. */ +char *pa_cpu_string; + +/* Set by the FUNCTION_PROFILER macro. */ +int hp_profile_labelno; + +/* Counts for the number of callee-saved general and floating point + registers which were saved by the current function's prologue. */ +static int gr_saved, fr_saved; + +/* Whether or not the current function uses an out-of-line prologue + and epilogue. */ +static int out_of_line_prologue_epilogue; + +static rtx find_addr_reg (); + +/* Keep track of the number of bytes we have output in the CODE subspaces + during this compilation so we'll know when to emit inline long-calls. */ + +unsigned int total_code_bytes; + +/* Variables to handle plabels that we discover are necessary at assembly + output time. They are output after the current function. */ + +struct deferred_plabel +{ + rtx internal_label; + char *name; +} *deferred_plabels = 0; +int n_deferred_plabels = 0; + +/* Array indexed by INSN_UIDs holding the INSN_CODE of an insn which + uses an unscaled indexed address before delay slot scheduling. */ +static int *unscaled_index_insn_codes; + +/* Upper bound for the array. */ +static int max_unscaled_index_insn_codes_uid; + +void +override_options () +{ + /* Default to 7100 scheduling. If the 7100LC scheduling ever + gets reasonably tuned, it should be the default since that + what most PAs sold now are. */ + if (pa_cpu_string == NULL + || ! strcmp (pa_cpu_string, "7100")) + { + pa_cpu_string = "7100"; + pa_cpu = PROCESSOR_7100; + } + else if (! 
strcmp (pa_cpu_string, "700")) + { + pa_cpu_string = "700"; + pa_cpu = PROCESSOR_700; + } + else if (! strcmp (pa_cpu_string, "7100LC")) + { + pa_cpu_string = "7100LC"; + pa_cpu = PROCESSOR_7100LC; + } + else if (! strcmp (pa_cpu_string, "7200")) + { + pa_cpu_string = "7200"; + pa_cpu = PROCESSOR_7200; + } + /* CYGNUS LOCAL PA8000/law */ + else if (! strcmp (pa_cpu_string, "8000")) + { + pa_cpu_string = "8000"; + pa_cpu = PROCESSOR_8000; + } + else + { + warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC, 7200 and 8000\n", pa_cpu_string); + } + /* END CYGNUS LOCAL */ + + if (flag_pic && TARGET_PORTABLE_RUNTIME) + { + warning ("PIC code generation is not supported in the portable runtime model\n"); + } + + if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS)) + { + warning ("PIC code generation is not compatible with fast indirect calls\n"); + } + + if (flag_pic && profile_flag) + { + warning ("PIC code generation is not compatible with profiling\n"); + } + + if (TARGET_SPACE && (flag_pic || profile_flag)) + { + warning ("Out of line entry/exit sequences are not compatible\n"); + warning ("with PIC or profiling\n"); + } + + if (! TARGET_GAS && write_symbols != NO_DEBUG) + { + warning ("-g is only supported when using GAS on this processor,"); + warning ("-g option disabled."); + write_symbols = NO_DEBUG; + } +} + + +/* Return non-zero only if OP is a register of mode MODE, + or CONST0_RTX. */ +int +reg_or_0_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (op == CONST0_RTX (mode) || register_operand (op, mode)); +} + +/* Return non-zero if OP is suitable for use in a call to a named + function. + + (???) For 2.5 try to eliminate either call_operand_address or + function_label_operand, they perform very similar functions. */ +int +call_operand_address (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME); +} + +/* Return 1 if X contains a symbolic expression. We know these + expressions will have one of a few well defined forms, so + we need only check those forms. */ +int +symbolic_expression_p (x) + register rtx x; +{ + + /* Strip off any HIGH. */ + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + + return (symbolic_operand (x, VOIDmode)); +} + +int +symbolic_operand (op, mode) + register rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF: + case LABEL_REF: + return 1; + case CONST: + op = XEXP (op, 0); + return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF + || GET_CODE (XEXP (op, 0)) == LABEL_REF) + && GET_CODE (XEXP (op, 1)) == CONST_INT); + default: + return 0; + } +} + +/* Return truth value of statement that OP is a symbolic memory + operand of mode MODE. */ + +int +symbolic_memory_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) != MEM) + return 0; + op = XEXP (op, 0); + return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST + || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF); +} + +/* Return 1 if the operand is either a register or a memory operand that is + not symbolic. */ + +int +reg_or_nonsymb_mem_operand (op, mode) + register rtx op; + enum machine_mode mode; +{ + if (register_operand (op, mode)) + return 1; + + if (memory_operand (op, mode) && ! 
symbolic_memory_operand (op, mode)) + return 1; + + return 0; +} + +/* Return 1 if the operand is either a register, zero, or a memory operand + that is not symbolic. */ + +int +reg_or_0_or_nonsymb_mem_operand (op, mode) + register rtx op; + enum machine_mode mode; +{ + if (register_operand (op, mode)) + return 1; + + if (op == CONST0_RTX (mode)) + return 1; + + if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode)) + return 1; + + return 0; +} + +/* Accept any constant that can be moved in one instructions into a + general register. */ +int +cint_ok_for_move (intval) + HOST_WIDE_INT intval; +{ + /* OK if ldo, ldil, or zdepi, can be used. */ + return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0 + || zdepi_cint_p (intval)); +} + +/* Accept anything that can be moved in one instruction into a general + register. */ +int +move_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (register_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONSTANT_P_RTX) + return 1; + + if (GET_CODE (op) == CONST_INT) + return cint_ok_for_move (INTVAL (op)); + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) != MEM) + return 0; + + op = XEXP (op, 0); + if (GET_CODE (op) == LO_SUM) + return (register_operand (XEXP (op, 0), Pmode) + && CONSTANT_P (XEXP (op, 1))); + + /* Since move_operand is only used for source operands, we can always + allow scaled indexing! */ + if (! TARGET_DISABLE_INDEXING + && GET_CODE (op) == PLUS + && ((GET_CODE (XEXP (op, 0)) == MULT + && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG + && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT + && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode) + && GET_CODE (XEXP (op, 1)) == REG) + || (GET_CODE (XEXP (op, 1)) == MULT + &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG + && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT + && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode) + && GET_CODE (XEXP (op, 0)) == REG))) + return 1; + + return memory_address_p (mode, op); +} + +/* Accept REG and any CONST_INT that can be moved in one instruction into a + general register. */ +int +reg_or_cint_move_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (register_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONST_INT) + return cint_ok_for_move (INTVAL (op)); + + return 0; +} + +int +pic_label_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + if (!flag_pic) + return 0; + + switch (GET_CODE (op)) + { + case LABEL_REF: + return 1; + case CONST: + op = XEXP (op, 0); + return (GET_CODE (XEXP (op, 0)) == LABEL_REF + && GET_CODE (XEXP (op, 1)) == CONST_INT); + default: + return 0; + } +} + +int +fp_reg_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return reg_renumber && FP_REG_P (op); +} + + + +/* Return truth value of whether OP can be used as an operand in a + three operand arithmetic insn that accepts registers of mode MODE + or 14-bit signed integers. */ +int +arith_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (register_operand (op, mode) + || (GET_CODE (op) == CONST_INT && INT_14_BITS (op))); +} + +/* Return truth value of whether OP can be used as an operand in a + three operand arithmetic insn that accepts registers of mode MODE + or 11-bit signed integers. 
*/ +int +arith11_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (register_operand (op, mode) + || (GET_CODE (op) == CONST_INT && INT_11_BITS (op))); +} + +/* A constant integer suitable for use in a PRE_MODIFY memory + reference. */ +int +pre_cint_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == CONST_INT + && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10); +} + +/* A constant integer suitable for use in a POST_MODIFY memory + reference. */ +int +post_cint_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == CONST_INT + && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10); +} + +int +arith_double_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (register_operand (op, mode) + || (GET_CODE (op) == CONST_DOUBLE + && GET_MODE (op) == mode + && VAL_14_BITS_P (CONST_DOUBLE_LOW (op)) + && ((CONST_DOUBLE_HIGH (op) >= 0) + == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0)))); +} + +/* Return truth value of whether OP is a integer which fits the + range constraining immediate operands in three-address insns, or + is an integer register. */ + +int +ireg_or_int5_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op)) + || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32)); +} + +/* Return truth value of whether OP is a integer which fits the + range constraining immediate operands in three-address insns. */ + +int +int5_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == CONST_INT && INT_5_BITS (op)); +} + +int +uint5_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op)); +} + +int +int11_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == CONST_INT && INT_11_BITS (op)); +} + +int +uint32_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ +#if HOST_BITS_PER_WIDE_INT > 32 + /* All allowed constants will fit a CONST_INT. */ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L)); +#else + return (GET_CODE (op) == CONST_INT + || (GET_CODE (op) == CONST_DOUBLE + && CONST_DOUBLE_HIGH (op) == 0)); +#endif +} + +int +arith5_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return register_operand (op, mode) || int5_operand (op, mode); +} + +/* True iff zdepi can be used to generate this CONST_INT. */ +int +zdepi_cint_p (x) + unsigned HOST_WIDE_INT x; +{ + unsigned HOST_WIDE_INT lsb_mask, t; + + /* This might not be obvious, but it's at least fast. + This function is critical; we don't have the time loops would take. */ + lsb_mask = x & -x; + t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1); + /* Return true iff t is a power of two. */ + return ((t & (t - 1)) == 0); +} + +/* True iff depi or extru can be used to compute (reg & mask). + Accept bit pattern like these: + 0....01....1 + 1....10....0 + 1..10..01..1 */ +int +and_mask_p (mask) + unsigned HOST_WIDE_INT mask; +{ + mask = ~mask; + mask += mask & -mask; + return (mask & (mask - 1)) == 0; +} + +/* True iff depi or extru can be used to compute (reg & OP). */ +int +and_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (register_operand (op, mode) + || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op)))); +} + +/* True iff depi can be used to compute (reg | MASK). 
*/ +int +ior_mask_p (mask) + unsigned HOST_WIDE_INT mask; +{ + mask += mask & -mask; + return (mask & (mask - 1)) == 0; +} + +/* True iff depi can be used to compute (reg | OP). */ +int +ior_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op))); +} + +int +lhs_lshift_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode); +} + +/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx. + Such values can be the left hand side x in (x << r), using the zvdepi + instruction. */ +int +lhs_lshift_cint_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + unsigned HOST_WIDE_INT x; + if (GET_CODE (op) != CONST_INT) + return 0; + x = INTVAL (op) >> 4; + return (x & (x + 1)) == 0; +} + +int +arith32_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return register_operand (op, mode) || GET_CODE (op) == CONST_INT; +} + +int +pc_or_label_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF); +} + +/* Legitimize PIC addresses. If the address is already + position-independent, we return ORIG. Newly generated + position-independent addresses go to REG. If we need more + than one register, we lose. */ + +rtx +legitimize_pic_address (orig, mode, reg) + rtx orig, reg; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + rtx pic_ref = orig; + + /* Labels need special handling. */ + if (pic_label_operand (orig)) + { + emit_insn (gen_pic_load_label (reg, orig)); + current_function_uses_pic_offset_table = 1; + return reg; + } + if (GET_CODE (orig) == SYMBOL_REF) + { + if (reg == 0) + abort (); + + if (flag_pic == 2) + { + emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig)); + pic_ref + = gen_rtx_MEM (Pmode, + gen_rtx_LO_SUM (Pmode, reg, + gen_rtx_UNSPEC (SImode, + gen_rtvec (1, orig), + 0))); + } + else + pic_ref = gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, orig)); + current_function_uses_pic_offset_table = 1; + RTX_UNCHANGING_P (pic_ref) = 1; + emit_move_insn (reg, pic_ref); + return reg; + } + else if (GET_CODE (orig) == CONST) + { + rtx base; + + if (GET_CODE (XEXP (orig, 0)) == PLUS + && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) + return orig; + + if (reg == 0) + abort (); + + if (GET_CODE (XEXP (orig, 0)) == PLUS) + { + base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); + orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, + base == reg ? 0 : reg); + } + else abort (); + if (GET_CODE (orig) == CONST_INT) + { + if (INT_14_BITS (orig)) + return plus_constant_for_output (base, INTVAL (orig)); + orig = force_reg (Pmode, orig); + } + pic_ref = gen_rtx_PLUS (Pmode, base, orig); + /* Likewise, should we set special REG_NOTEs here? */ + } + return pic_ref; +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. + This macro is used in only one place: `memory_address' in explow.c. + + OLDX is the address as it was before break_out_memory_refs was called. + In some cases it is useful to look at this to decide what needs to be done. + + MODE and WIN are passed so that this macro can use + GO_IF_LEGITIMATE_ADDRESS. + + It is always safe for this macro to do nothing. It exists to recognize + opportunities to optimize the output. 
+ + For the PA, transform: + + memory(X + <large int>) + + into: + + if (<large int> & mask) >= 16 + Y = (<large int> & ~mask) + mask + 1 Round up. + else + Y = (<large int> & ~mask) Round down. + Z = X + Y + memory (Z + (<large int> - Y)); + + This is for CSE to find several similar references, and only use one Z. + + X can either be a SYMBOL_REF or REG, but because combine can not + perform a 4->2 combination we do nothing for SYMBOL_REF + D where + D will not fit in 14 bits. + + MODE_FLOAT references allow displacements which fit in 5 bits, so use + 0x1f as the mask. + + MODE_INT references allow displacements which fit in 14 bits, so use + 0x3fff as the mask. + + This relies on the fact that most mode MODE_FLOAT references will use FP + registers and most mode MODE_INT references will use integer registers. + (In the rare case of an FP register used in an integer MODE, we depend + on secondary reloads to clean things up.) + + + It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special + manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed + addressing modes to be used). + + Put X and Z into registers. Then put the entire expression into + a register. */ + +rtx +hppa_legitimize_address (x, oldx, mode) + rtx x, oldx ATTRIBUTE_UNUSED; + enum machine_mode mode; +{ + rtx orig = x; + + if (flag_pic) + return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode)); + + /* Strip off CONST. */ + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + + /* Special case. Get the SYMBOL_REF into a register and use indexing. + That should always be safe. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == REG + && GET_CODE (XEXP (x, 1)) == SYMBOL_REF) + { + rtx reg = force_reg (SImode, XEXP (x, 1)); + return force_reg (SImode, gen_rtx_PLUS (SImode, reg, XEXP (x, 0))); + } + + /* Note we must reject symbols which represent function addresses + since the assembler/linker can't handle arithmetic on plabels. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 1)) == CONST_INT + && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF + && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0))) + || GET_CODE (XEXP (x, 0)) == REG)) + { + rtx int_part, ptr_reg; + int newoffset; + int offset = INTVAL (XEXP (x, 1)); + int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff; + + /* CYGNUS LOCAL pa8000/law */ + mask = (GET_MODE_CLASS (mode) == MODE_FLOAT + ? (TARGET_PARISC_2_0 ? 0x3fff : 0x1f) : 0x3fff); + /* END CYGNUS LOCAL */ + + /* Choose which way to round the offset. Round up if we + are >= halfway to the next boundary. */ + if ((offset & mask) >= ((mask + 1) / 2)) + newoffset = (offset & ~ mask) + mask + 1; + else + newoffset = (offset & ~ mask); + + /* If the newoffset will not fit in 14 bits (ldo), then + handling this would take 4 or 5 instructions (2 to load + the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to + add the new offset and the SYMBOL_REF.) Combine can + not handle 4->2 or 5->2 combinations, so do not create + them. */ + if (! VAL_14_BITS_P (newoffset) + && GET_CODE (XEXP (x, 0)) == SYMBOL_REF) + { + rtx const_part + = gen_rtx_CONST (VOIDmode, gen_rtx_PLUS (Pmode, + XEXP (x, 0), + GEN_INT (newoffset))); + rtx tmp_reg + = force_reg (Pmode, + gen_rtx_HIGH (Pmode, const_part)); + ptr_reg + = force_reg (Pmode, + gen_rtx_LO_SUM (Pmode, tmp_reg, const_part)); + } + else + { + if (! 
VAL_14_BITS_P (newoffset)) + int_part = force_reg (Pmode, GEN_INT (newoffset)); + else + int_part = GEN_INT (newoffset); + + ptr_reg = force_reg (Pmode, + gen_rtx_PLUS (Pmode, + force_reg (Pmode, XEXP (x, 0)), + int_part)); + } + return plus_constant (ptr_reg, offset - newoffset); + } + + /* Handle (plus (mult (a) (shadd_constant)) (b)). */ + + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))) + && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o' + || GET_CODE (XEXP (x, 1)) == SUBREG) + && GET_CODE (XEXP (x, 1)) != CONST) + { + int val = INTVAL (XEXP (XEXP (x, 0), 1)); + rtx reg1, reg2; + + reg1 = XEXP (x, 1); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + reg2 = XEXP (XEXP (x, 0), 0); + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + return force_reg (Pmode, gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, reg2, + GEN_INT (val)), + reg1)); + } + + /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)). + + Only do so for floating point modes since this is more speculative + and we lose if it's an integer store. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1))) + && (mode == SFmode || mode == DFmode)) + { + + /* First, try and figure out what to use as a base register. */ + rtx reg1, reg2, base, idx, orig_base; + + reg1 = XEXP (XEXP (x, 0), 1); + reg2 = XEXP (x, 1); + base = NULL_RTX; + idx = NULL_RTX; + + /* Make sure they're both regs. If one was a SYMBOL_REF [+ const], + then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll + know it's a base register below. */ + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + /* Figure out what the base and index are. */ + + if (GET_CODE (reg1) == REG + && REGNO_POINTER_FLAG (REGNO (reg1))) + { + base = reg1; + orig_base = XEXP (XEXP (x, 0), 1); + idx = gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, + XEXP (XEXP (XEXP (x, 0), 0), 0), + XEXP (XEXP (XEXP (x, 0), 0), 1)), + XEXP (x, 1)); + } + else if (GET_CODE (reg2) == REG + && REGNO_POINTER_FLAG (REGNO (reg2))) + { + base = reg2; + orig_base = XEXP (x, 1); + idx = XEXP (x, 0); + } + + if (base == 0) + return orig; + + /* If the index adds a large constant, try to scale the + constant so that it can be loaded with only one insn. */ + if (GET_CODE (XEXP (idx, 1)) == CONST_INT + && VAL_14_BITS_P (INTVAL (XEXP (idx, 1)) + / INTVAL (XEXP (XEXP (idx, 0), 1))) + && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0) + { + /* Divide the CONST_INT by the scale factor, then add it to A. */ + int val = INTVAL (XEXP (idx, 1)); + + val /= INTVAL (XEXP (XEXP (idx, 0), 1)); + reg1 = XEXP (XEXP (idx, 0), 0); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val))); + + /* We can now generate a simple scaled indexed address. */ + return force_reg (Pmode, + gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, reg1, + XEXP (XEXP (idx, 0), 1)), + base)); + } + + /* If B + C is still a valid base register, then add them. 
*/ + if (GET_CODE (XEXP (idx, 1)) == CONST_INT + && INTVAL (XEXP (idx, 1)) <= 4096 + && INTVAL (XEXP (idx, 1)) >= -4096) + { + int val = INTVAL (XEXP (XEXP (idx, 0), 1)); + rtx reg1, reg2; + + reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1))); + + reg2 = XEXP (XEXP (idx, 0), 0); + if (GET_CODE (reg2) != CONST_INT) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + return force_reg (Pmode, gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, reg2, + GEN_INT (val)), + reg1)); + } + + /* Get the index into a register, then add the base + index and + return a register holding the result. */ + + /* First get A into a register. */ + reg1 = XEXP (XEXP (idx, 0), 0); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + /* And get B into a register. */ + reg2 = XEXP (idx, 1); + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + reg1 = force_reg (Pmode, + gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, reg1, + XEXP (XEXP (idx, 0), 1)), + reg2)); + + /* Add the result to our base register and return. */ + return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1)); + + } + + /* Uh-oh. We might have an address for x[n-100000]. This needs + special handling to avoid creating an indexed memory address + with x-100000 as the base. + + If the constant part is small enough, then it's still safe because + there is a guard page at the beginning and end of the data segment. + + Scaled references are common enough that we want to try and rearrange the + terms so that we can use indexing for these addresses too. Only + do the optimization for floatint point modes. */ + + if (GET_CODE (x) == PLUS + && symbolic_expression_p (XEXP (x, 1))) + { + /* Ugly. We modify things here so that the address offset specified + by the index expression is computed first, then added to x to form + the entire address. */ + + rtx regx1, regx2, regy1, regy2, y; + + /* Strip off any CONST. */ + y = XEXP (x, 1); + if (GET_CODE (y) == CONST) + y = XEXP (y, 0); + + if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS) + { + /* See if this looks like + (plus (mult (reg) (shadd_const)) + (const (plus (symbol_ref) (const_int)))) + + Where const_int is small. In that case the const + expression is a valid pointer for indexing. + + If const_int is big, but can be divided evenly by shadd_const + and added to (reg). This allows more scaled indexed addresses. 
*/ + if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF + && GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (y, 1)) == CONST_INT + && INTVAL (XEXP (y, 1)) >= -4096 + && INTVAL (XEXP (y, 1)) <= 4095 + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))) + { + int val = INTVAL (XEXP (XEXP (x, 0), 1)); + rtx reg1, reg2; + + reg1 = XEXP (x, 1); + if (GET_CODE (reg1) != REG) + reg1 = force_reg (Pmode, force_operand (reg1, 0)); + + reg2 = XEXP (XEXP (x, 0), 0); + if (GET_CODE (reg2) != REG) + reg2 = force_reg (Pmode, force_operand (reg2, 0)); + + return force_reg (Pmode, + gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, reg2, + GEN_INT (val)), + reg1)); + } + else if ((mode == DFmode || mode == SFmode) + && GET_CODE (XEXP (y, 0)) == SYMBOL_REF + && GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (y, 1)) == CONST_INT + && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0 + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))) + { + regx1 + = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1)) + / INTVAL (XEXP (XEXP (x, 0), 1)))); + regx2 = XEXP (XEXP (x, 0), 0); + if (GET_CODE (regx2) != REG) + regx2 = force_reg (Pmode, force_operand (regx2, 0)); + regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode, + regx2, regx1)); + return force_reg (Pmode, + gen_rtx_PLUS (Pmode, + gen_rtx_MULT (Pmode, regx2, + XEXP (XEXP (x, 0), + 1)), + force_reg (Pmode, XEXP (y, 0)))); + } + else if (GET_CODE (XEXP (y, 1)) == CONST_INT + && INTVAL (XEXP (y, 1)) >= -4096 + && INTVAL (XEXP (y, 1)) <= 4095) + { + /* This is safe because of the guard page at the + beginning and end of the data space. Just + return the original address. */ + return orig; + } + else + { + /* Doesn't look like one we can optimize. */ + regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0)); + regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0)); + regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0)); + regx1 = force_reg (Pmode, + gen_rtx_fmt_ee (GET_CODE (y), Pmode, + regx1, regy2)); + return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1)); + } + } + } + + return orig; +} + +/* For the HPPA, REG and REG+CONST is cost 0 + and addresses involving symbolic constants are cost 2. + + PIC addresses are very expensive. + + It is no coincidence that this has the same structure + as GO_IF_LEGITIMATE_ADDRESS. */ +int +hppa_address_cost (X) + rtx X; +{ + if (GET_CODE (X) == PLUS) + return 1; + else if (GET_CODE (X) == LO_SUM) + return 1; + else if (GET_CODE (X) == HIGH) + return 2; + return 4; +} + +/* Emit insns to move operands[1] into operands[0]. + + Return 1 if we have written out everything that needs to be done to + do the move. Otherwise, return 0 and the caller will emit the move + normally. 
*/ + +int +emit_move_sequence (operands, mode, scratch_reg) + rtx *operands; + enum machine_mode mode; + rtx scratch_reg; +{ + register rtx operand0 = operands[0]; + register rtx operand1 = operands[1]; + register rtx tem; + + if (scratch_reg + && reload_in_progress && GET_CODE (operand0) == REG + && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) + operand0 = reg_equiv_mem[REGNO (operand0)]; + else if (scratch_reg + && reload_in_progress && GET_CODE (operand0) == SUBREG + && GET_CODE (SUBREG_REG (operand0)) == REG + && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER) + { + SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))]; + operand0 = alter_subreg (operand0); + } + + if (scratch_reg + && reload_in_progress && GET_CODE (operand1) == REG + && REGNO (operand1) >= FIRST_PSEUDO_REGISTER) + operand1 = reg_equiv_mem[REGNO (operand1)]; + else if (scratch_reg + && reload_in_progress && GET_CODE (operand1) == SUBREG + && GET_CODE (SUBREG_REG (operand1)) == REG + && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER) + { + SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))]; + operand1 = alter_subreg (operand1); + } + + if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM + && ((tem = find_replacement (&XEXP (operand0, 0))) + != XEXP (operand0, 0))) + operand0 = gen_rtx_MEM (GET_MODE (operand0), tem); + if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM + && ((tem = find_replacement (&XEXP (operand1, 0))) + != XEXP (operand1, 0))) + operand1 = gen_rtx_MEM (GET_MODE (operand1), tem); + + /* Handle secondary reloads for loads/stores of FP registers from + REG+D addresses where D does not fit in 5 bits, including + (subreg (mem (addr))) cases. */ + if (fp_reg_operand (operand0, mode) + && ((GET_CODE (operand1) == MEM + && ! memory_address_p (DFmode, XEXP (operand1, 0))) + || ((GET_CODE (operand1) == SUBREG + && GET_CODE (XEXP (operand1, 0)) == MEM + && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0))))) + && scratch_reg) + { + if (GET_CODE (operand1) == SUBREG) + operand1 = XEXP (operand1, 0); + + scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg)); + + /* D might not fit in 14 bits either; for such cases load D into + scratch reg. */ + if (!memory_address_p (SImode, XEXP (operand1, 0))) + { + emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); + emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)), + SImode, + XEXP (XEXP (operand1, 0), 0), + scratch_reg)); + } + else + emit_move_insn (scratch_reg, XEXP (operand1, 0)); + emit_insn (gen_rtx_SET (VOIDmode, operand0, gen_rtx_MEM (mode, + scratch_reg))); + return 1; + } + else if (fp_reg_operand (operand1, mode) + && ((GET_CODE (operand0) == MEM + && ! memory_address_p (DFmode, XEXP (operand0, 0))) + || ((GET_CODE (operand0) == SUBREG) + && GET_CODE (XEXP (operand0, 0)) == MEM + && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0)))) + && scratch_reg) + { + if (GET_CODE (operand0) == SUBREG) + operand0 = XEXP (operand0, 0); + + scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg)); + /* D might not fit in 14 bits either; for such cases load D into + scratch reg. 
*/ + if (!memory_address_p (SImode, XEXP (operand0, 0))) + { + emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1)); + emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, + 0)), + SImode, + XEXP (XEXP (operand0, 0), + 0), + scratch_reg)); + } + else + emit_move_insn (scratch_reg, XEXP (operand0, 0)); + emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg), + operand1)); + return 1; + } + /* Handle secondary reloads for loads of FP registers from constant + expressions by forcing the constant into memory. + + use scratch_reg to hold the address of the memory location. + + ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return + NO_REGS when presented with a const_int and an register class + containing only FP registers. Doing so unfortunately creates + more problems than it solves. Fix this for 2.5. */ + else if (fp_reg_operand (operand0, mode) + && CONSTANT_P (operand1) + && scratch_reg) + { + rtx xoperands[2]; + + /* Force the constant into memory and put the address of the + memory location into scratch_reg. */ + xoperands[0] = scratch_reg; + xoperands[1] = XEXP (force_const_mem (mode, operand1), 0); + emit_move_sequence (xoperands, Pmode, 0); + + /* Now load the destination register. */ + emit_insn (gen_rtx_SET (mode, operand0, gen_rtx_MEM (mode, scratch_reg))); + return 1; + } + /* Handle secondary reloads for SAR. These occur when trying to load + the SAR from memory a FP register, or with a constant. */ + else if (GET_CODE (operand0) == REG + && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS + && (GET_CODE (operand1) == MEM + || GET_CODE (operand1) == CONST_INT + || (GET_CODE (operand1) == REG + && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))) + && scratch_reg) + { + /* D might not fit in 14 bits either; for such cases load D into + scratch reg. */ + if (GET_CODE (operand1) == MEM + && !memory_address_p (SImode, XEXP (operand1, 0))) + { + emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); + emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, + 0)), + SImode, + XEXP (XEXP (operand1, 0), + 0), + scratch_reg)); + emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand1), + scratch_reg)); + } + else + emit_move_insn (scratch_reg, operand1); + emit_move_insn (operand0, scratch_reg); + return 1; + } + /* Handle most common case: storing into a register. */ + else if (register_operand (operand0, mode)) + { + if (register_operand (operand1, mode) + || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1)) + || (operand1 == CONST0_RTX (mode)) + || (GET_CODE (operand1) == HIGH + && !symbolic_operand (XEXP (operand1, 0), VOIDmode)) + /* Only `general_operands' can come here, so MEM is ok. */ + || GET_CODE (operand1) == MEM) + { + /* Run this case quickly. */ + emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); + return 1; + } + } + else if (GET_CODE (operand0) == MEM) + { + if (mode == DFmode && operand1 == CONST0_RTX (mode) + && !(reload_in_progress || reload_completed)) + { + rtx temp = gen_reg_rtx (DFmode); + + emit_insn (gen_rtx_SET (VOIDmode, temp, operand1)); + emit_insn (gen_rtx_SET (VOIDmode, operand0, temp)); + return 1; + } + if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode)) + { + /* Run this case quickly. */ + emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); + return 1; + } + if (! 
(reload_in_progress || reload_completed)) + { + operands[0] = validize_mem (operand0); + operands[1] = operand1 = force_reg (mode, operand1); + } + } + + /* Simplify the source if we need to. + Note we do have to handle function labels here, even though we do + not consider them legitimate constants. Loop optimizations can + call the emit_move_xxx with one as a source. */ + if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode)) + || function_label_operand (operand1, mode) + || (GET_CODE (operand1) == HIGH + && symbolic_operand (XEXP (operand1, 0), mode))) + { + int ishighonly = 0; + + if (GET_CODE (operand1) == HIGH) + { + ishighonly = 1; + operand1 = XEXP (operand1, 0); + } + if (symbolic_operand (operand1, mode)) + { + /* Argh. The assembler and linker can't handle arithmetic + involving plabels. + + So we force the plabel into memory, load operand0 from + the memory location, then add in the constant part. */ + if ((GET_CODE (operand1) == CONST + && GET_CODE (XEXP (operand1, 0)) == PLUS + && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode)) + || function_label_operand (operand1, mode)) + { + rtx temp, const_part; + + /* Figure out what (if any) scratch register to use. */ + if (reload_in_progress || reload_completed) + scratch_reg = scratch_reg ? scratch_reg : operand0; + else if (flag_pic) + scratch_reg = gen_reg_rtx (Pmode); + + if (GET_CODE (operand1) == CONST) + { + /* Save away the constant part of the expression. */ + const_part = XEXP (XEXP (operand1, 0), 1); + if (GET_CODE (const_part) != CONST_INT) + abort (); + + /* Force the function label into memory. */ + temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0)); + } + else + { + /* No constant part. */ + const_part = NULL_RTX; + + /* Force the function label into memory. */ + temp = force_const_mem (mode, operand1); + } + + + /* Get the address of the memory location. PIC-ify it if + necessary. */ + temp = XEXP (temp, 0); + if (flag_pic) + temp = legitimize_pic_address (temp, mode, scratch_reg); + + /* Put the address of the memory location into our destination + register. */ + operands[1] = temp; + emit_move_sequence (operands, mode, scratch_reg); + + /* Now load from the memory location into our destination + register. */ + operands[1] = gen_rtx_MEM (Pmode, operands[0]); + emit_move_sequence (operands, mode, scratch_reg); + + /* And add back in the constant part. */ + if (const_part != NULL_RTX) + expand_inc (operand0, const_part); + + return 1; + } + + if (flag_pic) + { + rtx temp; + + if (reload_in_progress || reload_completed) + temp = scratch_reg ? scratch_reg : operand0; + else + temp = gen_reg_rtx (Pmode); + + /* (const (plus (symbol) (const_int))) must be forced to + memory during/after reload if the const_int will not fit + in 14 bits. */ + if (GET_CODE (operand1) == CONST + && GET_CODE (XEXP (operand1, 0)) == PLUS + && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT + && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)) + && (reload_completed || reload_in_progress) + && flag_pic) + { + operands[1] = force_const_mem (mode, operand1); + operands[1] = legitimize_pic_address (XEXP (operands[1], 0), + mode, temp); + emit_move_sequence (operands, mode, temp); + } + else + { + operands[1] = legitimize_pic_address (operand1, mode, temp); + emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1])); + } + } + /* On the HPPA, references to data space are supposed to use dp, + register 27, but showing it in the RTL inhibits various cse + and loop optimizations. 
*/ + else + { + rtx temp, set; + + if (reload_in_progress || reload_completed) + temp = scratch_reg ? scratch_reg : operand0; + else + temp = gen_reg_rtx (mode); + + /* Loading a SYMBOL_REF into a register makes that register + safe to be used as the base in an indexed address. + + Don't mark hard registers though. That loses. */ + if (GET_CODE (operand0) == REG + && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) + REGNO_POINTER_FLAG (REGNO (operand0)) = 1; + if (REGNO (temp) >= FIRST_PSEUDO_REGISTER) + REGNO_POINTER_FLAG (REGNO (temp)) = 1; + if (ishighonly) + set = gen_rtx_SET (mode, operand0, temp); + else + set = gen_rtx_SET (VOIDmode, operand0, + gen_rtx_LO_SUM (mode, temp, operand1)); + + emit_insn (gen_rtx_SET (VOIDmode, + temp, + gen_rtx_HIGH (mode, operand1))); + emit_insn (set); + + } + return 1; + } + else if (GET_CODE (operand1) != CONST_INT + || ! cint_ok_for_move (INTVAL (operand1))) + { + rtx temp; + + if (reload_in_progress || reload_completed) + temp = operand0; + else + temp = gen_reg_rtx (mode); + + emit_insn (gen_rtx_SET (VOIDmode, temp, + gen_rtx_HIGH (mode, operand1))); + operands[1] = gen_rtx_LO_SUM (mode, temp, operand1); + } + } + /* Now have insn-emit do whatever it normally does. */ + return 0; +} + +/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning + it will need a link/runtime reloc). */ + +int +reloc_needed (exp) + tree exp; +{ + int reloc = 0; + + switch (TREE_CODE (exp)) + { + case ADDR_EXPR: + return 1; + + case PLUS_EXPR: + case MINUS_EXPR: + reloc = reloc_needed (TREE_OPERAND (exp, 0)); + reloc |= reloc_needed (TREE_OPERAND (exp, 1)); + break; + + case NOP_EXPR: + case CONVERT_EXPR: + case NON_LVALUE_EXPR: + reloc = reloc_needed (TREE_OPERAND (exp, 0)); + break; + + case CONSTRUCTOR: + { + register tree link; + for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link)) + if (TREE_VALUE (link) != 0) + reloc |= reloc_needed (TREE_VALUE (link)); + } + break; + + case ERROR_MARK: + break; + + default: + break; + } + return reloc; +} + +/* Does operand (which is a symbolic_operand) live in text space? If + so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */ + +int +read_only_operand (operand) + rtx operand; +{ + if (GET_CODE (operand) == CONST) + operand = XEXP (XEXP (operand, 0), 0); + if (flag_pic) + { + if (GET_CODE (operand) == SYMBOL_REF) + return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand); + } + else + { + if (GET_CODE (operand) == SYMBOL_REF) + return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand); + } + return 1; +} + + +/* Return the best assembler insn template + for moving operands[1] into operands[0] as a fullword. */ +char * +singlemove_string (operands) + rtx *operands; +{ + HOST_WIDE_INT intval; + + if (GET_CODE (operands[0]) == MEM) + return "stw %r1,%0"; + if (GET_CODE (operands[1]) == MEM) + return "ldw %1,%0"; + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + long i; + REAL_VALUE_TYPE d; + + if (GET_MODE (operands[1]) != SFmode) + abort (); + + /* Translate the CONST_DOUBLE to a CONST_INT with the same target + bit pattern. */ + REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (d, i); + + operands[1] = GEN_INT (i); + /* Fall through to CONST_INT case. 
*/ + } + if (GET_CODE (operands[1]) == CONST_INT) + { + intval = INTVAL (operands[1]); + + if (VAL_14_BITS_P (intval)) + return "ldi %1,%0"; + else if ((intval & 0x7ff) == 0) + return "ldil L'%1,%0"; + else if (zdepi_cint_p (intval)) + return "zdepi %Z1,%0"; + else + return "ldil L'%1,%0\n\tldo R'%1(%0),%0"; + } + return "copy %1,%0"; +} + + +/* Compute position (in OP[1]) and width (in OP[2]) + useful for copying IMM to a register using the zdepi + instructions. Store the immediate value to insert in OP[0]. */ +void +compute_zdepi_operands (imm, op) + unsigned HOST_WIDE_INT imm; + unsigned *op; +{ + int lsb, len; + + /* Find the least significant set bit in IMM. */ + for (lsb = 0; lsb < 32; lsb++) + { + if ((imm & 1) != 0) + break; + imm >>= 1; + } + + /* Choose variants based on *sign* of the 5-bit field. */ + if ((imm & 0x10) == 0) + len = (lsb <= 28) ? 4 : 32 - lsb; + else + { + /* Find the width of the bitstring in IMM. */ + for (len = 5; len < 32; len++) + { + if ((imm & (1 << len)) == 0) + break; + } + + /* Sign extend IMM as a 5-bit value. */ + imm = (imm & 0xf) - 0x10; + } + + op[0] = imm; + op[1] = 31 - lsb; + op[2] = len; +} + +/* Output assembler code to perform a doubleword move insn + with operands OPERANDS. */ + +char * +output_move_double (operands) + rtx *operands; +{ + enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1; + rtx latehalf[2]; + rtx addreg0 = 0, addreg1 = 0; + + /* First classify both operands. */ + + if (REG_P (operands[0])) + optype0 = REGOP; + else if (offsettable_memref_p (operands[0])) + optype0 = OFFSOP; + else if (GET_CODE (operands[0]) == MEM) + optype0 = MEMOP; + else + optype0 = RNDOP; + + if (REG_P (operands[1])) + optype1 = REGOP; + else if (CONSTANT_P (operands[1])) + optype1 = CNSTOP; + else if (offsettable_memref_p (operands[1])) + optype1 = OFFSOP; + else if (GET_CODE (operands[1]) == MEM) + optype1 = MEMOP; + else + optype1 = RNDOP; + + /* Check for the cases that the operand constraints are not + supposed to allow to happen. Abort if we get one, + because generating code for these cases is painful. */ + + if (optype0 != REGOP && optype1 != REGOP) + abort (); + + /* Handle auto decrementing and incrementing loads and stores + specifically, since the structure of the function doesn't work + for them without major modification. Do it better when we learn + this port about the general inc/dec addressing of PA. + (This was written by tege. Chide him if it doesn't work.) */ + + if (optype0 == MEMOP) + { + /* We have to output the address syntax ourselves, since print_operand + doesn't deal with the addresses we want to use. Fix this later. */ + + rtx addr = XEXP (operands[0], 0); + if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); + + operands[0] = XEXP (addr, 0); + if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG) + abort (); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + /* No overlap between high target register and address + register. 
(We do this in a non-obvious way to + save a register file writeback) */ + if (GET_CODE (addr) == POST_INC) + return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)"; + return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)"; + } + else + abort(); + } + else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); + + operands[0] = XEXP (addr, 0); + if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG) + abort (); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + /* No overlap between high target register and address + register. (We do this in a non-obvious way to + save a register file writeback) */ + if (GET_CODE (addr) == PRE_INC) + return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)"; + return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)"; + } + else + abort(); + } + } + if (optype1 == MEMOP) + { + /* We have to output the address syntax ourselves, since print_operand + doesn't deal with the addresses we want to use. Fix this later. */ + + rtx addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); + + operands[1] = XEXP (addr, 0); + if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG) + abort (); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + /* No overlap between high target register and address + register. (We do this in a non-obvious way to + save a register file writeback) */ + if (GET_CODE (addr) == POST_INC) + return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0"; + return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0"; + } + else + { + /* This is an undefined situation. We should load into the + address register *and* update that register. Probably + we don't need to handle this at all. */ + if (GET_CODE (addr) == POST_INC) + return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0"; + return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0"; + } + } + else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); + + operands[1] = XEXP (addr, 0); + if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG) + abort (); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + /* No overlap between high target register and address + register. (We do this in a non-obvious way to + save a register file writeback) */ + if (GET_CODE (addr) == PRE_INC) + return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0"; + return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0"; + } + else + { + /* This is an undefined situation. We should load into the + address register *and* update that register. Probably + we don't need to handle this at all. 
*/ + if (GET_CODE (addr) == PRE_INC) + return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0"; + return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0"; + } + } + else if (GET_CODE (addr) == PLUS + && GET_CODE (XEXP (addr, 0)) == MULT) + { + rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); + + if (!reg_overlap_mentioned_p (high_reg, addr)) + { + rtx xoperands[3]; + + xoperands[0] = high_reg; + xoperands[1] = XEXP (addr, 1); + xoperands[2] = XEXP (XEXP (addr, 0), 0); + xoperands[3] = XEXP (XEXP (addr, 0), 1); + output_asm_insn ("sh%O3addl %2,%1,%0", xoperands); + return "ldw 4(0,%0),%R0\n\tldw 0(0,%0),%0"; + } + else + { + rtx xoperands[3]; + + xoperands[0] = high_reg; + xoperands[1] = XEXP (addr, 1); + xoperands[2] = XEXP (XEXP (addr, 0), 0); + xoperands[3] = XEXP (XEXP (addr, 0), 1); + output_asm_insn ("sh%O3addl %2,%1,%R0", xoperands); + return "ldw 0(0,%R0),%0\n\tldw 4(0,%R0),%R0"; + } + + } + } + + /* If an operand is an unoffsettable memory ref, find a register + we can increment temporarily to make it refer to the second word. */ + + if (optype0 == MEMOP) + addreg0 = find_addr_reg (XEXP (operands[0], 0)); + + if (optype1 == MEMOP) + addreg1 = find_addr_reg (XEXP (operands[1], 0)); + + /* Ok, we can do one word at a time. + Normally we do the low-numbered word first. + + In either case, set up in LATEHALF the operands to use + for the high-numbered word and in some cases alter the + operands in OPERANDS to be suitable for the low-numbered word. */ + + if (optype0 == REGOP) + latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); + else if (optype0 == OFFSOP) + latehalf[0] = adj_offsettable_operand (operands[0], 4); + else + latehalf[0] = operands[0]; + + if (optype1 == REGOP) + latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); + else if (optype1 == OFFSOP) + latehalf[1] = adj_offsettable_operand (operands[1], 4); + else if (optype1 == CNSTOP) + split_double (operands[1], &operands[1], &latehalf[1]); + else + latehalf[1] = operands[1]; + + /* If the first move would clobber the source of the second one, + do them in the other order. + + This can happen in two cases: + + mem -> register where the first half of the destination register + is the same register used in the memory's address. Reload + can create such insns. + + mem in this case will be either register indirect or register + indirect plus a valid offset. + + register -> register move where REGNO(dst) == REGNO(src + 1) + someone (Tim/Tege?) claimed this can happen for parameter loads. + + Handle mem -> register case first. */ + if (optype0 == REGOP + && (optype1 == MEMOP || optype1 == OFFSOP) + && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1, + operands[1], 0)) + { + /* Do the late half first. */ + if (addreg1) + output_asm_insn ("ldo 4(%0),%0", &addreg1); + output_asm_insn (singlemove_string (latehalf), latehalf); + + /* Then clobber. */ + if (addreg1) + output_asm_insn ("ldo -4(%0),%0", &addreg1); + return singlemove_string (operands); + } + + /* Now handle register -> register case. */ + if (optype0 == REGOP && optype1 == REGOP + && REGNO (operands[0]) == REGNO (operands[1]) + 1) + { + output_asm_insn (singlemove_string (latehalf), latehalf); + return singlemove_string (operands); + } + + /* Normal case: do the two words, low-numbered first. */ + + output_asm_insn (singlemove_string (operands), operands); + + /* Make any unoffsettable addresses point at high-numbered word. 
*/ + if (addreg0) + output_asm_insn ("ldo 4(%0),%0", &addreg0); + if (addreg1) + output_asm_insn ("ldo 4(%0),%0", &addreg1); + + /* Do that word. */ + output_asm_insn (singlemove_string (latehalf), latehalf); + + /* Undo the adds we just did. */ + if (addreg0) + output_asm_insn ("ldo -4(%0),%0", &addreg0); + if (addreg1) + output_asm_insn ("ldo -4(%0),%0", &addreg1); + + return ""; +} + +char * +output_fp_move_double (operands) + rtx *operands; +{ + if (FP_REG_P (operands[0])) + { + if (FP_REG_P (operands[1]) + || operands[1] == CONST0_RTX (GET_MODE (operands[0]))) + output_asm_insn ("fcpy,dbl %r1,%0", operands); + else + output_asm_insn ("fldd%F1 %1,%0", operands); + } + else if (FP_REG_P (operands[1])) + { + output_asm_insn ("fstd%F0 %1,%0", operands); + } + else if (operands[1] == CONST0_RTX (GET_MODE (operands[0]))) + { + if (GET_CODE (operands[0]) == REG) + { + rtx xoperands[2]; + xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); + xoperands[0] = operands[0]; + output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands); + } + /* This is a pain. You have to be prepared to deal with an + arbitrary address here including pre/post increment/decrement. + + so avoid this in the MD. */ + else + abort (); + } + else abort (); + return ""; +} + +/* Return a REG that occurs in ADDR with coefficient 1. + ADDR can be effectively incremented by incrementing REG. */ + +static rtx +find_addr_reg (addr) + rtx addr; +{ + while (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 0)) == REG) + addr = XEXP (addr, 0); + else if (GET_CODE (XEXP (addr, 1)) == REG) + addr = XEXP (addr, 1); + else if (CONSTANT_P (XEXP (addr, 0))) + addr = XEXP (addr, 1); + else if (CONSTANT_P (XEXP (addr, 1))) + addr = XEXP (addr, 0); + else + abort (); + } + if (GET_CODE (addr) == REG) + return addr; + abort (); +} + +/* Emit code to perform a block move. + + OPERANDS[0] is the destination pointer as a REG, clobbered. + OPERANDS[1] is the source pointer as a REG, clobbered. + OPERANDS[2] is a register for temporary storage. + OPERANDS[4] is the size as a CONST_INT + OPERANDS[3] is a register for temporary storage. + OPERANDS[5] is the alignment safe to use, as a CONST_INT. + OPERANDS[6] is another temporary register. */ + +char * +output_block_move (operands, size_is_constant) + rtx *operands; + int size_is_constant ATTRIBUTE_UNUSED; +{ + int align = INTVAL (operands[5]); + unsigned long n_bytes = INTVAL (operands[4]); + + /* We can't move more than four bytes at a time because the PA + has no longer integer move insns. (Could use fp mem ops?) */ + if (align > 4) + align = 4; + + /* Note that we know each loop below will execute at least twice + (else we would have open-coded the copy). */ + switch (align) + { + case 4: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 8); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("ldws,ma 4(0,%1),%3", operands); + output_asm_insn ("ldws,ma 4(0,%1),%6", operands); + output_asm_insn ("stws,ma %3,4(0,%0)", operands); + output_asm_insn ("addib,>= -8,%2,.-12", operands); + output_asm_insn ("stws,ma %6,4(0,%0)", operands); + + /* Handle the residual. There could be up to 7 bytes of + residual to copy! 
*/ + if (n_bytes % 8 != 0) + { + operands[4] = GEN_INT (n_bytes % 4); + if (n_bytes % 8 >= 4) + output_asm_insn ("ldws,ma 4(0,%1),%3", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("ldw 0(0,%1),%6", operands); + if (n_bytes % 8 >= 4) + output_asm_insn ("stws,ma %3,4(0,%0)", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("stbys,e %6,%4(0,%0)", operands); + } + return ""; + + case 2: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 4); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("ldhs,ma 2(0,%1),%3", operands); + output_asm_insn ("ldhs,ma 2(0,%1),%6", operands); + output_asm_insn ("sths,ma %3,2(0,%0)", operands); + output_asm_insn ("addib,>= -4,%2,.-12", operands); + output_asm_insn ("sths,ma %6,2(0,%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 4 != 0) + { + if (n_bytes % 4 >= 2) + output_asm_insn ("ldhs,ma 2(0,%1),%3", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("ldb 0(0,%1),%6", operands); + if (n_bytes % 4 >= 2) + output_asm_insn ("sths,ma %3,2(0,%0)", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("stb %6,0(0,%0)", operands); + } + return ""; + + case 1: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 2); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("ldbs,ma 1(0,%1),%3", operands); + output_asm_insn ("ldbs,ma 1(0,%1),%6", operands); + output_asm_insn ("stbs,ma %3,1(0,%0)", operands); + output_asm_insn ("addib,>= -2,%2,.-12", operands); + output_asm_insn ("stbs,ma %6,1(0,%0)", operands); + + /* Handle the residual. */ + if (n_bytes % 2 != 0) + { + output_asm_insn ("ldb 0(0,%1),%3", operands); + output_asm_insn ("stb %3,0(0,%0)", operands); + } + return ""; + + default: + abort (); + } +} + +/* Count the number of insns necessary to handle this block move. + + Basic structure is the same as emit_block_move, except that we + count insns rather than emit them. */ + +int +compute_movstrsi_length (insn) + rtx insn; +{ + rtx pat = PATTERN (insn); + int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0)); + unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0)); + unsigned int n_insns = 0; + + /* We can't move more than four bytes at a time because the PA + has no longer integer move insns. (Could use fp mem ops?) */ + if (align > 4) + align = 4; + + /* The basic copying loop. */ + n_insns = 6; + + /* Residuals. */ + if (n_bytes % (2 * align) != 0) + { + if ((n_bytes % (2 * align)) >= align) + n_insns += 2; + + if ((n_bytes % align) != 0) + n_insns += 2; + } + + /* Lengths are expressed in bytes now; each insn is 4 bytes. */ + return n_insns * 4; +} + + +char * +output_and (operands) + rtx *operands; +{ + if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) + { + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + int ls0, ls1, ms0, p, len; + + for (ls0 = 0; ls0 < 32; ls0++) + if ((mask & (1 << ls0)) == 0) + break; + + for (ls1 = ls0; ls1 < 32; ls1++) + if ((mask & (1 << ls1)) != 0) + break; + + for (ms0 = ls1; ms0 < 32; ms0++) + if ((mask & (1 << ms0)) == 0) + break; + + if (ms0 != 32) + abort(); + + if (ls1 == 32) + { + len = ls0; + + if (len == 0) + abort (); + + operands[2] = GEN_INT (len); + return "extru %1,31,%2,%0"; + } + else + { + /* We could use this `depi' for the case above as well, but `depi' + requires one more register file access than an `extru'. 
*/ + + p = 31 - ls0; + len = ls1 - ls0; + + operands[2] = GEN_INT (p); + operands[3] = GEN_INT (len); + return "depi 0,%2,%3,%0"; + } + } + else + return "and %1,%2,%0"; +} + +char * +output_ior (operands) + rtx *operands; +{ + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + int bs0, bs1, p, len; + + if (INTVAL (operands[2]) == 0) + return "copy %1,%0"; + + for (bs0 = 0; bs0 < 32; bs0++) + if ((mask & (1 << bs0)) != 0) + break; + + for (bs1 = bs0; bs1 < 32; bs1++) + if ((mask & (1 << bs1)) == 0) + break; + + if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask) + abort(); + + p = 31 - bs0; + len = bs1 - bs0; + + operands[2] = GEN_INT (p); + operands[3] = GEN_INT (len); + return "depi -1,%2,%3,%0"; +} + +/* Output an ascii string. */ +void +output_ascii (file, p, size) + FILE *file; + unsigned char *p; + int size; +{ + int i; + int chars_output; + unsigned char partial_output[16]; /* Max space 4 chars can occupy. */ + + /* The HP assembler can only take strings of 256 characters at one + time. This is a limitation on input line length, *not* the + length of the string. Sigh. Even worse, it seems that the + restriction is in number of input characters (see \xnn & + \whatever). So we have to do this very carefully. */ + + fputs ("\t.STRING \"", file); + + chars_output = 0; + for (i = 0; i < size; i += 4) + { + int co = 0; + int io = 0; + for (io = 0, co = 0; io < MIN (4, size - i); io++) + { + register unsigned int c = p[i + io]; + + if (c == '\"' || c == '\\') + partial_output[co++] = '\\'; + if (c >= ' ' && c < 0177) + partial_output[co++] = c; + else + { + unsigned int hexd; + partial_output[co++] = '\\'; + partial_output[co++] = 'x'; + hexd = c / 16 - 0 + '0'; + if (hexd > '9') + hexd -= '9' - 'a' + 1; + partial_output[co++] = hexd; + hexd = c % 16 - 0 + '0'; + if (hexd > '9') + hexd -= '9' - 'a' + 1; + partial_output[co++] = hexd; + } + } + if (chars_output + co > 243) + { + fputs ("\"\n\t.STRING \"", file); + chars_output = 0; + } + fwrite (partial_output, 1, co, file); + chars_output += co; + co = 0; + } + fputs ("\"\n", file); +} + +/* Try to rewrite floating point comparisons & branches to avoid + useless add,tr insns. + + CHECK_NOTES is nonzero if we should examine REG_DEAD notes + to see if FPCC is dead. CHECK_NOTES is nonzero for the + first attempt to remove useless add,tr insns. It is zero + for the second pass as reorg sometimes leaves bogus REG_DEAD + notes lying around. + + When CHECK_NOTES is zero we can only eliminate add,tr insns + when there's a 1:1 correspondence between fcmp and ftest/fbranch + instructions. */ +void +remove_useless_addtr_insns (insns, check_notes) + rtx insns; + int check_notes; +{ + rtx insn; + static int pass = 0; + + /* This is fairly cheap, so always run it when optimizing. */ + if (optimize > 0) + { + int fcmp_count = 0; + int fbranch_count = 0; + + /* Walk all the insns in this function looking for fcmp & fbranch + instructions. Keep track of how many of each we find. */ + insns = get_insns (); + for (insn = insns; insn; insn = next_insn (insn)) + { + rtx tmp; + + /* Ignore anything that isn't an INSN or a JUMP_INSN. */ + if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN) + continue; + + tmp = PATTERN (insn); + + /* It must be a set. */ + if (GET_CODE (tmp) != SET) + continue; + + /* If the destination is CCFP, then we've found an fcmp insn. 
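Stepping back to output_and and output_ior above: both special-case only those masks whose set (or clear) bits form one contiguous field, which is exactly what a single extru or depi instruction can handle. A minimal sketch of the analysis, using a hypothetical helper name that is not part of pa.c:

/* Return nonzero if MASK is a single contiguous run of ones, computing the
   depi operands the way output_ior does: P is the PA bit position of the
   low end of the run (the PA numbers bit 0 as the most significant bit)
   and LEN its width.  E.g. mask 0x00000ff0 gives p = 27, len = 8, matching
   the template "depi -1,27,8,%0".  Illustrative sketch only.  */
static int
contiguous_ones_p (mask, p, len)
     unsigned int mask;
     int *p, *len;
{
  int bs0, bs1;

  if (mask == 0)
    return 0;

  for (bs0 = 0; bs0 < 32; bs0++)
    if ((mask & (1U << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < 32; bs1++)
    if ((mask & (1U << bs1)) == 0)
      break;

  /* A set bit at or above bs1 would mean the ones are not contiguous.  */
  if (bs1 != 32 && (mask >> bs1) != 0)
    return 0;

  *p = 31 - bs0;
  *len = bs1 - bs0;
  return 1;
}

output_and performs the dual check on the clear bits of its mask, using extru when the zero field extends through the most significant bit (the AND then just keeps the low-order bits) and depi 0 to clear the field in place otherwise.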
*/ + tmp = SET_DEST (tmp); + if (GET_CODE (tmp) == REG && REGNO (tmp) == 0) + { + fcmp_count++; + continue; + } + + tmp = PATTERN (insn); + /* If this is an fbranch instruction, bump the fbranch counter. */ + if (GET_CODE (tmp) == SET + && SET_DEST (tmp) == pc_rtx + && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE + && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE + && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG + && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0) + { + fbranch_count++; + continue; + } + } + + + /* Find all floating point compare + branch insns. If possible, + reverse the comparison & the branch to avoid add,tr insns. */ + for (insn = insns; insn; insn = next_insn (insn)) + { + rtx tmp, next; + + /* Ignore anything that isn't an INSN. */ + if (GET_CODE (insn) != INSN) + continue; + + tmp = PATTERN (insn); + + /* It must be a set. */ + if (GET_CODE (tmp) != SET) + continue; + + /* The destination must be CCFP, which is register zero. */ + tmp = SET_DEST (tmp); + if (GET_CODE (tmp) != REG || REGNO (tmp) != 0) + continue; + + /* INSN should be a set of CCFP. + + See if the result of this insn is used in a reversed FP + conditional branch. If so, reverse our condition and + the branch. Doing so avoids useless add,tr insns. */ + next = next_insn (insn); + while (next) + { + /* Jumps, calls and labels stop our search. */ + if (GET_CODE (next) == JUMP_INSN + || GET_CODE (next) == CALL_INSN + || GET_CODE (next) == CODE_LABEL) + break; + + /* As does another fcmp insn. */ + if (GET_CODE (next) == INSN + && GET_CODE (PATTERN (next)) == SET + && GET_CODE (SET_DEST (PATTERN (next))) == REG + && REGNO (SET_DEST (PATTERN (next))) == 0) + break; + + next = next_insn (next); + } + + /* Is NEXT_INSN a branch? */ + if (next + && GET_CODE (next) == JUMP_INSN) + { + rtx pattern = PATTERN (next); + + /* If it a reversed fp conditional branch (eg uses add,tr) + and CCFP dies, then reverse our conditional and the branch + to avoid the add,tr. */ + if (GET_CODE (pattern) == SET + && SET_DEST (pattern) == pc_rtx + && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE + && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE + && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG + && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 + && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC + && (fcmp_count == fbranch_count + || (check_notes + && find_regno_note (next, REG_DEAD, 0)))) + { + /* Reverse the branch. */ + tmp = XEXP (SET_SRC (pattern), 1); + XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); + XEXP (SET_SRC (pattern), 2) = tmp; + INSN_CODE (next) = -1; + + /* Reverse our condition. */ + tmp = PATTERN (insn); + PUT_CODE (XEXP (tmp, 1), + reverse_condition (GET_CODE (XEXP (tmp, 1)))); + } + } + } + } + + pass = !pass; + +} + +/* You may have trouble believing this, but this is the HP-PA stack + layout. Wow. + + Offset Contents + + Variable arguments (optional; any number may be allocated) + + SP-(4*(N+9)) arg word N + : : + SP-56 arg word 5 + SP-52 arg word 4 + + Fixed arguments (must be allocated; may remain unused) + + SP-48 arg word 3 + SP-44 arg word 2 + SP-40 arg word 1 + SP-36 arg word 0 + + Frame Marker + + SP-32 External Data Pointer (DP) + SP-28 External sr4 + SP-24 External/stub RP (RP') + SP-20 Current RP + SP-16 Static Link + SP-12 Clean up + SP-8 Calling Stub RP (RP'') + SP-4 Previous SP + + Top of Frame + + SP-0 Stack Pointer (points to next available address) + +*/ + +/* This function saves registers as follows. 
Registers marked with ' are + this function's registers (as opposed to the previous function's). + If a frame_pointer isn't needed, r4 is saved as a general register; + the space for the frame pointer is still allocated, though, to keep + things simple. + + + Top of Frame + + SP (FP') Previous FP + SP + 4 Alignment filler (sigh) + SP + 8 Space for locals reserved here. + . + . + . + SP + n All call saved register used. + . + . + . + SP + o All call saved fp registers used. + . + . + . + SP + p (SP') points to next available address. + +*/ + +/* Emit RTL to store REG at the memory location specified by BASE+DISP. + Handle case where DISP > 8k by using the add_high_const pattern. + + Note in DISP > 8k case, we will leave the high part of the address + in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ +static void +store_reg (reg, disp, base) + int reg, disp, base; +{ + if (VAL_14_BITS_P (disp)) + { + emit_move_insn (gen_rtx_MEM (SImode, + gen_rtx_PLUS (SImode, + gen_rtx_REG (SImode, base), + GEN_INT (disp))), + gen_rtx_REG (SImode, reg)); + } + else + { + emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1), + gen_rtx_REG (SImode, base), + GEN_INT (disp))); + emit_move_insn (gen_rtx_MEM (SImode, + gen_rtx_LO_SUM (SImode, + gen_rtx_REG (SImode, 1), + GEN_INT (disp))), + gen_rtx_REG (SImode, reg)); + } +} + +/* Emit RTL to load REG from the memory location specified by BASE+DISP. + Handle case where DISP > 8k by using the add_high_const pattern. + + Note in DISP > 8k case, we will leave the high part of the address + in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ +static void +load_reg (reg, disp, base) + int reg, disp, base; +{ + if (VAL_14_BITS_P (disp)) + { + emit_move_insn (gen_rtx_REG (SImode, reg), + gen_rtx_MEM (SImode, + gen_rtx_PLUS (SImode, + gen_rtx_REG (SImode, base), + GEN_INT (disp)))); + } + else + { + emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1), + gen_rtx_REG (SImode, base), + GEN_INT (disp))); + emit_move_insn (gen_rtx_REG (SImode, reg), + gen_rtx_MEM (SImode, + gen_rtx_LO_SUM (SImode, + gen_rtx_REG (SImode, 1), + GEN_INT (disp)))); + } +} + +/* Emit RTL to set REG to the value specified by BASE+DISP. + Handle case where DISP > 8k by using the add_high_const pattern. + + Note in DISP > 8k case, we will leave the high part of the address + in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ +static void +set_reg_plus_d(reg, base, disp) + int reg, base, disp; +{ + if (VAL_14_BITS_P (disp)) + { + emit_move_insn (gen_rtx_REG (SImode, reg), + gen_rtx_PLUS (SImode, + gen_rtx_REG (SImode, base), + GEN_INT (disp))); + } + else + { + emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1), + gen_rtx_REG (SImode, base), + GEN_INT (disp))); + emit_move_insn (gen_rtx_REG (SImode, reg), + gen_rtx_LO_SUM (SImode, + gen_rtx_REG (SImode, 1), + GEN_INT (disp))); + } +} + +/* Global variables set by FUNCTION_PROLOGUE. */ +/* Size of frame. Need to know this to emit return insns from + leaf procedures. */ +static int actual_fsize; +static int local_fsize, save_fregs; + +int +compute_frame_size (size, fregs_live) + int size; + int *fregs_live; +{ + extern int current_function_outgoing_args_size; + int i, fsize; + + /* 8 is space for frame pointer + filler. If any frame is allocated + we need to add this in because of STARTING_FRAME_OFFSET. */ + fsize = size + (size || frame_pointer_needed ? 8 : 0); + + /* We must leave enough space for all the callee saved registers + from 3 .. 
highest used callee save register since we don't + know if we're going to have an inline or out of line prologue + and epilogue. */ + for (i = 18; i >= 3; i--) + if (regs_ever_live[i]) + { + fsize += 4 * (i - 2); + break; + } + + /* Round the stack. */ + fsize = (fsize + 7) & ~7; + + /* We must leave enough space for all the callee saved registers + from 3 .. highest used callee save register since we don't + know if we're going to have an inline or out of line prologue + and epilogue. */ + for (i = 66; i >= 48; i -= 2) + if (regs_ever_live[i] || regs_ever_live[i + 1]) + { + if (fregs_live) + *fregs_live = 1; + + fsize += 4 * (i - 46); + break; + } + + fsize += current_function_outgoing_args_size; + if (! leaf_function_p () || fsize) + fsize += 32; + return (fsize + 63) & ~63; +} + +rtx hp_profile_label_rtx; +static char hp_profile_label_name[8]; +void +output_function_prologue (file, size) + FILE *file; + int size ATTRIBUTE_UNUSED; +{ + /* The function's label and associated .PROC must never be + separated and must be output *after* any profiling declarations + to avoid changing spaces/subspaces within a procedure. */ + ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); + fputs ("\t.PROC\n", file); + + /* hppa_expand_prologue does the dirty work now. We just need + to output the assembler directives which denote the start + of a function. */ + fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize); + if (regs_ever_live[2] || profile_flag) + fputs (",CALLS,SAVE_RP", file); + else + fputs (",NO_CALLS", file); + + if (frame_pointer_needed) + fputs (",SAVE_SP", file); + + /* Pass on information about the number of callee register saves + performed in the prologue. + + The compiler is supposed to pass the highest register number + saved, the assembler then has to adjust that number before + entering it into the unwind descriptor (to account for any + caller saved registers with lower register numbers than the + first callee saved register). */ + if (gr_saved) + fprintf (file, ",ENTRY_GR=%d", gr_saved + 2); + + if (fr_saved) + fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); + + fputs ("\n\t.ENTRY\n", file); + + /* Horrid hack. emit_function_prologue will modify this RTL in + place to get the expected results. */ + if (profile_flag) + ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP", + hp_profile_labelno); + + /* If we're using GAS and not using the portable runtime model, then + we don't need to accumulate the total number of code bytes. */ + if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME) + total_code_bytes = 0; + else if (insn_addresses) + { + unsigned int old_total = total_code_bytes; + + total_code_bytes += insn_addresses[INSN_UID (get_last_insn())]; + total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT; + + /* Be prepared to handle overflows. */ + total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes; + } + else + total_code_bytes = -1; + + remove_useless_addtr_insns (get_insns (), 0); + + /* Restore INSN_CODEs for insn which use unscaled indexed addresses. */ + restore_unscaled_index_insn_codes (get_insns ()); +} + +void +hppa_expand_prologue() +{ + extern char call_used_regs[]; + int size = get_frame_size (); + int merge_sp_adjust_with_store = 0; + int i, offset; + rtx tmpreg, size_rtx; + + gr_saved = 0; + fr_saved = 0; + save_fregs = 0; + local_fsize = size + (size || frame_pointer_needed ? 8 : 0); + actual_fsize = compute_frame_size (size, &save_fregs); + + /* Compute a few things we will use often. 
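One property of compute_frame_size worth spelling out: the running size is rounded to a doubleword after the general register save space is added, and the final figure is rounded to a multiple of 64, so actual_fsize is always 64 byte aligned. A stand-alone check of that arithmetic (illustrative only, not compiler code):

#include <assert.h>

int
main ()
{
  /* (x + 7) & ~7 rounds up to a multiple of 8; (x + 63) & ~63 to 64.  */
  assert (((20 + 7) & ~7) == 24);
  assert (((24 + 7) & ~7) == 24);
  assert (((100 + 63) & ~63) == 128);
  assert (((128 + 63) & ~63) == 128);
  return 0;
}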
*/ + tmpreg = gen_rtx_REG (SImode, 1); + size_rtx = GEN_INT (actual_fsize); + + /* Handle out of line prologues and epilogues. */ + if (TARGET_SPACE) + { + rtx operands[2]; + int saves = 0; + int outline_insn_count = 0; + int inline_insn_count = 0; + + /* Count the number of insns for the inline and out of line + variants so we can choose one appropriately. + + No need to screw with counting actual_fsize operations -- they're + done for both inline and out of line prologues. */ + if (regs_ever_live[2]) + inline_insn_count += 1; + + if (! cint_ok_for_move (local_fsize)) + outline_insn_count += 2; + else + outline_insn_count += 1; + + /* Put the register save info into %r22. */ + for (i = 18; i >= 3; i--) + if (regs_ever_live[i] && ! call_used_regs[i]) + { + /* -1 because the stack adjustment is normally done in + the same insn as a register save. */ + inline_insn_count += (i - 2) - 1; + saves = i; + break; + } + + for (i = 66; i >= 48; i -= 2) + if (regs_ever_live[i] || regs_ever_live[i + 1]) + { + /* +1 needed as we load %r1 with the start of the freg + save area. */ + inline_insn_count += (i/2 - 23) + 1; + saves |= ((i/2 - 12 ) << 16); + break; + } + + if (frame_pointer_needed) + inline_insn_count += 3; + + if (! cint_ok_for_move (saves)) + outline_insn_count += 2; + else + outline_insn_count += 1; + + if (TARGET_PORTABLE_RUNTIME) + outline_insn_count += 2; + else + outline_insn_count += 1; + + /* If there's a lot of insns in the prologue, then do it as + an out-of-line sequence. */ + if (inline_insn_count > outline_insn_count) + { + /* Put the local_fisze into %r19. */ + operands[0] = gen_rtx_REG (SImode, 19); + operands[1] = GEN_INT (local_fsize); + emit_move_insn (operands[0], operands[1]); + + /* Put the stack size into %r21. */ + operands[0] = gen_rtx_REG (SImode, 21); + operands[1] = size_rtx; + emit_move_insn (operands[0], operands[1]); + + operands[0] = gen_rtx_REG (SImode, 22); + operands[1] = GEN_INT (saves); + emit_move_insn (operands[0], operands[1]); + + /* Now call the out-of-line prologue. */ + emit_insn (gen_outline_prologue_call ()); + emit_insn (gen_blockage ()); + + /* Note that we're using an out-of-line prologue. */ + out_of_line_prologue_epilogue = 1; + return; + } + } + + out_of_line_prologue_epilogue = 0; + + /* Save RP first. The calling conventions manual states RP will + always be stored into the caller's frame at sp-20. */ + if (regs_ever_live[2] || profile_flag) + store_reg (2, -20, STACK_POINTER_REGNUM); + + /* Allocate the local frame and set up the frame pointer if needed. */ + if (actual_fsize) + { + if (frame_pointer_needed) + { + /* Copy the old frame pointer temporarily into %r1. Set up the + new stack pointer, then store away the saved old frame pointer + into the stack at sp+actual_fsize and at the same time update + the stack pointer by actual_fsize bytes. Two versions, first + handles small (<8k) frames. The second handles large (>8k) + frames. */ + emit_move_insn (tmpreg, frame_pointer_rtx); + emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + if (VAL_14_BITS_P (actual_fsize)) + emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx)); + else + { + /* It is incorrect to store the saved frame pointer at *sp, + then increment sp (writes beyond the current stack boundary). + + So instead use stwm to store at *sp and post-increment the + stack pointer as an atomic operation. Then increment sp to + finish allocating the new frame. 
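In other words, the gen_post_stwm call just below stores the saved frame pointer and advances the stack pointer by 64 bytes as a single operation, and the following set_reg_plus_d call adds the remaining actual_fsize - 64 bytes, so the saved value never sits above the stack pointer.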
*/ + emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64))); + set_reg_plus_d (STACK_POINTER_REGNUM, + STACK_POINTER_REGNUM, + actual_fsize - 64); + } + } + /* no frame pointer needed. */ + else + { + /* In some cases we can perform the first callee register save + and allocating the stack frame at the same time. If so, just + make a note of it and defer allocating the frame until saving + the callee registers. */ + if (VAL_14_BITS_P (-actual_fsize) + && local_fsize == 0 + && ! profile_flag + && ! flag_pic) + merge_sp_adjust_with_store = 1; + /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */ + else if (actual_fsize != 0) + set_reg_plus_d (STACK_POINTER_REGNUM, + STACK_POINTER_REGNUM, + actual_fsize); + } + } + + /* The hppa calling conventions say that %r19, the pic offset + register, is saved at sp - 32 (in this function's frame) when + generating PIC code. FIXME: What is the correct thing to do + for functions which make no calls and allocate no frame? Do + we need to allocate a frame, or can we just omit the save? For + now we'll just omit the save. */ + if (actual_fsize != 0 && flag_pic) + store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM); + + /* Profiling code. + + Instead of taking one argument, the counter label, as most normal + mcounts do, _mcount appears to behave differently on the HPPA. It + takes the return address of the caller, the address of this routine, + and the address of the label. Also, it isn't magic, so + argument registers have to be preserved. */ + if (profile_flag) + { + int pc_offset, i, arg_offset, basereg, offsetadj; + + pc_offset = 4 + (frame_pointer_needed + ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20) + : (VAL_14_BITS_P (actual_fsize) ? 4 : 8)); + + /* When the function has a frame pointer, use it as the base + register for saving/restore registers. Else use the stack + pointer. Adjust the offset according to the frame size if + this function does not have a frame pointer. */ + + basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM + : STACK_POINTER_REGNUM; + offsetadj = frame_pointer_needed ? 0 : actual_fsize; + + /* Horrid hack. emit_function_prologue will modify this RTL in + place to get the expected results. sprintf here is just to + put something in the name. */ + sprintf(hp_profile_label_name, "LP$%04d", -1); + hp_profile_label_rtx = gen_rtx_SYMBOL_REF (SImode, hp_profile_label_name); + if (current_function_returns_struct) + store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg); + + for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4) + if (regs_ever_live [i]) + { + store_reg (i, arg_offset, basereg); + /* Deal with arg_offset not fitting in 14 bits. */ + pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8; + } + + emit_move_insn (gen_rtx_REG (SImode, 26), gen_rtx_REG (SImode, 2)); + emit_move_insn (tmpreg, gen_rtx_HIGH (SImode, hp_profile_label_rtx)); + emit_move_insn (gen_rtx_REG (SImode, 24), + gen_rtx_LO_SUM (SImode, tmpreg, hp_profile_label_rtx)); + /* %r25 is set from within the output pattern. */ + emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20))); + + /* Restore argument registers. */ + for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4) + if (regs_ever_live [i]) + load_reg (i, arg_offset, basereg); + + if (current_function_returns_struct) + load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg); + + } + + /* Normal register save. + + Do not save the frame pointer in the frame_pointer_needed case. It + was done earlier. 
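On the PA the frame pointer is %r3; it was stored by the stwm that set up the new frame, which is why the save loop below starts at %r4 and gr_saved is incremented by hand to account for it.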
*/ + if (frame_pointer_needed) + { + for (i = 18, offset = local_fsize; i >= 4; i--) + if (regs_ever_live[i] && ! call_used_regs[i]) + { + store_reg (i, offset, FRAME_POINTER_REGNUM); + offset += 4; + gr_saved++; + } + /* Account for %r3 which is saved in a special place. */ + gr_saved++; + } + /* No frame pointer needed. */ + else + { + for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--) + if (regs_ever_live[i] && ! call_used_regs[i]) + { + /* If merge_sp_adjust_with_store is nonzero, then we can + optimize the first GR save. */ + if (merge_sp_adjust_with_store) + { + merge_sp_adjust_with_store = 0; + emit_insn (gen_post_stwm (stack_pointer_rtx, + gen_rtx_REG (SImode, i), + GEN_INT (-offset))); + } + else + store_reg (i, offset, STACK_POINTER_REGNUM); + offset += 4; + gr_saved++; + } + + /* If we wanted to merge the SP adjustment with a GR save, but we never + did any GR saves, then just emit the adjustment here. */ + if (merge_sp_adjust_with_store) + set_reg_plus_d (STACK_POINTER_REGNUM, + STACK_POINTER_REGNUM, + actual_fsize); + } + + /* Align pointer properly (doubleword boundary). */ + offset = (offset + 7) & ~7; + + /* Floating point register store. */ + if (save_fregs) + { + /* First get the frame or stack pointer to the start of the FP register + save area. */ + if (frame_pointer_needed) + set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset); + else + set_reg_plus_d (1, STACK_POINTER_REGNUM, offset); + + /* Now actually save the FP registers. */ + for (i = 66; i >= 48; i -= 2) + { + if (regs_ever_live[i] || regs_ever_live[i + 1]) + { + emit_move_insn (gen_rtx_MEM (DFmode, + gen_rtx_POST_INC (DFmode, tmpreg)), + gen_rtx_REG (DFmode, i)); + fr_saved++; + } + } + } + + /* When generating PIC code it is necessary to save/restore the + PIC register around each function call. We used to do this + in the call patterns themselves, but that implementation + made incorrect assumptions about using global variables to hold + per-function rtl code generated in the backend. + + So instead, we copy the PIC register into a reserved callee saved + register in the prologue. Then after each call we reload the PIC + register from the callee saved register. We also reload the PIC + register from the callee saved register in the epilogue ensure the + PIC register is valid at function exit. + + This may (depending on the exact characteristics of the function) + even be more efficient. + + Avoid this if the callee saved register wasn't used (these are + leaf functions). */ + if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED]) + emit_move_insn (gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM_SAVED), + gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM)); +} + + +void +output_function_epilogue (file, size) + FILE *file; + int size ATTRIBUTE_UNUSED; +{ + rtx insn = get_last_insn (); + + /* hppa_expand_epilogue does the dirty work now. We just need + to output the assembler directives which denote the end + of a function. + + To make debuggers happy, emit a nop if the epilogue was completely + eliminated due to a volatile call as the last insn in the + current function. That way the return address (in %r2) will + always point to a valid instruction in the current function. */ + + /* Get the last real insn. */ + if (GET_CODE (insn) == NOTE) + insn = prev_real_insn (insn); + + /* If it is a sequence, then look inside. 
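(The last insn is a SEQUENCE when its delay slot was filled; element 0 of the vector is the branch or call itself, which is the insn we want to examine.)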
*/ + if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) + insn = XVECEXP (PATTERN (insn), 0, 0); + + /* If insn is a CALL_INSN, then it must be a call to a volatile + function (otherwise there would be epilogue insns). */ + if (insn && GET_CODE (insn) == CALL_INSN) + fputs ("\tnop\n", file); + + fputs ("\t.EXIT\n\t.PROCEND\n", file); + + /* Free up stuff we don't need anymore. */ + if (unscaled_index_insn_codes) + free (unscaled_index_insn_codes); + max_unscaled_index_insn_codes_uid = 0; +} + +void +hppa_expand_epilogue () +{ + rtx tmpreg; + int offset,i; + int merge_sp_adjust_with_load = 0; + + /* Handle out of line prologues and epilogues. */ + if (TARGET_SPACE && out_of_line_prologue_epilogue) + { + int saves = 0; + rtx operands[2]; + + /* Put the register save info into %r22. */ + for (i = 18; i >= 3; i--) + if (regs_ever_live[i] && ! call_used_regs[i]) + { + saves = i; + break; + } + + for (i = 66; i >= 48; i -= 2) + if (regs_ever_live[i] || regs_ever_live[i + 1]) + { + saves |= ((i/2 - 12 ) << 16); + break; + } + + emit_insn (gen_blockage ()); + + /* Put the local_fisze into %r19. */ + operands[0] = gen_rtx_REG (SImode, 19); + operands[1] = GEN_INT (local_fsize); + emit_move_insn (operands[0], operands[1]); + + /* Put the stack size into %r21. */ + operands[0] = gen_rtx_REG (SImode, 21); + operands[1] = GEN_INT (actual_fsize); + emit_move_insn (operands[0], operands[1]); + + operands[0] = gen_rtx_REG (SImode, 22); + operands[1] = GEN_INT (saves); + emit_move_insn (operands[0], operands[1]); + + /* Now call the out-of-line epilogue. */ + emit_insn (gen_outline_epilogue_call ()); + return; + } + + /* We will use this often. */ + tmpreg = gen_rtx_REG (SImode, 1); + + /* Try to restore RP early to avoid load/use interlocks when + RP gets used in the return (bv) instruction. This appears to still + be necessary even when we schedule the prologue and epilogue. */ + if (frame_pointer_needed + && (regs_ever_live [2] || profile_flag)) + load_reg (2, -20, FRAME_POINTER_REGNUM); + + /* No frame pointer, and stack is smaller than 8k. */ + else if (! frame_pointer_needed + && VAL_14_BITS_P (actual_fsize + 20) + && (regs_ever_live[2] || profile_flag)) + load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM); + + /* General register restores. */ + if (frame_pointer_needed) + { + for (i = 18, offset = local_fsize; i >= 4; i--) + if (regs_ever_live[i] && ! call_used_regs[i]) + { + load_reg (i, offset, FRAME_POINTER_REGNUM); + offset += 4; + } + } + else + { + for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--) + { + if (regs_ever_live[i] && ! call_used_regs[i]) + { + /* Only for the first load. + merge_sp_adjust_with_load holds the register load + with which we will merge the sp adjustment. */ + if (VAL_14_BITS_P (actual_fsize + 20) + && local_fsize == 0 + && ! merge_sp_adjust_with_load) + merge_sp_adjust_with_load = i; + else + load_reg (i, offset, STACK_POINTER_REGNUM); + offset += 4; + } + } + } + + /* Align pointer properly (doubleword boundary). */ + offset = (offset + 7) & ~7; + + /* FP register restores. */ + if (save_fregs) + { + /* Adjust the register to index off of. */ + if (frame_pointer_needed) + set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset); + else + set_reg_plus_d (1, STACK_POINTER_REGNUM, offset); + + /* Actually do the restores now. 
*/ + for (i = 66; i >= 48; i -= 2) + { + if (regs_ever_live[i] || regs_ever_live[i + 1]) + { + emit_move_insn (gen_rtx_REG (DFmode, i), + gen_rtx_MEM (DFmode, + gen_rtx_POST_INC (DFmode, tmpreg))); + } + } + } + + /* Emit a blockage insn here to keep these insns from being moved to + an earlier spot in the epilogue, or into the main instruction stream. + + This is necessary as we must not cut the stack back before all the + restores are finished. */ + emit_insn (gen_blockage ()); + /* No frame pointer, but we have a stack greater than 8k. We restore + %r2 very late in this case. (All other cases are restored as early + as possible.) */ + if (! frame_pointer_needed + && ! VAL_14_BITS_P (actual_fsize + 20) + && (regs_ever_live[2] || profile_flag)) + { + set_reg_plus_d (STACK_POINTER_REGNUM, + STACK_POINTER_REGNUM, + - actual_fsize); + + /* This used to try and be clever by not depending on the value in + %r30 and instead use the value held in %r1 (so that the 2nd insn + which sets %r30 could be put in the delay slot of the return insn). + + That won't work since if the stack is exactly 8k set_reg_plus_d + doesn't set %r1, just %r30. */ + load_reg (2, - 20, STACK_POINTER_REGNUM); + } + + /* Reset stack pointer (and possibly frame pointer). The stack + pointer is initially set to fp + 64 to avoid a race condition. */ + else if (frame_pointer_needed) + { + set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64); + emit_insn (gen_pre_ldwm (frame_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-64))); + } + /* If we were deferring a callee register restore, do it now. */ + else if (! frame_pointer_needed && merge_sp_adjust_with_load) + emit_insn (gen_pre_ldwm (gen_rtx_REG (SImode, merge_sp_adjust_with_load), + stack_pointer_rtx, + GEN_INT (- actual_fsize))); + else if (actual_fsize != 0) + set_reg_plus_d (STACK_POINTER_REGNUM, + STACK_POINTER_REGNUM, + - actual_fsize); +} + +/* Fetch the return address for the frame COUNT steps up from + the current frame, after the prologue. FRAMEADDR is the + frame pointer of the COUNT frame. + + We want to ignore any export stub remnants here. + + The value returned is used in two different ways: + + 1. To find a function's caller. + + 2. To change the return address for a function. + + This function handles most instances of case 1; however, it will + fail if there are two levels of stubs to execute on the return + path. The only way I believe that can happen is if the return value + needs a parameter relocation, which never happens for C code. + + This function handles most instances of case 2; however, it will + fail if we did not originally have stub code on the return path + but will need code on the new return path. This can happen if + the caller & callee are both in the main program, but the new + return location is in a shared library. + + To handle this correctly we need to set the return pointer at + frame-20 to point to a return stub frame-24 to point to the + location we wish to return to. */ + +rtx +return_addr_rtx (count, frameaddr) + int count ATTRIBUTE_UNUSED; + rtx frameaddr; +{ + rtx label; + rtx saved_rp; + rtx ins; + + saved_rp = gen_reg_rtx (Pmode); + + /* First, we start off with the normal return address pointer from + -20[frameaddr]. */ + + emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD)); + + /* Get pointer to the instruction stream. 
We have to mask out the + privilege level from the two low order bits of the return address + pointer here so that ins will point to the start of the first + instruction that would have been executed if we returned. */ + ins = copy_to_reg (gen_rtx_AND (Pmode, + copy_to_reg (gen_rtx_MEM (Pmode, saved_rp)), + MASK_RETURN_ADDR)); + label = gen_label_rtx (); + + /* Check the instruction stream at the normal return address for the + export stub: + + 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp + 0x004010a1 | stub+12: ldsid (sr0,rp),r1 + 0x00011820 | stub+16: mtsp r1,sr0 + 0xe0400002 | stub+20: be,n 0(sr0,rp) + + If it is an export stub, than our return address is really in + -24[frameaddr]. */ + + emit_cmp_insn (gen_rtx_MEM (SImode, ins), + GEN_INT (0x4bc23fd1), + NE, NULL_RTX, SImode, 1, 0); + emit_jump_insn (gen_bne (label)); + + emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)), + GEN_INT (0x004010a1), + NE, NULL_RTX, SImode, 1, 0); + emit_jump_insn (gen_bne (label)); + + emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)), + GEN_INT (0x00011820), + NE, NULL_RTX, SImode, 1, 0); + emit_jump_insn (gen_bne (label)); + + emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)), + GEN_INT (0xe0400002), + NE, NULL_RTX, SImode, 1, 0); + + /* If there is no export stub then just use our initial guess of + -20[frameaddr]. */ + + emit_jump_insn (gen_bne (label)); + + /* Here we know that our return address pointer points to an export + stub. We don't want to return the address of the export stub, + but rather the return address that leads back into user code. + That return address is stored at -24[frameaddr]. */ + + emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD)); + + emit_label (label); + return gen_rtx_MEM (Pmode, memory_address (Pmode, saved_rp)); +} + +/* This is only valid once reload has completed because it depends on + knowing exactly how much (if any) frame there is and... + + It's only valid if there is no frame marker to de-allocate and... + + It's only valid if %r2 hasn't been saved into the caller's frame + (we're not profiling and %r2 isn't live anywhere). */ +int +hppa_can_use_return_insn_p () +{ + return (reload_completed + && (compute_frame_size (get_frame_size (), 0) ? 0 : 1) + && ! profile_flag + && ! regs_ever_live[2] + && ! frame_pointer_needed); +} + +void +emit_bcond_fp (code, operand0) + enum rtx_code code; + rtx operand0; +{ + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_fmt_ee (code, + VOIDmode, + gen_rtx_REG (CCFPmode, 0), + const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, operand0), + pc_rtx))); + +} + +rtx +gen_cmp_fp (code, operand0, operand1) + enum rtx_code code; + rtx operand0, operand1; +{ + return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0), + gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)); +} + +/* Adjust the cost of a scheduling dependency. Return the new cost of + a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ + +int +pa_adjust_cost (insn, link, dep_insn, cost) + rtx insn; + rtx link; + rtx dep_insn; + int cost; +{ + enum attr_type attr_type; + + if (! recog_memoized (insn)) + return 0; + + /* CYGNUS LOCAL PA8000/law */ + /* No cost adjustments are needed for the PA8000 */ + if (pa_cpu == PROCESSOR_8000) + return 0; + /* END CYGNUS LOCAL */ + + attr_type = get_attr_type (insn); + + if (REG_NOTE_KIND (link) == 0) + { + /* Data dependency; DEP_INSN writes a register that INSN reads some + cycles later. 
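The run-time test that return_addr_rtx emits can be summarized by the following C model; the word offsets and the four stub opcodes are the ones listed in that function, and real_return_address is a hypothetical name used only for this 32-bit illustration:

/* Model of the code return_addr_rtx generates: fetch the saved RP, strip
   the two privilege-level bits, and if it points at an export stub use the
   return address stored at -24 instead of the one at -20.  Sketch only.  */
static unsigned int
real_return_address (frame)
     unsigned int *frame;
{
  unsigned int rp = frame[-5];                     /* -20(frameaddr) */
  unsigned int *pc = (unsigned int *) (rp & ~3U);  /* strip privilege bits */

  if (pc[0] == 0x4bc23fd1          /* ldw -18(sr0,sp),rp */
      && pc[1] == 0x004010a1       /* ldsid (sr0,rp),r1  */
      && pc[2] == 0x00011820       /* mtsp r1,sr0        */
      && pc[3] == 0xe0400002)      /* be,n 0(sr0,rp)     */
    return frame[-6];              /* export stub: real RP is at -24 */

  return rp;
}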
*/ + + if (attr_type == TYPE_FPSTORE) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fstXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + /* DEP_INSN is writing its result to the register + being stored in the fpstore INSN. */ + switch (get_attr_type (dep_insn)) + { + case TYPE_FPLOAD: + /* This cost 3 cycles, not 2 as the md says for the + 700 and 7100. */ + return cost + 1; + + case TYPE_FPALU: + case TYPE_FPMULSGL: + case TYPE_FPMULDBL: + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* In these important cases, we save one cycle compared to + when flop instruction feed each other. */ + return cost - 1; + + default: + return cost; + } + } + } + + /* For other data dependencies, the default cost specified in the + md is correct. */ + return cost; + } + else if (REG_NOTE_KIND (link) == REG_DEP_ANTI) + { + /* Anti dependency; DEP_INSN reads a register that INSN writes some + cycles later. */ + + if (attr_type == TYPE_FPLOAD) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPALU: + case TYPE_FPMULSGL: + case TYPE_FPMULDBL: + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* A fpload can't be issued until one cycle before a + preceding arithmetic operation has finished if + the target of the fpload is any of the sources + (or destination) of the arithmetic operation. */ + return cost - 1; + + default: + return 0; + } + } + } + else if (attr_type == TYPE_FPALU) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* An ALU flop can't be issued until two cycles before a + preceding divide or sqrt operation has finished if + the target of the ALU flop is any of the sources + (or destination) of the divide or sqrt operation. */ + return cost - 2; + + default: + return 0; + } + } + } + + /* For other anti dependencies, the cost is 0. */ + return 0; + } + else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) + { + /* Output dependency; DEP_INSN writes a register that INSN writes some + cycles later. */ + if (attr_type == TYPE_FPLOAD) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. 
*/ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPALU: + case TYPE_FPMULSGL: + case TYPE_FPMULDBL: + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* A fpload can't be issued until one cycle before a + preceding arithmetic operation has finished if + the target of the fpload is the destination of the + arithmetic operation. */ + return cost - 1; + + default: + return 0; + } + } + } + else if (attr_type == TYPE_FPALU) + { + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + { + /* This happens for the fldXs,mb patterns. */ + pat = XVECEXP (pat, 0, 0); + } + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return 0 for now. */ + return 0; + + if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) + { + if (! recog_memoized (dep_insn)) + return 0; + switch (get_attr_type (dep_insn)) + { + case TYPE_FPDIVSGL: + case TYPE_FPDIVDBL: + case TYPE_FPSQRTSGL: + case TYPE_FPSQRTDBL: + /* An ALU flop can't be issued until two cycles before a + preceding divide or sqrt operation has finished if + the target of the ALU flop is also the target of + the divide or sqrt operation. */ + return cost - 2; + + default: + return 0; + } + } + } + + /* For other output dependencies, the cost is 0. */ + return 0; + } + else + abort (); +} + +/* Return any length adjustment needed by INSN which already has its length + computed as LENGTH. Return zero if no adjustment is necessary. + + For the PA: function calls, millicode calls, and backwards short + conditional branches with unfilled delay slots need an adjustment by +1 + (to account for the NOP which will be inserted into the instruction stream). + + Also compute the length of an inline block move here as it is too + complicated to express as a length attribute in pa.md. */ +int +pa_adjust_insn_length (insn, length) + rtx insn; + int length; +{ + rtx pat = PATTERN (insn); + + /* Call insns which are *not* indirect and have unfilled delay slots. */ + if (GET_CODE (insn) == CALL_INSN) + { + + if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL + && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF) + return 4; + else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET + && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0)) + == SYMBOL_REF) + return 4; + else + return 0; + } + /* Jumps inside switch tables which have unfilled delay slots + also need adjustment. */ + else if (GET_CODE (insn) == JUMP_INSN + && simplejump_p (insn) + && GET_MODE (insn) == SImode) + return 4; + /* Millicode insn with an unfilled delay slot. */ + else if (GET_CODE (insn) == INSN + && GET_CODE (pat) != SEQUENCE + && GET_CODE (pat) != USE + && GET_CODE (pat) != CLOBBER + && get_attr_type (insn) == TYPE_MILLI) + return 4; + /* Block move pattern. 
*/ + else if (GET_CODE (insn) == INSN + && GET_CODE (pat) == PARALLEL + && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM + && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM + && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode + && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) + return compute_movstrsi_length (insn) - 4; + /* Conditional branch with an unfilled delay slot. */ + else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn)) + { + /* Adjust a short backwards conditional with an unfilled delay slot. */ + if (GET_CODE (pat) == SET + && length == 4 + && ! forward_branch_p (insn)) + return 4; + else if (GET_CODE (pat) == PARALLEL + && get_attr_type (insn) == TYPE_PARALLEL_BRANCH + && length == 4) + return 4; + /* Adjust dbra insn with short backwards conditional branch with + unfilled delay slot -- only for case where counter is in a + general register register. */ + else if (GET_CODE (pat) == PARALLEL + && GET_CODE (XVECEXP (pat, 0, 1)) == SET + && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG + && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0)) + && length == 4 + && ! forward_branch_p (insn)) + return 4; + else + return 0; + } + return 0; +} + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +void +print_operand (file, x, code) + FILE *file; + rtx x; + int code; +{ + switch (code) + { + case '#': + /* Output a 'nop' if there's nothing for the delay slot. */ + if (dbr_sequence_length () == 0) + fputs ("\n\tnop", file); + return; + case '*': + /* Output an nullification completer if there's nothing for the */ + /* delay slot or nullification is requested. */ + if (dbr_sequence_length () == 0 || + (final_sequence && + INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))) + fputs (",n", file); + return; + case 'R': + /* Print out the second register name of a register pair. + I.e., R (6) => 7. */ + fputs (reg_names[REGNO (x)+1], file); + return; + case 'r': + /* A register or zero. */ + if (x == const0_rtx + || (x == CONST0_RTX (DFmode)) + || (x == CONST0_RTX (SFmode))) + { + fputs ("0", file); + return; + } + else + break; + case 'C': /* Plain (C)ondition */ + case 'X': + switch (GET_CODE (x)) + { + case EQ: + fputs ("=", file); break; + case NE: + fputs ("<>", file); break; + case GT: + fputs (">", file); break; + case GE: + fputs (">=", file); break; + case GEU: + fputs (">>=", file); break; + case GTU: + fputs (">>", file); break; + case LT: + fputs ("<", file); break; + case LE: + fputs ("<=", file); break; + case LEU: + fputs ("<<=", file); break; + case LTU: + fputs ("<<", file); break; + default: + abort (); + } + return; + case 'N': /* Condition, (N)egated */ + switch (GET_CODE (x)) + { + case EQ: + fputs ("<>", file); break; + case NE: + fputs ("=", file); break; + case GT: + fputs ("<=", file); break; + case GE: + fputs ("<", file); break; + case GEU: + fputs ("<<", file); break; + case GTU: + fputs ("<<=", file); break; + case LT: + fputs (">=", file); break; + case LE: + fputs (">", file); break; + case LEU: + fputs (">>", file); break; + case LTU: + fputs (">>=", file); break; + default: + abort (); + } + return; + /* For floating point comparisons. Need special conditions to deal + with NaNs properly. 
*/ + case 'Y': + switch (GET_CODE (x)) + { + case EQ: + fputs ("!=", file); break; + case NE: + fputs ("=", file); break; + case GT: + fputs ("<=", file); break; + case GE: + fputs ("<", file); break; + case LT: + fputs (">=", file); break; + case LE: + fputs (">", file); break; + default: + abort (); + } + return; + case 'S': /* Condition, operands are (S)wapped. */ + switch (GET_CODE (x)) + { + case EQ: + fputs ("=", file); break; + case NE: + fputs ("<>", file); break; + case GT: + fputs ("<", file); break; + case GE: + fputs ("<=", file); break; + case GEU: + fputs ("<<=", file); break; + case GTU: + fputs ("<<", file); break; + case LT: + fputs (">", file); break; + case LE: + fputs (">=", file); break; + case LEU: + fputs (">>=", file); break; + case LTU: + fputs (">>", file); break; + default: + abort (); + } + return; + case 'B': /* Condition, (B)oth swapped and negate. */ + switch (GET_CODE (x)) + { + case EQ: + fputs ("<>", file); break; + case NE: + fputs ("=", file); break; + case GT: + fputs (">=", file); break; + case GE: + fputs (">", file); break; + case GEU: + fputs (">>", file); break; + case GTU: + fputs (">>=", file); break; + case LT: + fputs ("<=", file); break; + case LE: + fputs ("<", file); break; + case LEU: + fputs ("<<", file); break; + case LTU: + fputs ("<<=", file); break; + default: + abort (); + } + return; + case 'k': + if (GET_CODE (x) == CONST_INT) + { + fprintf (file, "%d", ~INTVAL (x)); + return; + } + abort(); + case 'L': + if (GET_CODE (x) == CONST_INT) + { + fprintf (file, "%d", 32 - (INTVAL (x) & 31)); + return; + } + abort(); + case 'O': + if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0) + { + fprintf (file, "%d", exact_log2 (INTVAL (x))); + return; + } + abort(); + case 'P': + if (GET_CODE (x) == CONST_INT) + { + fprintf (file, "%d", 31 - (INTVAL (x) & 31)); + return; + } + abort(); + case 'I': + if (GET_CODE (x) == CONST_INT) + fputs ("i", file); + return; + case 'M': + case 'F': + switch (GET_CODE (XEXP (x, 0))) + { + case PRE_DEC: + case PRE_INC: + fputs ("s,mb", file); + break; + case POST_DEC: + case POST_INC: + fputs ("s,ma", file); + break; + case PLUS: + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) + fputs ("x,s", file); + else if (code == 'F') + fputs ("s", file); + break; + default: + if (code == 'F') + fputs ("s", file); + break; + } + return; + case 'G': + output_global_address (file, x, 0); + return; + case 'H': + output_global_address (file, x, 1); + return; + case 0: /* Don't do anything special */ + break; + case 'Z': + { + unsigned op[3]; + compute_zdepi_operands (INTVAL (x), op); + fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); + return; + } + default: + abort (); + } + if (GET_CODE (x) == REG) + { + fputs (reg_names [REGNO (x)], file); + if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0) + fputs ("L", file); + } + else if (GET_CODE (x) == MEM) + { + int size = GET_MODE_SIZE (GET_MODE (x)); + rtx base = XEXP (XEXP (x, 0), 0); + switch (GET_CODE (XEXP (x, 0))) + { + case PRE_DEC: + case POST_DEC: + fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]); + break; + case PRE_INC: + case POST_INC: + fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]); + break; + default: + if (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) + fprintf (file, "%s(0,%s)", + reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))], + reg_names [REGNO (XEXP (XEXP (x, 0), 1))]); + else if (GET_CODE (XEXP (x, 0)) == PLUS + && 
GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) + fprintf (file, "%s(0,%s)", + reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))], + reg_names [REGNO (XEXP (XEXP (x, 0), 0))]); + else + output_address (XEXP (x, 0)); + break; + } + } + else + output_addr_const (file, x); +} + +/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */ + +void +output_global_address (file, x, round_constant) + FILE *file; + rtx x; + int round_constant; +{ + + /* Imagine (high (const (plus ...))). */ + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + + if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x)) + assemble_name (file, XSTR (x, 0)); + else if (GET_CODE (x) == SYMBOL_REF && !flag_pic) + { + assemble_name (file, XSTR (x, 0)); + fputs ("-$global$", file); + } + else if (GET_CODE (x) == CONST) + { + char *sep = ""; + int offset = 0; /* assembler wants -$global$ at end */ + rtx base = NULL_RTX; + + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) + { + base = XEXP (XEXP (x, 0), 0); + output_addr_const (file, base); + } + else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT) + offset = INTVAL (XEXP (XEXP (x, 0), 0)); + else abort (); + + if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF) + { + base = XEXP (XEXP (x, 0), 1); + output_addr_const (file, base); + } + else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) + offset = INTVAL (XEXP (XEXP (x, 0),1)); + else abort (); + + /* How bogus. The compiler is apparently responsible for + rounding the constant if it uses an LR field selector. + + The linker and/or assembler seem a better place since + they have to do this kind of thing already. + + If we fail to do this, HP's optimizing linker may eliminate + an addil, but not update the ldw/stw/ldo instruction that + uses the result of the addil. */ + if (round_constant) + offset = ((offset + 0x1000) & ~0x1fff); + + if (GET_CODE (XEXP (x, 0)) == PLUS) + { + if (offset < 0) + { + offset = -offset; + sep = "-"; + } + else + sep = "+"; + } + else if (GET_CODE (XEXP (x, 0)) == MINUS + && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) + sep = "-"; + else abort (); + + if (!read_only_operand (base) && !flag_pic) + fputs ("-$global$", file); + if (offset) + fprintf (file,"%s%d", sep, offset); + } + else + output_addr_const (file, x); +} + +void +output_deferred_plabels (file) + FILE *file; +{ + int i; + /* If we have deferred plabels, then we need to switch into the data + section and align it to a 4 byte boundary before we output the + deferred plabels. */ + if (n_deferred_plabels) + { + data_section (); + ASM_OUTPUT_ALIGN (file, 2); + } + + /* Now output the deferred plabels. */ + for (i = 0; i < n_deferred_plabels; i++) + { + ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label)); + assemble_integer (gen_rtx_SYMBOL_REF (VOIDmode, + deferred_plabels[i].name), 4, 1); + } +} + +/* HP's millicode routines mean something special to the assembler. + Keep track of which ones we have used. 
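On the rounding in output_global_address above: ((offset + 0x1000) & ~0x1fff) adds half of 8K and then clears the low 13 bits, i.e. it rounds the constant to the nearest multiple of 0x2000. A stand-alone check (illustrative only, not compiler code):

#include <assert.h>

int
main ()
{
  assert (((0x0345 + 0x1000) & ~0x1fff) == 0x0000);
  assert (((0x2345 + 0x1000) & ~0x1fff) == 0x2000);
  assert (((0x3345 + 0x1000) & ~0x1fff) == 0x4000);
  return 0;
}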
*/ + +enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 }; +static char imported[(int)end1000]; +static char *milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"}; +static char import_string[] = ".IMPORT $$....,MILLICODE"; +#define MILLI_START 10 + +static void +import_milli (code) + enum millicodes code; +{ + char str[sizeof (import_string)]; + + if (!imported[(int)code]) + { + imported[(int)code] = 1; + strcpy (str, import_string); + strncpy (str + MILLI_START, milli_names[(int)code], 4); + output_asm_insn (str, 0); + } +} + +/* The register constraints have put the operands and return value in + the proper registers. */ + +char * +output_mul_insn (unsignedp, insn) + int unsignedp ATTRIBUTE_UNUSED; + rtx insn; +{ + import_milli (mulI); + return output_millicode_call (insn, gen_rtx_SYMBOL_REF (SImode, "$$mulI")); +} + +/* Emit the rtl for doing a division by a constant. */ + +/* Do magic division millicodes exist for this value? */ +static int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, + 1, 1}; + +/* We'll use an array to keep track of the magic millicodes and + whether or not we've used them already. [n][0] is signed, [n][1] is + unsigned. */ + +static int div_milli[16][2]; + +int +div_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (mode == SImode + && ((GET_CODE (op) == REG && REGNO (op) == 25) + || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0 + && INTVAL (op) < 16 && magic_milli[INTVAL (op)]))); +} + +int +emit_hpdiv_const (operands, unsignedp) + rtx *operands; + int unsignedp; +{ + if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) < 16 + && magic_milli[INTVAL (operands[2])]) + { + emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]); + emit + (gen_rtx + (PARALLEL, VOIDmode, + gen_rtvec (5, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29), + gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, + SImode, + gen_rtx_REG (SImode, 26), + operands[2])), + gen_rtx_CLOBBER (VOIDmode, operands[3]), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 31))))); + emit_move_insn (operands[0], gen_rtx_REG (SImode, 29)); + return 1; + } + return 0; +} + +char * +output_div_insn (operands, unsignedp, insn) + rtx *operands; + int unsignedp; + rtx insn; +{ + int divisor; + + /* If the divisor is a constant, try to use one of the special + opcodes .*/ + if (GET_CODE (operands[0]) == CONST_INT) + { + static char buf[100]; + divisor = INTVAL (operands[0]); + if (!div_milli[divisor][unsignedp]) + { + div_milli[divisor][unsignedp] = 1; + if (unsignedp) + output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands); + else + output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands); + } + if (unsignedp) + { + sprintf (buf, "$$divU_%d", INTVAL (operands[0])); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, buf)); + } + else + { + sprintf (buf, "$$divI_%d", INTVAL (operands[0])); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, buf)); + } + } + /* Divisor isn't a special constant. */ + else + { + if (unsignedp) + { + import_milli (divU); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, "$$divU")); + } + else + { + import_milli (divI); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, "$$divI")); + } + } +} + +/* Output a $$rem millicode to do mod. 
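As an aside on import_milli above: it patches the four dots in the template with the millicode name so that each routine is .IMPORTed at most once per output file. A stand-alone demonstration of that string surgery (illustrative only):

#include <stdio.h>
#include <string.h>

int
main ()
{
  static char import_string[] = ".IMPORT $$....,MILLICODE";
  char str[sizeof (import_string)];

  strcpy (str, import_string);
  strncpy (str + 10, "mulI", 4);    /* MILLI_START is 10, the first '.' */
  puts (str);                       /* prints ".IMPORT $$mulI,MILLICODE" */
  return 0;
}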
*/ + +char * +output_mod_insn (unsignedp, insn) + int unsignedp; + rtx insn; +{ + if (unsignedp) + { + import_milli (remU); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, "$$remU")); + } + else + { + import_milli (remI); + return output_millicode_call (insn, + gen_rtx_SYMBOL_REF (SImode, "$$remI")); + } +} + +void +output_arg_descriptor (call_insn) + rtx call_insn; +{ + char *arg_regs[4]; + enum machine_mode arg_mode; + rtx link; + int i, output_flag = 0; + int regno; + + for (i = 0; i < 4; i++) + arg_regs[i] = 0; + + /* Specify explicitly that no argument relocations should take place + if using the portable runtime calling conventions. */ + if (TARGET_PORTABLE_RUNTIME) + { + fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n", + asm_out_file); + return; + } + + if (GET_CODE (call_insn) != CALL_INSN) + abort (); + for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1)) + { + rtx use = XEXP (link, 0); + + if (! (GET_CODE (use) == USE + && GET_CODE (XEXP (use, 0)) == REG + && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) + continue; + + arg_mode = GET_MODE (XEXP (use, 0)); + regno = REGNO (XEXP (use, 0)); + if (regno >= 23 && regno <= 26) + { + arg_regs[26 - regno] = "GR"; + if (arg_mode == DImode) + arg_regs[25 - regno] = "GR"; + } + else if (regno >= 32 && regno <= 39) + { + if (arg_mode == SFmode) + arg_regs[(regno - 32) / 2] = "FR"; + else + { +#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED + arg_regs[(regno - 34) / 2] = "FR"; + arg_regs[(regno - 34) / 2 + 1] = "FU"; +#else + arg_regs[(regno - 34) / 2] = "FU"; + arg_regs[(regno - 34) / 2 + 1] = "FR"; +#endif + } + } + } + fputs ("\t.CALL ", asm_out_file); + for (i = 0; i < 4; i++) + { + if (arg_regs[i]) + { + if (output_flag++) + fputc (',', asm_out_file); + fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]); + } + } + fputc ('\n', asm_out_file); +} + +/* Return the class of any secondary reload register that is needed to + move IN into a register in class CLASS using mode MODE. + + Profiling has showed this routine and its descendants account for + a significant amount of compile time (~7%). So it has been + optimized to reduce redundant computations and eliminate useless + function calls. + + It might be worthwhile to try and make this a leaf function too. */ + +enum reg_class +secondary_reload_class (class, mode, in) + enum reg_class class; + enum machine_mode mode; + rtx in; +{ + int regno, is_symbolic; + + /* Trying to load a constant into a FP register during PIC code + generation will require %r1 as a scratch register. */ + if (flag_pic == 2 + && GET_MODE_CLASS (mode) == MODE_INT + && FP_REG_CLASS_P (class) + && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE)) + return R1_REGS; + + /* Profiling showed the PA port spends about 1.3% of its compilation + time in true_regnum from calls inside secondary_reload_class. */ + + if (GET_CODE (in) == REG) + { + regno = REGNO (in); + if (regno >= FIRST_PSEUDO_REGISTER) + regno = true_regnum (in); + } + else if (GET_CODE (in) == SUBREG) + regno = true_regnum (in); + else + regno = -1; + + /* If we have something like (mem (mem (...)), we can safely assume the + inner MEM will end up in a general register after reloading, so there's + no need for a secondary reload. */ + if (GET_CODE (in) == MEM + && GET_CODE (XEXP (in, 0)) == MEM) + return NO_REGS; + + /* Handle out of range displacement for integer mode loads/stores of + FP registers. 
*/ + if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1) + && GET_MODE_CLASS (mode) == MODE_INT + && FP_REG_CLASS_P (class)) + || (class == SHIFT_REGS && (regno <= 0 || regno >= 32))) + return GENERAL_REGS; + + if (GET_CODE (in) == HIGH) + in = XEXP (in, 0); + + /* Profiling has showed GCC spends about 2.6% of its compilation + time in symbolic_operand from calls inside secondary_reload_class. + + We use an inline copy and only compute its return value once to avoid + useless work. */ + switch (GET_CODE (in)) + { + rtx tmp; + + case SYMBOL_REF: + case LABEL_REF: + is_symbolic = 1; + break; + case CONST: + tmp = XEXP (in, 0); + is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF + || GET_CODE (XEXP (tmp, 0)) == LABEL_REF) + && GET_CODE (XEXP (tmp, 1)) == CONST_INT); + break; + + default: + is_symbolic = 0; + break; + } + + if (!flag_pic + && is_symbolic + && read_only_operand (in)) + return NO_REGS; + + if (class != R1_REGS && is_symbolic) + return R1_REGS; + + return NO_REGS; +} + +enum direction +function_arg_padding (mode, type) + enum machine_mode mode; + tree type; +{ + int size; + + if (mode == BLKmode) + { + if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) + size = int_size_in_bytes (type) * BITS_PER_UNIT; + else + return upward; /* Don't know if this is right, but */ + /* same as old definition. */ + } + else + size = GET_MODE_BITSIZE (mode); + if (size < PARM_BOUNDARY) + return downward; + else if (size % PARM_BOUNDARY) + return upward; + else + return none; +} + + +/* Do what is necessary for `va_start'. The argument is ignored; + We look at the current function to determine if stdargs or varargs + is used and fill in an initial va_list. A pointer to this constructor + is returned. */ + +struct rtx_def * +hppa_builtin_saveregs (arglist) + tree arglist ATTRIBUTE_UNUSED; +{ + rtx offset, dest; + tree fntype = TREE_TYPE (current_function_decl); + int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0 + && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype))) + != void_type_node))) + ? UNITS_PER_WORD : 0); + + if (argadj) + offset = plus_constant (current_function_arg_offset_rtx, argadj); + else + offset = current_function_arg_offset_rtx; + + /* Store general registers on the stack. */ + dest = gen_rtx_MEM (BLKmode, + plus_constant (current_function_internal_arg_pointer, + -16)); + move_block_from_reg (23, dest, 4, 4 * UNITS_PER_WORD); + + /* move_block_from_reg will emit code to store the argument registers + individually as scalar stores. + + However, other insns may later load from the same addresses for + a structure load (passing a struct to a varargs routine). + + The alias code assumes that such aliasing can never happen, so we + have to keep memory referencing insns from moving up beyond the + last argument register store. So we emit a blockage insn here. */ + emit_insn (gen_blockage ()); + + if (current_function_check_memory_usage) + emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3, + dest, ptr_mode, + GEN_INT (4 * UNITS_PER_WORD), TYPE_MODE (sizetype), + GEN_INT (MEMORY_USE_RW), + TYPE_MODE (integer_type_node)); + + return copy_to_reg (expand_binop (Pmode, add_optab, + current_function_internal_arg_pointer, + offset, 0, 0, OPTAB_LIB_WIDEN)); +} + +/* This routine handles all the normal conditional branch sequences we + might need to generate. It handles compare immediate vs compare + register, nullification of delay slots, varying length branches, + negated branches, and all combinations of the above. 
It returns the + output appropriate to emit the branch corresponding to all given + parameters. */ + +char * +output_cbranch (operands, nullify, length, negated, insn) + rtx *operands; + int nullify, length, negated; + rtx insn; +{ + static char buf[100]; + int useskip = 0; + + /* A conditional branch to the following instruction (eg the delay slot) is + asking for a disaster. This can happen when not optimizing. + + In such cases it is safe to emit nothing. */ + + if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) + return ""; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* A forward branch over a single nullified insn can be done with a + comclr instruction. This avoids a single cycle penalty due to + mis-predicted branch if we fall through (branch not taken). */ + if (length == 4 + && next_real_insn (insn) != 0 + && get_attr_length (next_real_insn (insn)) == 4 + && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn)) + && nullify) + useskip = 1; + + switch (length) + { + /* All short conditional branches except backwards with an unfilled + delay slot. */ + case 4: + if (useskip) + strcpy (buf, "com%I2clr,"); + else + strcpy (buf, "com%I2b,"); + if (negated) + strcat (buf, "%B3"); + else + strcat (buf, "%S3"); + if (useskip) + strcat (buf, " %2,%r1,0"); + else if (nullify) + strcat (buf, ",n %2,%r1,%0"); + else + strcat (buf, " %2,%r1,%0"); + break; + + /* All long conditionals. Note an short backward branch with an + unfilled delay slot is treated just like a long backward branch + with an unfilled delay slot. */ + case 8: + /* Handle weird backwards branch with a filled delay slot + with is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + { + strcpy (buf, "com%I2b,"); + if (negated) + strcat (buf, "%S3"); + else + strcat (buf, "%B3"); + strcat (buf, ",n %2,%r1,.+12\n\tbl %0,0"); + } + /* Handle short backwards branch with an unfilled delay slot. + Using a comb;nop rather than comiclr;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && insn_addresses + && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] + - insn_addresses[INSN_UID (insn)] - 8)) + { + strcpy (buf, "com%I2b,"); + if (negated) + strcat (buf, "%B3 %2,%r1,%0%#"); + else + strcat (buf, "%S3 %2,%r1,%0%#"); + } + else + { + strcpy (buf, "com%I2clr,"); + if (negated) + strcat (buf, "%S3"); + else + strcat (buf, "%B3"); + if (nullify) + strcat (buf, " %2,%r1,0\n\tbl,n %0,0"); + else + strcat (buf, " %2,%r1,0\n\tbl %0,0"); + } + break; + + case 20: + /* Very long branch. Right now we only handle these when not + optimizing. See "jump" pattern in pa.md for details. */ + if (optimize) + abort (); + + /* Create a reversed conditional branch which branches around + the following insns. */ + if (negated) + strcpy (buf, "com%I2b,%S3,n %2,%r1,.+20"); + else + strcpy (buf, "com%I2b,%B3,n %2,%r1,.+20"); + output_asm_insn (buf, operands); + + /* Output an insn to save %r1. */ + output_asm_insn ("stw %%r1,-16(%%r30)", operands); + + /* Now output a very long branch to the original target. 
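
   [Editorial aside -- not part of the original pa.c.]  The ldil/be pair
   emitted below materializes the full 32-bit target address through the
   assembler's L'/R' field selectors: ldil supplies the high part in %r1
   and the external branch adds the low part.  The helper below shows the
   simple 21-bit/11-bit view of that split; it ignores the rounding the
   real selectors apply for sign-extended low halves, and the address and
   function names are made up for illustration.

       #include <stdio.h>

       static unsigned int left_part (unsigned int a)  { return a & ~0x7ffu; }
       static unsigned int right_part (unsigned int a) { return a & 0x7ffu; }

       int main (void)
       {
         unsigned int target = 0x40001234u;   // hypothetical code address
         printf ("high (ldil): 0x%08x\n", left_part (target));
         printf ("low  (be):   0x%03x\n", right_part (target));
         printf ("sum:         0x%08x\n",
                 left_part (target) | right_part (target));
         return 0;
       }

   [End of aside.]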
*/ + output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands); + + /* Now restore the value of %r1 in the delay slot. We're not + optimizing so we know nothing else can be in the delay slot. */ + return "ldw -16(%%r30),%%r1"; + + case 28: + /* Very long branch when generating PIC code. Right now we only + handle these when not optimizing. See "jump" pattern in pa.md + for details. */ + if (optimize) + abort (); + + /* Create a reversed conditional branch which branches around + the following insns. */ + if (negated) + strcpy (buf, "com%I2b,%S3,n %2,%r1,.+28"); + else + strcpy (buf, "com%I2b,%B3,n %2,%r1,.+28"); + output_asm_insn (buf, operands); + + /* Output an insn to save %r1. */ + output_asm_insn ("stw %%r1,-16(%%r30)", operands); + + /* Now output a very long PIC branch to the original target. */ + { + rtx xoperands[5]; + + xoperands[0] = operands[0]; + xoperands[1] = operands[1]; + xoperands[2] = operands[2]; + xoperands[3] = operands[3]; + xoperands[4] = gen_label_rtx (); + + output_asm_insn ("bl .+8,%%r1\n\taddil L'%l0-%l4,%%r1", xoperands); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[4])); + output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv 0(%%r1)", xoperands); + } + + /* Now restore the value of %r1 in the delay slot. We're not + optimizing so we know nothing else can be in the delay slot. */ + return "ldw -16(%%r30),%%r1"; + + default: + abort(); + } + return buf; +} + +/* This routine handles all the branch-on-bit conditional branch sequences we + might need to generate. It handles nullification of delay slots, + varying length branches, negated branches and all combinations of the + above. it returns the appropriate output template to emit the branch. */ + +char * +output_bb (operands, nullify, length, negated, insn, which) + rtx *operands ATTRIBUTE_UNUSED; + int nullify, length, negated; + rtx insn; + int which; +{ + static char buf[100]; + int useskip = 0; + + /* A conditional branch to the following instruction (eg the delay slot) is + asking for a disaster. I do not think this can happen as this pattern + is only used when optimizing; jump optimization should eliminate the + jump. But be prepared just in case. */ + + if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) + return ""; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* A forward branch over a single nullified insn can be done with a + extrs instruction. This avoids a single cycle penalty due to + mis-predicted branch if we fall through (branch not taken). */ + + if (length == 4 + && next_real_insn (insn) != 0 + && get_attr_length (next_real_insn (insn)) == 4 + && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn)) + && nullify) + useskip = 1; + + switch (length) + { + + /* All short conditional branches except backwards with an unfilled + delay slot. */ + case 4: + if (useskip) + strcpy (buf, "extrs,"); + else + strcpy (buf, "bb,"); + if ((which == 0 && negated) + || (which == 1 && ! 
negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (useskip) + strcat (buf, " %0,%1,1,0"); + else if (nullify && negated) + strcat (buf, ",n %0,%1,%3"); + else if (nullify && ! negated) + strcat (buf, ",n %0,%1,%2"); + else if (! nullify && negated) + strcat (buf, "%0,%1,%3"); + else if (! nullify && ! negated) + strcat (buf, " %0,%1,%2"); + break; + + /* All long conditionals. Note an short backward branch with an + unfilled delay slot is treated just like a long backward branch + with an unfilled delay slot. */ + case 8: + /* Handle weird backwards branch with a filled delay slot + with is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + { + strcpy (buf, "bb,"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (negated) + strcat (buf, ",n %0,%1,.+12\n\tbl %3,0"); + else + strcat (buf, ",n %0,%1,.+12\n\tbl %2,0"); + } + /* Handle short backwards branch with an unfilled delay slot. + Using a bb;nop rather than extrs;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && insn_addresses + && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] + - insn_addresses[INSN_UID (insn)] - 8)) + { + strcpy (buf, "bb,"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (negated) + strcat (buf, " %0,%1,%3%#"); + else + strcat (buf, " %0,%1,%2%#"); + } + else + { + strcpy (buf, "extrs,"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (nullify && negated) + strcat (buf, " %0,%1,1,0\n\tbl,n %3,0"); + else if (nullify && ! negated) + strcat (buf, " %0,%1,1,0\n\tbl,n %2,0"); + else if (negated) + strcat (buf, " %0,%1,1,0\n\tbl %3,0"); + else + strcat (buf, " %0,%1,1,0\n\tbl %2,0"); + } + break; + + default: + abort(); + } + return buf; +} + +/* This routine handles all the branch-on-variable-bit conditional branch + sequences we might need to generate. It handles nullification of delay + slots, varying length branches, negated branches and all combinations + of the above. it returns the appropriate output template to emit the + branch. */ + +char * +output_bvb (operands, nullify, length, negated, insn, which) + rtx *operands ATTRIBUTE_UNUSED; + int nullify, length, negated; + rtx insn; + int which; +{ + static char buf[100]; + int useskip = 0; + + /* A conditional branch to the following instruction (eg the delay slot) is + asking for a disaster. I do not think this can happen as this pattern + is only used when optimizing; jump optimization should eliminate the + jump. But be prepared just in case. */ + + if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) + return ""; + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* A forward branch over a single nullified insn can be done with a + extrs instruction. This avoids a single cycle penalty due to + mis-predicted branch if we fall through (branch not taken). 
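
   [Editorial aside -- not part of the original pa.c.]  The bb/bvb templates
   built here test a single bit of a register, and PA-RISC numbers bits from
   the most-significant end, so "bit 0" is the MSB (bvb takes the position
   from the shift-amount register rather than an immediate).  A tiny model
   of that numbering, assuming 32-bit registers; the helper name is ours.

       #include <stdio.h>

       static int pa_bit_set (unsigned int reg, int pos)
       {
         return (reg >> (31 - pos)) & 1;   // bit 0 == MSB, bit 31 == LSB
       }

       int main (void)
       {
         unsigned int r = 0x80000001u;
         printf ("bit 0:  %d\n", pa_bit_set (r, 0));    // 1, the MSB
         printf ("bit 1:  %d\n", pa_bit_set (r, 1));    // 0
         printf ("bit 31: %d\n", pa_bit_set (r, 31));   // 1, the LSB
         return 0;
       }

   [End of aside.]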
*/ + + if (length == 4 + && next_real_insn (insn) != 0 + && get_attr_length (next_real_insn (insn)) == 4 + && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn)) + && nullify) + useskip = 1; + + switch (length) + { + + /* All short conditional branches except backwards with an unfilled + delay slot. */ + case 4: + if (useskip) + strcpy (buf, "vextrs,"); + else + strcpy (buf, "bvb,"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (useskip) + strcat (buf, " %0,1,0"); + else if (nullify && negated) + strcat (buf, ",n %0,%3"); + else if (nullify && ! negated) + strcat (buf, ",n %0,%2"); + else if (! nullify && negated) + strcat (buf, "%0,%3"); + else if (! nullify && ! negated) + strcat (buf, " %0,%2"); + break; + + /* All long conditionals. Note an short backward branch with an + unfilled delay slot is treated just like a long backward branch + with an unfilled delay slot. */ + case 8: + /* Handle weird backwards branch with a filled delay slot + with is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + { + strcpy (buf, "bvb,"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (negated) + strcat (buf, ",n %0,.+12\n\tbl %3,0"); + else + strcat (buf, ",n %0,.+12\n\tbl %2,0"); + } + /* Handle short backwards branch with an unfilled delay slot. + Using a bb;nop rather than extrs;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && insn_addresses + && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] + - insn_addresses[INSN_UID (insn)] - 8)) + { + strcpy (buf, "bvb,"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, ">="); + else + strcat (buf, "<"); + if (negated) + strcat (buf, " %0,%3%#"); + else + strcat (buf, " %0,%2%#"); + } + else + { + strcpy (buf, "vextrs,"); + if ((which == 0 && negated) + || (which == 1 && ! negated)) + strcat (buf, "<"); + else + strcat (buf, ">="); + if (nullify && negated) + strcat (buf, " %0,1,0\n\tbl,n %3,0"); + else if (nullify && ! negated) + strcat (buf, " %0,1,0\n\tbl,n %2,0"); + else if (negated) + strcat (buf, " %0,1,0\n\tbl %3,0"); + else + strcat (buf, " %0,1,0\n\tbl %2,0"); + } + break; + + default: + abort(); + } + return buf; +} + +/* Return the output template for emitting a dbra type insn. + + Note it may perform some output operations on its own before + returning the final output string. */ +char * +output_dbra (operands, insn, which_alternative) + rtx *operands; + rtx insn; + int which_alternative; +{ + + /* A conditional branch to the following instruction (eg the delay slot) is + asking for a disaster. Be prepared! 
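
   [Editorial aside -- not part of the original pa.c.]  The addib templates
   returned by output_dbra add a small immediate to a register and branch on
   the result, which is the usual way counted loops are closed on the PA.
   The C loop below is a behavioural model of that shape; the -1 step and
   the nonzero test are just one representative condition.

       #include <stdio.h>

       int main (void)
       {
         int counter = 5;
         int body_runs = 0;

         do
           {
             body_runs++;
             counter += -1;        // addib,<> -1,%rN,loop : add, then test
           }
         while (counter != 0);     // branch back while the sum is nonzero

         printf ("loop body ran %d times\n", body_runs);   // 5
         return 0;
       }

   [End of aside.]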
*/ + + if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) + { + if (which_alternative == 0) + return "ldo %1(%0),%0"; + else if (which_alternative == 1) + { + output_asm_insn ("fstws %0,-16(0,%%r30)",operands); + output_asm_insn ("ldw -16(0,%%r30),%4",operands); + output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands); + return "fldws -16(0,%%r30),%0"; + } + else + { + output_asm_insn ("ldw %0,%4", operands); + return "ldo %1(%4),%4\n\tstw %4,%0"; + } + } + + if (which_alternative == 0) + { + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int length = get_attr_length (insn); + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* Handle short versions first. */ + if (length == 4 && nullify) + return "addib,%C2,n %1,%0,%3"; + else if (length == 4 && ! nullify) + return "addib,%C2 %1,%0,%3"; + else if (length == 8) + { + /* Handle weird backwards branch with a fulled delay slot + which is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + return "addib,%N2,n %1,%0,.+12\n\tbl %3,0"; + /* Handle short backwards branch with an unfilled delay slot. + Using a addb;nop rather than addi;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && insn_addresses + && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] + - insn_addresses[INSN_UID (insn)] - 8)) + return "addib,%C2 %1,%0,%3%#"; + + /* Handle normal cases. */ + if (nullify) + return "addi,%N2 %1,%0,%0\n\tbl,n %3,0"; + else + return "addi,%N2 %1,%0,%0\n\tbl %3,0"; + } + else + abort(); + } + /* Deal with gross reload from FP register case. */ + else if (which_alternative == 1) + { + /* Move loop counter from FP register to MEM then into a GR, + increment the GR, store the GR into MEM, and finally reload + the FP register from MEM from within the branch's delay slot. */ + output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands); + output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands); + if (get_attr_length (insn) == 24) + return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0"; + else + return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0"; + } + /* Deal with gross reload from memory case. */ + else + { + /* Reload loop counter from memory, the store back to memory + happens in the branch's delay slot. */ + output_asm_insn ("ldw %0,%4", operands); + if (get_attr_length (insn) == 12) + return "addib,%C2 %1,%4,%3\n\tstw %4,%0"; + else + return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0"; + } +} + +/* Return the output template for emitting a dbra type insn. + + Note it may perform some output operations on its own before + returning the final output string. */ +char * +output_movb (operands, insn, which_alternative, reverse_comparison) + rtx *operands; + rtx insn; + int which_alternative; + int reverse_comparison; +{ + + /* A conditional branch to the following instruction (eg the delay slot) is + asking for a disaster. Be prepared! 
*/ + + if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) + { + if (which_alternative == 0) + return "copy %1,%0"; + else if (which_alternative == 1) + { + output_asm_insn ("stw %1,-16(0,%%r30)",operands); + return "fldws -16(0,%%r30),%0"; + } + else if (which_alternative == 2) + return "stw %1,%0"; + else + return "mtsar %r1"; + } + + /* Support the second variant. */ + if (reverse_comparison) + PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2]))); + + if (which_alternative == 0) + { + int nullify = INSN_ANNULLED_BRANCH_P (insn); + int length = get_attr_length (insn); + + /* If this is a long branch with its delay slot unfilled, set `nullify' + as it can nullify the delay slot and save a nop. */ + if (length == 8 && dbr_sequence_length () == 0) + nullify = 1; + + /* If this is a short forward conditional branch which did not get + its delay slot filled, the delay slot can still be nullified. */ + if (! nullify && length == 4 && dbr_sequence_length () == 0) + nullify = forward_branch_p (insn); + + /* Handle short versions first. */ + if (length == 4 && nullify) + return "movb,%C2,n %1,%0,%3"; + else if (length == 4 && ! nullify) + return "movb,%C2 %1,%0,%3"; + else if (length == 8) + { + /* Handle weird backwards branch with a filled delay slot + which is nullified. */ + if (dbr_sequence_length () != 0 + && ! forward_branch_p (insn) + && nullify) + return "movb,%N2,n %1,%0,.+12\n\tbl %3,0"; + + /* Handle short backwards branch with an unfilled delay slot. + Using a movb;nop rather than or;bl saves 1 cycle for both + taken and untaken branches. */ + else if (dbr_sequence_length () == 0 + && ! forward_branch_p (insn) + && insn_addresses + && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] + - insn_addresses[INSN_UID (insn)] - 8)) + return "movb,%C2 %1,%0,%3%#"; + /* Handle normal cases. */ + if (nullify) + return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0"; + else + return "or,%N2 %1,%%r0,%0\n\tbl %3,0"; + } + else + abort(); + } + /* Deal with gross reload from FP register case. */ + else if (which_alternative == 1) + { + /* Move loop counter from FP register to MEM then into a GR, + increment the GR, store the GR into MEM, and finally reload + the FP register from MEM from within the branch's delay slot. */ + output_asm_insn ("stw %1,-16(0,%%r30)",operands); + if (get_attr_length (insn) == 12) + return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0"; + else + return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0"; + } + /* Deal with gross reload from memory case. */ + else if (which_alternative == 2) + { + /* Reload loop counter from memory, the store back to memory + happens in the branch's delay slot. */ + if (get_attr_length (insn) == 8) + return "comb,%S2 0,%1,%3\n\tstw %1,%0"; + else + return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0"; + } + /* Handle SAR as a destination. */ + else + { + if (get_attr_length (insn) == 8) + return "comb,%S2 0,%1,%3\n\tmtsar %r1"; + else + return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1"; + } +} + + +/* INSN is a millicode call. It may have an unconditional jump in its delay + slot. + + CALL_DEST is the routine we are calling. */ + +char * +output_millicode_call (insn, call_dest) + rtx insn; + rtx call_dest; +{ + int distance; + rtx xoperands[4]; + rtx seq_insn; + + /* Handle common case -- empty delay slot or no jump in the delay slot, + and we're sure that the branch will reach the beginning of the $CODE$ + subspace. 
*/ + if ((dbr_sequence_length () == 0 + && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28)) + || (dbr_sequence_length () != 0 + && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN + && get_attr_length (insn) == 4)) + { + xoperands[0] = call_dest; + output_asm_insn ("bl %0,%%r31%#", xoperands); + return ""; + } + + /* This call may not reach the beginning of the $CODE$ subspace. */ + if (get_attr_length (insn) > 4) + { + int delay_insn_deleted = 0; + rtx xoperands[2]; + + /* We need to emit an inline long-call branch. */ + if (dbr_sequence_length () != 0 + && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) + { + /* A non-jump insn in the delay slot. By definition we can + emit this insn before the call. */ + final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0); + + /* Now delete the delay insn. */ + PUT_CODE (NEXT_INSN (insn), NOTE); + NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; + delay_insn_deleted = 1; + } + + /* If we're allowed to use be/ble instructions, then this is the + best sequence to use for a long millicode call. */ + if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS + || ! (flag_pic || TARGET_PORTABLE_RUNTIME)) + { + xoperands[0] = call_dest; + output_asm_insn ("ldil L%%%0,%%r31", xoperands); + output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands); + output_asm_insn ("nop", xoperands); + } + /* Pure portable runtime doesn't allow be/ble; we also don't have + PIC support int he assembler/linker, so this sequence is needed. */ + else if (TARGET_PORTABLE_RUNTIME) + { + xoperands[0] = call_dest; + /* Get the address of our target into %r29. */ + output_asm_insn ("ldil L%%%0,%%r29", xoperands); + output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands); + + /* Get our return address into %r31. */ + output_asm_insn ("blr 0,%%r31", xoperands); + + /* Jump to our target address in %r29. */ + output_asm_insn ("bv,n 0(%%r29)", xoperands); + + /* Empty delay slot. Note this insn gets fetched twice and + executed once. To be safe we use a nop. */ + output_asm_insn ("nop", xoperands); + return ""; + } + /* PIC long millicode call sequence. */ + else + { + xoperands[0] = call_dest; + xoperands[1] = gen_label_rtx (); + /* Get our address + 8 into %r1. */ + output_asm_insn ("bl .+8,%%r1", xoperands); + + /* Add %r1 to the offset of our target from the next insn. */ + output_asm_insn ("addil L%%%0-%1,%%r1", xoperands); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands); + + /* Get the return address into %r31. */ + output_asm_insn ("blr 0,%%r31", xoperands); + + /* Branch to our target which is in %r1. */ + output_asm_insn ("bv,n 0(%%r1)", xoperands); + + /* Empty delay slot. Note this insn gets fetched twice and + executed once. To be safe we use a nop. */ + output_asm_insn ("nop", xoperands); + } + + /* If we had a jump in the call's delay slot, output it now. */ + if (dbr_sequence_length () != 0 + && !delay_insn_deleted) + { + xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); + output_asm_insn ("b,n %0", xoperands); + + /* Now delete the delay insn. */ + PUT_CODE (NEXT_INSN (insn), NOTE); + NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; + } + return ""; + } + + /* This call has an unconditional jump in its delay slot and the + call is known to reach its target or the beginning of the current + subspace. */ + + /* Use the containing sequence insn's address. 
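
   [Editorial aside -- not part of the original pa.c.]  The reach test used
   just below (VAL_14_BITS_P, from pa.h) asks whether the computed byte
   distance fits the branch's signed 14-bit displacement field, i.e. lies in
   -8192..8191.  A standalone equivalent, with our own helper name and the
   usual wrap-around trick spelled out:

       #include <stdio.h>

       static int fits_14_bits (long disp)
       {
         return (unsigned long) (disp + 0x2000) < 0x4000;
       }

       int main (void)
       {
         printf ("%d\n", fits_14_bits (8191));    // 1: largest forward reach
         printf ("%d\n", fits_14_bits (8192));    // 0: one byte too far
         printf ("%d\n", fits_14_bits (-8192));   // 1: largest backward reach
         printf ("%d\n", fits_14_bits (-8193));   // 0: out of range
         return 0;
       }

   [End of aside.]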
*/ + seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); + + distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))] + - insn_addresses[INSN_UID (seq_insn)] - 8; + + /* If the branch was too far away, emit a normal call followed + by a nop, followed by the unconditional branch. + + If the branch is close, then adjust %r2 from within the + call's delay slot. */ + + xoperands[0] = call_dest; + xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1); + if (! VAL_14_BITS_P (distance)) + output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands); + else + { + xoperands[3] = gen_label_rtx (); + output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[3])); + } + + /* Delete the jump. */ + PUT_CODE (NEXT_INSN (insn), NOTE); + NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; + return ""; +} + +extern struct obstack permanent_obstack; +extern struct obstack *saveable_obstack; +extern struct obstack *rtl_obstack; +extern struct obstack *current_obstack; + +/* INSN is either a function call. It may have an unconditional jump + in its delay slot. + + CALL_DEST is the routine we are calling. */ + +char * +output_call (insn, call_dest) + rtx insn; + rtx call_dest; +{ + int distance; + rtx xoperands[4]; + rtx seq_insn; + + /* Handle common case -- empty delay slot or no jump in the delay slot, + and we're sure that the branch will reach the beginning of the $CODE$ + subspace. */ + if ((dbr_sequence_length () == 0 + && get_attr_length (insn) == 8) + || (dbr_sequence_length () != 0 + && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN + && get_attr_length (insn) == 4)) + { + xoperands[0] = call_dest; + output_asm_insn ("bl %0,%%r2%#", xoperands); + return ""; + } + + /* This call may not reach the beginning of the $CODE$ subspace. */ + if (get_attr_length (insn) > 8) + { + int delay_insn_deleted = 0; + rtx xoperands[2]; + rtx link; + + /* We need to emit an inline long-call branch. Furthermore, + because we're changing a named function call into an indirect + function call well after the parameters have been set up, we + need to make sure any FP args appear in both the integer + and FP registers. Also, we need move any delay slot insn + out of the delay slot. And finally, we can't rely on the linker + being able to fix the call to $$dyncall! -- Yuk!. */ + if (dbr_sequence_length () != 0 + && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) + { + /* A non-jump insn in the delay slot. By definition we can + emit this insn before the call (and in fact before argument + relocating. */ + final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0); + + /* Now delete the delay insn. */ + PUT_CODE (NEXT_INSN (insn), NOTE); + NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; + delay_insn_deleted = 1; + } + + /* Now copy any FP arguments into integer registers. */ + for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) + { + int arg_mode, regno; + rtx use = XEXP (link, 0); + if (! (GET_CODE (use) == USE + && GET_CODE (XEXP (use, 0)) == REG + && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) + continue; + + arg_mode = GET_MODE (XEXP (use, 0)); + regno = REGNO (XEXP (use, 0)); + /* Is it a floating point register? */ + if (regno >= 32 && regno <= 39) + { + /* Copy from the FP register into an integer register + (via memory). 
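
   [Editorial aside -- not part of the original pa.c.]  The fstws/ldw pair
   emitted below bounces the argument through the stack so that the same bit
   pattern ends up in a general register as well as the FP register.  The
   sketch shows the effect of that round trip on the host (assuming 4-byte
   int and float and an IEEE-754 representation); the variable names are
   made up.

       #include <stdio.h>
       #include <string.h>

       int main (void)
       {
         float f = 2.5f;               // the SFmode argument
         unsigned int gr;              // stand-in for the general register

         memcpy (&gr, &f, sizeof gr);  // fstws ...; ldw ... -- a memory bounce
         printf ("FP bits seen as an integer: 0x%08x\n", gr);   // 0x40200000
         return 0;
       }

   [End of aside.]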
*/ + if (arg_mode == SFmode) + { + xoperands[0] = XEXP (use, 0); + xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2); + output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands); + output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); + } + else + { + xoperands[0] = XEXP (use, 0); + xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2); + output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands); + output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands); + output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); + } + } + } + + /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since + we don't have any direct calls in that case. */ + { + int i; + char *name = XSTR (call_dest, 0); + + /* See if we have already put this function on the list + of deferred plabels. This list is generally small, + so a liner search is not too ugly. If it proves too + slow replace it with something faster. */ + for (i = 0; i < n_deferred_plabels; i++) + if (strcmp (name, deferred_plabels[i].name) == 0) + break; + + /* If the deferred plabel list is empty, or this entry was + not found on the list, create a new entry on the list. */ + if (deferred_plabels == NULL || i == n_deferred_plabels) + { + struct obstack *ambient_obstack = current_obstack; + struct obstack *ambient_rtl_obstack = rtl_obstack; + char *real_name; + + /* Any RTL we create here needs to live until the end of + the compilation unit and therefore must live on the + permanent obstack. */ + current_obstack = &permanent_obstack; + rtl_obstack = &permanent_obstack; + + if (deferred_plabels == 0) + deferred_plabels = (struct deferred_plabel *) + xmalloc (1 * sizeof (struct deferred_plabel)); + else + deferred_plabels = (struct deferred_plabel *) + xrealloc (deferred_plabels, + ((n_deferred_plabels + 1) + * sizeof (struct deferred_plabel))); + + i = n_deferred_plabels++; + deferred_plabels[i].internal_label = gen_label_rtx (); + deferred_plabels[i].name = obstack_alloc (&permanent_obstack, + strlen (name) + 1); + strcpy (deferred_plabels[i].name, name); + + /* Switch back to normal obstack allocation. */ + current_obstack = ambient_obstack; + rtl_obstack = ambient_rtl_obstack; + + /* Gross. We have just implicitly taken the address of this + function, mark it as such. */ + STRIP_NAME_ENCODING (real_name, name); + TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1; + } + + /* We have to load the address of the function using a procedure + label (plabel). Inline plabels can lose for PIC and other + cases, so avoid them by creating a 32bit plabel in the data + segment. */ + if (flag_pic) + { + xoperands[0] = deferred_plabels[i].internal_label; + xoperands[1] = gen_label_rtx (); + + output_asm_insn ("addil LT%%%0,%%r19", xoperands); + output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands); + output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands); + + /* Get our address + 8 into %r1. */ + output_asm_insn ("bl .+8,%%r1", xoperands); + + /* Add %r1 to the offset of dyncall from the next insn. */ + output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[1])); + output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands); + + /* Get the return address into %r31. */ + output_asm_insn ("blr 0,%%r31", xoperands); + + /* Branch to our target which is in %r1. */ + output_asm_insn ("bv 0(%%r1)", xoperands); + + /* Copy the return address into %r2 also. 
*/ + output_asm_insn ("copy %%r31,%%r2", xoperands); + } + else + { + xoperands[0] = deferred_plabels[i].internal_label; + + /* Get the address of our target into %r22. */ + output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands); + output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands); + + /* Get the high part of the address of $dyncall into %r2, then + add in the low part in the branch instruction. */ + output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands); + output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands); + + /* Copy the return pointer into both %r31 and %r2. */ + output_asm_insn ("copy %%r31,%%r2", xoperands); + } + } + + /* If we had a jump in the call's delay slot, output it now. */ + if (dbr_sequence_length () != 0 + && !delay_insn_deleted) + { + xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); + output_asm_insn ("b,n %0", xoperands); + + /* Now delete the delay insn. */ + PUT_CODE (NEXT_INSN (insn), NOTE); + NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; + } + return ""; + } + + /* This call has an unconditional jump in its delay slot and the + call is known to reach its target or the beginning of the current + subspace. */ + + /* Use the containing sequence insn's address. */ + seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); + + distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))] + - insn_addresses[INSN_UID (seq_insn)] - 8; + + /* If the branch was too far away, emit a normal call followed + by a nop, followed by the unconditional branch. + + If the branch is close, then adjust %r2 from within the + call's delay slot. */ + + xoperands[0] = call_dest; + xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1); + if (! VAL_14_BITS_P (distance)) + output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands); + else + { + xoperands[3] = gen_label_rtx (); + output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (xoperands[3])); + } + + /* Delete the jump. */ + PUT_CODE (NEXT_INSN (insn), NOTE); + NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; + return ""; +} + +/* In HPUX 8.0's shared library scheme, special relocations are needed + for function labels if they might be passed to a function + in a shared library (because shared libraries don't live in code + space), and special magic is needed to construct their address. + + For reasons too disgusting to describe storage for the new name + is allocated either on the saveable_obstack (released at function + exit) or on the permanent_obstack for things that can never change + (libcall names for example). */ + +void +hppa_encode_label (sym, permanent) + rtx sym; + int permanent; +{ + char *str = XSTR (sym, 0); + int len = strlen (str); + char *newstr; + + newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack), + len + 2); + + if (str[0] == '*') + *newstr++ = *str++; + strcpy (newstr + 1, str); + *newstr = '@'; + XSTR (sym,0) = newstr; +} + +int +function_label_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0)); +} + +/* Returns 1 if OP is a function label involved in a simple addition + with a constant. Used to keep certain patterns from matching + during instruction combination. */ +int +is_function_label_plus_const (op) + rtx op; +{ + /* Strip off any CONST. 
*/ + if (GET_CODE (op) == CONST) + op = XEXP (op, 0); + + return (GET_CODE (op) == PLUS + && function_label_operand (XEXP (op, 0), Pmode) + && GET_CODE (XEXP (op, 1)) == CONST_INT); +} + +/* Returns 1 if the 6 operands specified in OPERANDS are suitable for + use in fmpyadd instructions. */ +int +fmpyaddoperands (operands) + rtx *operands; +{ + enum machine_mode mode = GET_MODE (operands[0]); + + /* Must be a floating point mode. */ + if (mode != SFmode && mode != DFmode) + return 0; + + /* All modes must be the same. */ + if (! (mode == GET_MODE (operands[1]) + && mode == GET_MODE (operands[2]) + && mode == GET_MODE (operands[3]) + && mode == GET_MODE (operands[4]) + && mode == GET_MODE (operands[5]))) + return 0; + + /* All operands must be registers. */ + if (! (GET_CODE (operands[1]) == REG + && GET_CODE (operands[2]) == REG + && GET_CODE (operands[3]) == REG + && GET_CODE (operands[4]) == REG + && GET_CODE (operands[5]) == REG)) + return 0; + + /* Only 2 real operands to the addition. One of the input operands must + be the same as the output operand. */ + if (! rtx_equal_p (operands[3], operands[4]) + && ! rtx_equal_p (operands[3], operands[5])) + return 0; + + /* Inout operand of add can not conflict with any operands from multiply. */ + if (rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[3], operands[1]) + || rtx_equal_p (operands[3], operands[2])) + return 0; + + /* multiply can not feed into addition operands. */ + if (rtx_equal_p (operands[4], operands[0]) + || rtx_equal_p (operands[5], operands[0])) + return 0; + + /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ + if (mode == SFmode + && (REGNO (operands[0]) < 57 + || REGNO (operands[1]) < 57 + || REGNO (operands[2]) < 57 + || REGNO (operands[3]) < 57 + || REGNO (operands[4]) < 57 + || REGNO (operands[5]) < 57)) + return 0; + + /* Passed. Operands are suitable for fmpyadd. */ + return 1; +} + +/* Returns 1 if the 6 operands specified in OPERANDS are suitable for + use in fmpysub instructions. */ +int +fmpysuboperands (operands) + rtx *operands; +{ + enum machine_mode mode = GET_MODE (operands[0]); + + /* Must be a floating point mode. */ + if (mode != SFmode && mode != DFmode) + return 0; + + /* All modes must be the same. */ + if (! (mode == GET_MODE (operands[1]) + && mode == GET_MODE (operands[2]) + && mode == GET_MODE (operands[3]) + && mode == GET_MODE (operands[4]) + && mode == GET_MODE (operands[5]))) + return 0; + + /* All operands must be registers. */ + if (! (GET_CODE (operands[1]) == REG + && GET_CODE (operands[2]) == REG + && GET_CODE (operands[3]) == REG + && GET_CODE (operands[4]) == REG + && GET_CODE (operands[5]) == REG)) + return 0; + + /* Only 2 real operands to the subtraction. Subtraction is not a commutative + operation, so operands[4] must be the same as operand[3]. */ + if (! rtx_equal_p (operands[3], operands[4])) + return 0; + + /* multiply can not feed into subtraction. */ + if (rtx_equal_p (operands[5], operands[0])) + return 0; + + /* Inout operand of sub can not conflict with any operands from multiply. */ + if (rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[3], operands[1]) + || rtx_equal_p (operands[3], operands[2])) + return 0; + + /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ + if (mode == SFmode + && (REGNO (operands[0]) < 57 + || REGNO (operands[1]) < 57 + || REGNO (operands[2]) < 57 + || REGNO (operands[3]) < 57 + || REGNO (operands[4]) < 57 + || REGNO (operands[5]) < 57)) + return 0; + + /* Passed. 
Operands are suitable for fmpysub. */ + return 1; +} + +int +plus_xor_ior_operator (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR + || GET_CODE (op) == IOR); +} + +/* Return 1 if the given constant is 2, 4, or 8. These are the valid + constants for shadd instructions. */ +static int +shadd_constant_p (val) + int val; +{ + if (val == 2 || val == 4 || val == 8) + return 1; + else + return 0; +} + +/* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are + the valid constant for shadd instructions. */ +int +shadd_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op))); +} + +/* Return 1 if OP is valid as a base register in a reg + reg address. */ + +int +basereg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + /* cse will create some unscaled indexed addresses, however; it + generally isn't a win on the PA, so avoid creating unscaled + indexed addresses until after cse is finished. */ + if (!cse_not_expected) + return 0; + + /* Once reload has started everything is considered valid. Reload should + only create indexed addresses using the stack/frame pointer, and any + others were checked for validity when created by the combine pass. + + Also allow any register when TARGET_NO_SPACE_REGS is in effect since + we don't have to worry about the braindamaged implicit space register + selection using the basereg only (rather than effective address) + screwing us over. */ + if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed) + return (GET_CODE (op) == REG); + + /* Stack is always OK for indexing. */ + if (op == stack_pointer_rtx) + return 1; + + /* While it's always safe to index off the frame pointer, it's not + always profitable, particularly when the frame pointer is being + eliminated. */ + if (! flag_omit_frame_pointer && op == frame_pointer_rtx) + return 1; + + /* The only other valid OPs are pseudo registers with + REGNO_POINTER_FLAG set. */ + if (GET_CODE (op) != REG + || REGNO (op) < FIRST_PSEUDO_REGISTER + || ! register_operand (op, mode)) + return 0; + + return REGNO_POINTER_FLAG (REGNO (op)); +} + +/* Return 1 if this operand is anything other than a hard register. */ + +int +non_hard_reg_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER); +} + +/* Return 1 if INSN branches forward. Should be using insn_addresses + to avoid walking through all the insns... */ +static int +forward_branch_p (insn) + rtx insn; +{ + rtx label = JUMP_LABEL (insn); + + while (insn) + { + if (insn == label) + break; + else + insn = NEXT_INSN (insn); + } + + return (insn == label); +} + +/* Return 1 if OP is an equality comparison, else return 0. */ +int +eq_neq_comparison_operator (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == EQ || GET_CODE (op) == NE); +} + +/* Return 1 if OP is an operator suitable for use in a movb instruction. */ +int +movb_comparison_operator (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + return (GET_CODE (op) == EQ || GET_CODE (op) == NE + || GET_CODE (op) == LT || GET_CODE (op) == GE); +} + +/* Return 1 if INSN is in the delay slot of a call instruction. 
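
   [Editorial aside -- not part of the original pa.c.]  On why only 2, 4 and
   8 pass shadd_constant_p above: the PA's shift-and-add instructions
   (sh1add, sh2add, sh3add) scale an index by shifting it left 1, 2 or 3
   bits before adding it to a base, so those are the only scale factors the
   hardware folds into one instruction.  A behavioural model, with made-up
   values:

       #include <stdio.h>

       static unsigned int shNadd (unsigned int idx, int shift, unsigned int base)
       {
         return (idx << shift) + base;   // shift 1..3  <=>  scale 2, 4 or 8
       }

       int main (void)
       {
         unsigned int base = 0x1000, idx = 7;
         printf ("sh1add: 0x%x\n", shNadd (idx, 1, base));   // base + idx*2
         printf ("sh2add: 0x%x\n", shNadd (idx, 2, base));   // base + idx*4
         printf ("sh3add: 0x%x\n", shNadd (idx, 3, base));   // base + idx*8
         return 0;
       }

   [End of aside.]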
*/ +int +jump_in_call_delay (insn) + rtx insn; +{ + + if (GET_CODE (insn) != JUMP_INSN) + return 0; + + if (PREV_INSN (insn) + && PREV_INSN (PREV_INSN (insn)) + && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN) + { + rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn))); + + return (GET_CODE (PATTERN (test_insn)) == SEQUENCE + && XVECEXP (PATTERN (test_insn), 0, 1) == insn); + + } + else + return 0; +} + +/* Output an unconditional move and branch insn. */ + +char * +output_parallel_movb (operands, length) + rtx *operands; + int length; +{ + /* These are the cases in which we win. */ + if (length == 4) + return "mov%I1b,tr %1,%0,%2"; + + /* None of these cases wins, but they don't lose either. */ + if (dbr_sequence_length () == 0) + { + /* Nothing in the delay slot, fake it by putting the combined + insn (the copy or add) in the delay slot of a bl. */ + if (GET_CODE (operands[1]) == CONST_INT) + return "bl %2,0\n\tldi %1,%0"; + else + return "bl %2,0\n\tcopy %1,%0"; + } + else + { + /* Something in the delay slot, but we've got a long branch. */ + if (GET_CODE (operands[1]) == CONST_INT) + return "ldi %1,%0\n\tbl %2,0"; + else + return "copy %1,%0\n\tbl %2,0"; + } +} + +/* Output an unconditional add and branch insn. */ + +char * +output_parallel_addb (operands, length) + rtx *operands; + int length; +{ + /* To make life easy we want operand0 to be the shared input/output + operand and operand1 to be the readonly operand. */ + if (operands[0] == operands[1]) + operands[1] = operands[2]; + + /* These are the cases in which we win. */ + if (length == 4) + return "add%I1b,tr %1,%0,%3"; + + /* None of these cases win, but they don't lose either. */ + if (dbr_sequence_length () == 0) + { + /* Nothing in the delay slot, fake it by putting the combined + insn (the copy or add) in the delay slot of a bl. */ + return "bl %3,0\n\tadd%I1 %1,%0,%0"; + } + else + { + /* Something in the delay slot, but we've got a long branch. */ + return "add%I1 %1,%0,%0\n\tbl %3,0"; + } +} + +/* Return nonzero if INSN (a jump insn) immediately follows a call to + a named function. This is used to discourage creating parallel movb/addb + insns since a jump which immediately follows a call can execute in the + delay slot of the call. + + It is also used to avoid filling the delay slot of a jump which + immediately follows a call since the jump can usually be eliminated + completely by modifying RP in the delay slot of the call. */ + +int +following_call (insn) + rtx insn; +{ + /* CYGNUS LOCAL PA8000/law */ + /* We do not parallel movb,addb or place jumps into call delay slots when + optimizing for the PA8000. */ + if (pa_cpu != PROCESSOR_8000) + return 0; + /* END CYGNUS LOCAL */ + + /* Find the previous real insn, skipping NOTEs. */ + insn = PREV_INSN (insn); + while (insn && GET_CODE (insn) == NOTE) + insn = PREV_INSN (insn); + + /* Check for CALL_INSNs and millicode calls. */ + if (insn + && ((GET_CODE (insn) == CALL_INSN + && get_attr_type (insn) != TYPE_DYNCALL) + || (GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) != SEQUENCE + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER + && get_attr_type (insn) == TYPE_MILLI))) + return 1; + + return 0; +} + +/* Restore any INSN_CODEs for insns with unscaled indexed addresses since + the INSN_CODE might be clobberd by rerecognition triggered by reorg. 
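
   [Editorial aside -- not part of the original pa.c.]  The two routines
   below implement a simple save/restore of a per-insn attribute: record the
   INSN_CODE of interesting insns in an array indexed by INSN_UID, let the
   destructive pass run, then write the recorded codes back.  A generic
   sketch of that pattern with hypothetical names and sizes:

       #include <string.h>

       #define MAX_ID 8
       static int saved_codes[MAX_ID];

       static void record_codes (const int *codes, int n)
       {
         int m = n < MAX_ID ? n : MAX_ID;
         memset (saved_codes, -1, sizeof saved_codes);   // -1 == "not recorded"
         memcpy (saved_codes, codes, m * sizeof codes[0]);
       }

       static void restore_codes (int *codes, int n)
       {
         int i;
         for (i = 0; i < n && i < MAX_ID; i++)
           if (saved_codes[i] != -1)
             codes[i] = saved_codes[i];                  // undo rerecognition
       }

       int main (void)
       {
         int codes[4] = { 10, 11, 12, 13 };
         record_codes (codes, 4);
         codes[2] = 99;               // a later pass clobbers one entry
         restore_codes (codes, 4);
         return codes[2] == 12 ? 0 : 1;
       }

   [End of aside.]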
*/ + +static void +restore_unscaled_index_insn_codes (insns) + rtx insns; +{ + rtx insn; + + for (insn = insns; insn; insn = NEXT_INSN (insn)) + { + if (INSN_UID (insn) < max_unscaled_index_insn_codes_uid + && unscaled_index_insn_codes[INSN_UID (insn)] != -1) + INSN_CODE (insn) = unscaled_index_insn_codes[INSN_UID (insn)]; + } +} + +/* Severe braindamage: + + On the PA, address computations within MEM expressions are not + commutative because of the implicit space register selection + from the base register (instead of the entire effective address). + + Because of this mis-feature we have to know which register in a reg+reg + address is the base and which is the index. + + Before reload, the base can be identified by REGNO_POINTER_FLAG. We use + this to force base + index addresses to match a different insn than + index + base addresses. + + We assume that no pass during or after reload creates new unscaled indexed + addresses, so any unscaled indexed address we find after reload must have + at one time been recognized a base + index or index + base and we accept + any register as a base register. + + This scheme assumes that no pass during/after reload will rerecognize an + insn with an unscaled indexed address. This failed due to a reorg call + to rerecognize certain insns. + + So, we record if an insn uses an unscaled indexed address and which + register is the base (via recording of the INSN_CODE for such insns). + + Just before we output code for the function, we make sure all the insns + using unscaled indexed addresses have the same INSN_CODE as they did + immediately before delay slot scheduling. + + This is extremely gross. Long term, I'd like to be able to look at + REG_POINTER_FLAG to handle these kinds of problems. */ + +static void +record_unscaled_index_insn_codes (insns) + rtx insns; +{ + rtx insn; + + max_unscaled_index_insn_codes_uid = get_max_uid (); + unscaled_index_insn_codes + = (int *)xmalloc (max_unscaled_index_insn_codes_uid * sizeof (int)); + memset (unscaled_index_insn_codes, -1, + max_unscaled_index_insn_codes_uid * sizeof (int)); + + for (insn = insns; insn; insn = NEXT_INSN (insn)) + { + rtx set = single_set (insn); + rtx mem = NULL_RTX; + + /* Ignore anything that isn't a normal SET. */ + if (set == NULL_RTX) + continue; + + /* No insns can have more than one MEM. */ + if (GET_CODE (SET_SRC (set)) == MEM) + mem = SET_SRC (set); + + if (GET_CODE (SET_DEST (set)) == MEM) + mem = SET_DEST (set); + + /* If neither operand is a mem, then there's nothing to do. */ + if (mem == NULL_RTX) + continue; + + if (GET_CODE (XEXP (mem, 0)) != PLUS) + continue; + + /* If both are REGs (or SUBREGs), then record the insn code for + this insn. */ + if (REG_P (XEXP (XEXP (mem, 0), 0)) && REG_P (XEXP (XEXP (mem, 0), 1))) + unscaled_index_insn_codes[INSN_UID (insn)] = INSN_CODE (insn); + } +} + +/* We use this hook to perform a PA specific optimization which is difficult + to do in earlier passes. + + We want the delay slots of branches within jump tables to be filled. + None of the compiler passes at the moment even has the notion that a + PA jump table doesn't contain addresses, but instead contains actual + instructions! + + Because we actually jump into the table, the addresses of each entry + must stay constant in relation to the beginning of the table (which + itself must stay constant relative to the instruction to jump into + it). I don't believe we can guarantee earlier passes of the compiler + will adhere to those rules. 
+ + So, late in the compilation process we find all the jump tables, and + expand them into real code -- eg each entry in the jump table vector + will get an appropriate label followed by a jump to the final target. + + Reorg and the final jump pass can then optimize these branches and + fill their delay slots. We end up with smaller, more efficient code. + + The jump instructions within the table are special; we must be able + to identify them during assembly output (if the jumps don't get filled + we need to emit a nop rather than nullifying the delay slot)). We + identify jumps in switch tables by marking the SET with DImode. + + We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB + insns. This serves two purposes, first it prevents jump.c from + noticing that the last N entries in the table jump to the instruction + immediately after the table and deleting the jumps. Second, those + insns mark where we should emit .begin_brtab and .end_brtab directives + when using GAS (allows for better link time optimizations). */ + +void +pa_reorg (insns) + rtx insns; +{ + rtx insn; + + /* Keep track of which insns have unscaled indexed addresses, and which + register is the base address in such insns. */ + record_unscaled_index_insn_codes (insns); + + remove_useless_addtr_insns (insns, 1); + + /* CYGNUS LOCAL PA8000/law */ + /* These optimizations hurt PA8000 performance. */ + if (pa_cpu != PROCESSOR_8000) + pa_combine_instructions (get_insns ()); + /* END CYGNUS LOCAL */ + + /* This is fairly cheap, so always run it if optimizing. */ + if (optimize > 0 && !TARGET_BIG_SWITCH) + { + /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */ + insns = get_insns (); + for (insn = insns; insn; insn = NEXT_INSN (insn)) + { + rtx pattern, tmp, location; + unsigned int length, i; + + /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */ + if (GET_CODE (insn) != JUMP_INSN + || (GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) + continue; + + /* Emit marker for the beginning of the branch table. */ + emit_insn_before (gen_begin_brtab (), insn); + + pattern = PATTERN (insn); + location = PREV_INSN (insn); + length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC); + + for (i = 0; i < length; i++) + { + /* Emit a label before each jump to keep jump.c from + removing this code. */ + tmp = gen_label_rtx (); + LABEL_NUSES (tmp) = 1; + emit_label_after (tmp, location); + location = NEXT_INSN (location); + + if (GET_CODE (pattern) == ADDR_VEC) + { + /* Emit the jump itself. */ + tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0)); + tmp = emit_jump_insn_after (tmp, location); + JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0); + /* It is easy to rely on the branch table markers + during assembly output to trigger the correct code + for a switch table jump with an unfilled delay slot, + + However, that requires state and assumes that we look + at insns in order. + + We can't make such assumptions when computing the length + of instructions. Ugh. We could walk the insn chain to + determine if this instruction is in a branch table, but + that can get rather expensive, particularly during the + branch shortening phase of the compiler. + + So instead we mark this jump as being special. This is + far from ideal and knows that no code after this will + muck around with the mode of the JUMP_INSN itself. */ + PUT_MODE (tmp, SImode); + LABEL_NUSES (JUMP_LABEL (tmp))++; + location = NEXT_INSN (location); + } + else + { + /* Emit the jump itself. 
*/ + tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0)); + tmp = emit_jump_insn_after (tmp, location); + JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0); + /* It is easy to rely on the branch table markers + during assembly output to trigger the correct code + for a switch table jump with an unfilled delay slot, + + However, that requires state and assumes that we look + at insns in order. + + We can't make such assumptions when computing the length + of instructions. Ugh. We could walk the insn chain to + determine if this instruction is in a branch table, but + that can get rather expensive, particularly during the + branch shortening phase of the compiler. + + So instead we mark this jump as being special. This is + far from ideal and knows that no code after this will + muck around with the mode of the JUMP_INSN itself. */ + PUT_MODE (tmp, SImode); + LABEL_NUSES (JUMP_LABEL (tmp))++; + location = NEXT_INSN (location); + } + + /* Emit a BARRIER after the jump. */ + emit_barrier_after (location); + location = NEXT_INSN (location); + } + + /* Emit marker for the end of the branch table. */ + emit_insn_before (gen_end_brtab (), location); + location = NEXT_INSN (location); + emit_barrier_after (location); + + /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */ + delete_insn (insn); + } + } + else + { + /* Sill need an end_brtab insn. */ + insns = get_insns (); + for (insn = insns; insn; insn = NEXT_INSN (insn)) + { + /* Find an ADDR_VEC insn. */ + if (GET_CODE (insn) != JUMP_INSN + || (GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) + continue; + + /* Now generate markers for the beginning and end of the + branch table. */ + emit_insn_before (gen_begin_brtab (), insn); + emit_insn_after (gen_end_brtab (), insn); + } + } +} + +/* The PA has a number of odd instructions which can perform multiple + tasks at once. On first generation PA machines (PA1.0 and PA1.1) + it may be profitable to combine two instructions into one instruction + with two outputs. It's not profitable PA2.0 machines because the + two outputs would take two slots in the reorder buffers. + + This routine finds instructions which can be combined and combines + them. We only support some of the potential combinations, and we + only try common ways to find suitable instructions. + + * addb can add two registers or a register and a small integer + and jump to a nearby (+-8k) location. Normally the jump to the + nearby location is conditional on the result of the add, but by + using the "true" condition we can make the jump unconditional. + Thus addb can perform two independent operations in one insn. + + * movb is similar to addb in that it can perform a reg->reg + or small immediate->reg copy and jump to a nearby (+-8k location). + + * fmpyadd and fmpysub can perform a FP multiply and either an + FP add or FP sub if the operands of the multiply and add/sub are + independent (there are other minor restrictions). Note both + the fmpy and fadd/fsub can in theory move to better spots according + to data dependencies, but for now we require the fmpy stay at a + fixed location. + + * Many of the memory operations can perform pre & post updates + of index registers. GCC's pre/post increment/decrement addressing + is far too simple to take advantage of all the possibilities. This + pass may not be suitable since those insns may not be independent. + + * comclr can compare two ints or an int and a register, nullify + the following instruction and zero some other register. 
+
+static void
+pa_combine_instructions (insns)
+     rtx insns ATTRIBUTE_UNUSED;
+{
+  rtx anchor, new;
+
+  /* This can get expensive since the basic algorithm is on the
+     order of O(n^2) (or worse).  Only do it for -O2 or higher
+     levels of optimization.  */
+  if (optimize < 2)
+    return;
+
+  /* Walk down the list of insns looking for "anchor" insns which
+     may be combined with "floating" insns.  As the name implies,
+     "anchor" instructions don't move, while "floating" insns may
+     move around.  */
+  new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
+  new = make_insn_raw (new);
+
+  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
+    {
+      enum attr_pa_combine_type anchor_attr;
+      enum attr_pa_combine_type floater_attr;
+
+      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
+         Also ignore any special USE insns.  */
+      if ((GET_CODE (anchor) != INSN
+           && GET_CODE (anchor) != JUMP_INSN
+           && GET_CODE (anchor) != CALL_INSN)
+          || GET_CODE (PATTERN (anchor)) == USE
+          || GET_CODE (PATTERN (anchor)) == CLOBBER
+          || GET_CODE (PATTERN (anchor)) == ADDR_VEC
+          || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
+        continue;
+
+      anchor_attr = get_attr_pa_combine_type (anchor);
+      /* See if anchor is an insn suitable for combination.  */
+      if (anchor_attr == PA_COMBINE_TYPE_FMPY
+          || anchor_attr == PA_COMBINE_TYPE_FADDSUB
+          || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
+              && ! forward_branch_p (anchor)))
+        {
+          rtx floater;
+
+          for (floater = PREV_INSN (anchor);
+               floater;
+               floater = PREV_INSN (floater))
+            {
+              if (GET_CODE (floater) == NOTE
+                  || (GET_CODE (floater) == INSN
+                      && (GET_CODE (PATTERN (floater)) == USE
+                          || GET_CODE (PATTERN (floater)) == CLOBBER)))
+                continue;
+
+              /* Anything except a regular INSN will stop our search.  */
+              if (GET_CODE (floater) != INSN
+                  || GET_CODE (PATTERN (floater)) == ADDR_VEC
+                  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
+                {
+                  floater = NULL_RTX;
+                  break;
+                }
+
+              /* See if FLOATER is suitable for combination with the
+                 anchor.  */
+              floater_attr = get_attr_pa_combine_type (floater);
+              if ((anchor_attr == PA_COMBINE_TYPE_FMPY
+                   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
+                  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+                      && floater_attr == PA_COMBINE_TYPE_FMPY))
+                {
+                  /* If ANCHOR and FLOATER can be combined, then we're
+                     done with this pass.  */
+                  if (pa_can_combine_p (new, anchor, floater, 0,
+                                        SET_DEST (PATTERN (floater)),
+                                        XEXP (SET_SRC (PATTERN (floater)), 0),
+                                        XEXP (SET_SRC (PATTERN (floater)), 1)))
+                    break;
+                }
+
+              else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
+                       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
+                {
+                  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
+                    {
+                      if (pa_can_combine_p (new, anchor, floater, 0,
+                                            SET_DEST (PATTERN (floater)),
+                                            XEXP (SET_SRC (PATTERN (floater)), 0),
+                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
+                        break;
+                    }
+                  else
+                    {
+                      if (pa_can_combine_p (new, anchor, floater, 0,
+                                            SET_DEST (PATTERN (floater)),
+                                            SET_SRC (PATTERN (floater)),
+                                            SET_SRC (PATTERN (floater))))
+                        break;
+                    }
+                }
+            }
+
+          /* If we didn't find anything on the backwards scan try forwards.  */
+          if (!floater
+              && (anchor_attr == PA_COMBINE_TYPE_FMPY
+                  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
+            {
+              for (floater = anchor; floater; floater = NEXT_INSN (floater))
+                {
+                  if (GET_CODE (floater) == NOTE
+                      || (GET_CODE (floater) == INSN
+                          && (GET_CODE (PATTERN (floater)) == USE
+                              || GET_CODE (PATTERN (floater)) == CLOBBER)))
+                    continue;
+
+                  /* Anything except a regular INSN will stop our search.  */
+                  if (GET_CODE (floater) != INSN
+                      || GET_CODE (PATTERN (floater)) == ADDR_VEC
+                      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
+                    {
+                      floater = NULL_RTX;
+                      break;
+                    }
+
+                  /* See if FLOATER is suitable for combination with the
+                     anchor.  */
+                  floater_attr = get_attr_pa_combine_type (floater);
+                  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
+                       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
+                      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+                          && floater_attr == PA_COMBINE_TYPE_FMPY))
+                    {
+                      /* If ANCHOR and FLOATER can be combined, then we're
+                         done with this pass.  */
+                      if (pa_can_combine_p (new, anchor, floater, 1,
+                                            SET_DEST (PATTERN (floater)),
+                                            XEXP (SET_SRC (PATTERN (floater)), 0),
+                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
+                        break;
+                    }
+                }
+            }
+
+          /* FLOATER will be nonzero if we found a suitable floating
+             insn for combination with ANCHOR.  */
+          if (floater
+              && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+                  || anchor_attr == PA_COMBINE_TYPE_FMPY))
+            {
+              /* Emit the new instruction and delete the old anchor.  */
+              emit_insn_before (gen_rtx_PARALLEL (VOIDmode,
+                                                  gen_rtvec (2,
+                                                             PATTERN (anchor),
+                                                             PATTERN (floater))),
+                                anchor);
+              PUT_CODE (anchor, NOTE);
+              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
+              NOTE_SOURCE_FILE (anchor) = 0;
+
+              /* Emit a special USE insn for FLOATER, then delete
+                 the floating insn.  */
+              emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
+              delete_insn (floater);
+
+              continue;
+            }
+          else if (floater
+                   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
+            {
+              rtx temp;
+              /* Emit the new jump instruction and delete the old anchor.  */
+              temp = emit_jump_insn_before (gen_rtx_PARALLEL (VOIDmode,
+                                                              gen_rtvec (2, PATTERN (anchor),
+                                                                         PATTERN (floater))),
+                                            anchor);
+              JUMP_LABEL (temp) = JUMP_LABEL (anchor);
+              PUT_CODE (anchor, NOTE);
+              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
+              NOTE_SOURCE_FILE (anchor) = 0;
+
+              /* Emit a special USE insn for FLOATER, then delete
+                 the floating insn.  */
+              emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
+              delete_insn (floater);
+              continue;
+            }
+        }
+    }
+}
+
+int
+pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
+     rtx new, anchor, floater;
+     int reversed;
+     rtx dest, src1, src2;
+{
+  int insn_code_number;
+  rtx start, end;
+
+  /* Create a PARALLEL with the patterns of ANCHOR and
+     FLOATER, try to recognize it, then test constraints
+     for the resulting pattern.
+
+     If the pattern doesn't match or the constraints
+     aren't met keep searching for a suitable floater
+     insn.  */
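+
+  /* Editorial sketch (not part of the original sources): for example, if
+     the floater computes r3 = r1 + r2, the pair is only safe to fuse when
+     no insn strictly between the floater and the anchor reads r3 or
+     writes r1 or r2; that is exactly what the reg_used_between_p /
+     reg_set_between_p checks below enforce.  The register names are only
+     for the example.  */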
+  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
+  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
+  INSN_CODE (new) = -1;
+  insn_code_number = recog_memoized (new);
+  if (insn_code_number < 0
+      || !constrain_operands (insn_code_number, 1))
+    return 0;
+
+  if (reversed)
+    {
+      start = anchor;
+      end = floater;
+    }
+  else
+    {
+      start = floater;
+      end = anchor;
+    }
+
+  /* There are up to three operands to consider.  One
+     output and two inputs.
+
+     The output must not be used between FLOATER & ANCHOR
+     exclusive.  The inputs must not be set between
+     FLOATER and ANCHOR exclusive.  */
+
+  if (reg_used_between_p (dest, start, end))
+    return 0;
+
+  if (reg_set_between_p (src1, start, end))
+    return 0;
+
+  if (reg_set_between_p (src2, start, end))
+    return 0;
+
+  /* If we get here, then everything is good.  */
+  return 1;
+}
+
+/* Return nonzero if sets and references for INSN are delayed.
+
+   Millicode insns are actually function calls with some special
+   constraints on arguments and register usage.
+
+   Millicode calls always expect their arguments in the integer argument
+   registers, and always return their result in %r29 (ret1).  They
+   are expected to clobber their arguments, %r1, %r29, and %r31 and
+   nothing else.
+
+   By considering these effects delayed, reorg can put insns
+   which set the argument registers into the delay slot of the millicode
+   call -- thus they act more like traditional CALL_INSNs.
+
+   get_attr_type will try to recognize the given insn, so make sure to
+   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
+   in particular.  */
+int
+insn_sets_and_refs_are_delayed (insn)
+     rtx insn;
+{
+  return ((GET_CODE (insn) == INSN
+           && GET_CODE (PATTERN (insn)) != SEQUENCE
+           && GET_CODE (PATTERN (insn)) != USE
+           && GET_CODE (PATTERN (insn)) != CLOBBER
+           && get_attr_type (insn) == TYPE_MILLI));
+}
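+
+/* Editorial sketch (not part of the original sources): as a rough example
+   of the millicode convention described above, a call to the integer
+   multiply helper might look like
+
+       ldi   24,%r25            ; second argument
+       ldi   42,%r26            ; first argument
+       bl    $$mulI,%r31        ; millicode call, return pointer in %r31
+       nop                      ; delay slot -- reorg tries to fill this
+       copy  %r29,%r3           ; result comes back in %r29 (ret1)
+
+   The operand values and destination register are invented, and the real
+   call sequences are generated elsewhere in this port; the point is only
+   that treating the call's sets and references as delayed lets reorg move
+   one of the argument set-ups into the delay slot shown above.  */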