author     YamaArashi <shadow962@live.com>   2016-01-06 01:47:28 -0800
committer  YamaArashi <shadow962@live.com>   2016-01-06 01:47:28 -0800
commit     be8b04496302184c6e8f04d6179f9c3afc50aeb6 (patch)
tree       726e2468c0c07add773c0dbd86ab6386844259ae /gcc/config/sh/sh.c
initial commit
Diffstat (limited to 'gcc/config/sh/sh.c')
-rwxr-xr-x  gcc/config/sh/sh.c  4786
1 file changed, 4786 insertions, 0 deletions
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c new file mode 100755 index 0000000..4d4b5cd --- /dev/null +++ b/gcc/config/sh/sh.c @@ -0,0 +1,4786 @@ +/* Output routines for GCC for Hitachi Super-H. + Copyright (C) 1993-1998 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Contributed by Steve Chamberlain (sac@cygnus.com). + Improved by Jim Wilson (wilson@cygnus.com). */ + +#include "config.h" + +#include <stdio.h> + +#include "rtl.h" +#include "tree.h" +#include "flags.h" +#include "insn-flags.h" +#include "expr.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "output.h" +#include "insn-attr.h" + +int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch; + +#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0) +#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1) + +/* ??? The pragma interrupt support will not work for SH3. */ +/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to + output code for the next function appropriate for an interrupt handler. */ +int pragma_interrupt; + +/* This is set by the trap_exit attribute for functions. It specifies + a trap number to be used in a trapa instruction at function exit + (instead of an rte instruction). */ +int trap_exit; + +/* This is used by the sp_switch attribute for functions. It specifies + a variable holding the address of the stack the interrupt function + should switch to/from at entry/exit. */ +rtx sp_switch; + +/* This is set by #pragma trapa, and is similar to the above, except that + the compiler doesn't emit code to preserve all registers. */ +static int pragma_trapa; + +/* This is set by #pragma nosave_low_regs. This is useful on the SH3, + which has a separate set of low regs for User and Supervisor modes. + This should only be used for the lowest level of interrupts. Higher levels + of interrupts must save the registers in case they themselves are + interrupted. */ +int pragma_nosave_low_regs; + +/* This is used for communication between SETUP_INCOMING_VARARGS and + sh_expand_prologue. */ +int current_function_anonymous_args; + +/* Global variables from toplev.c and final.c that are used within, but + not declared in any header file. */ +extern char *version_string; +extern int *insn_addresses; + +/* Global variables for machine-dependent things. */ + +/* Which cpu are we scheduling for. */ +enum processor_type sh_cpu; + +/* Saved operands from the last compare to use when we generate an scc + or bcc insn. */ + +rtx sh_compare_op0; +rtx sh_compare_op1; + +enum machine_mode sh_addr_diff_vec_mode; + +/* Provides the class number of the smallest class containing + reg number. 
*/ + +int regno_reg_class[FIRST_PSEUDO_REGISTER] = +{ + R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, PR_REGS, T_REGS, NO_REGS, + MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS, + FP0_REGS,FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + DF_REGS, DF_REGS, DF_REGS, DF_REGS, + DF_REGS, DF_REGS, DF_REGS, DF_REGS, + FPSCR_REGS, +}; + +char fp_reg_names[][5] = +{ + "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", + "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", + "fpul", + "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14", +}; + +/* Provide reg_class from a letter such as appears in the machine + description. */ + +enum reg_class reg_class_from_letter[] = +{ + /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS, + /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS, + /* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS, + /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS, + /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS, + /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS, + /* y */ FPUL_REGS, /* z */ R0_REGS +}; + +int assembler_dialect; + +rtx get_fpscr_rtx (); +void emit_sf_insn (); +void emit_df_insn (); + +static void split_branches PROTO ((rtx)); + +/* Print the operand address in x to the stream. */ + +void +print_operand_address (stream, x) + FILE *stream; + rtx x; +{ + switch (GET_CODE (x)) + { + case REG: + case SUBREG: + fprintf (stream, "@%s", reg_names[true_regnum (x)]); + break; + + case PLUS: + { + rtx base = XEXP (x, 0); + rtx index = XEXP (x, 1); + + switch (GET_CODE (index)) + { + case CONST_INT: + fprintf (stream, "@(%d,%s)", INTVAL (index), + reg_names[true_regnum (base)]); + break; + + case REG: + case SUBREG: + { + int base_num = true_regnum (base); + int index_num = true_regnum (index); + + fprintf (stream, "@(r0,%s)", + reg_names[MAX (base_num, index_num)]); + break; + } + + default: + debug_rtx (x); + abort (); + } + } + break; + + case PRE_DEC: + fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]); + break; + + case POST_INC: + fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]); + break; + + default: + output_addr_const (stream, x); + break; + } +} + +/* Print operand x (an rtx) in assembler syntax to file stream + according to modifier code. + + '.' print a .s if insn needs delay slot + ',' print LOCAL_LABEL_PREFIX + '@' print trap, rte or rts depending upon pragma interruptness + '#' output a nop if there is nothing to put in the delay slot + 'O' print a constant without the # + 'R' print the LSW of a dp value - changes if in little endian + 'S' print the MSW of a dp value - changes if in little endian + 'T' print the next word of a dp value - same as 'R' in big endian mode. + 'o' output an operator. */ + +void +print_operand (stream, x, code) + FILE *stream; + rtx x; + int code; +{ + switch (code) + { + case '.': + if (final_sequence + && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) + fprintf (stream, ASSEMBLER_DIALECT ? 
"/s" : ".s"); + break; + case ',': + fprintf (stream, "%s", LOCAL_LABEL_PREFIX); + break; + case '@': + { + int interrupt_handler; + + if ((lookup_attribute + ("interrupt_handler", + DECL_MACHINE_ATTRIBUTES (current_function_decl))) + != NULL_TREE) + interrupt_handler = 1; + else + interrupt_handler = 0; + + if (trap_exit) + fprintf (stream, "trapa #%d", trap_exit); + else if (interrupt_handler) + fprintf (stream, "rte"); + else + fprintf (stream, "rts"); + break; + } + case '#': + /* Output a nop if there's nothing in the delay slot. */ + if (dbr_sequence_length () == 0) + fprintf (stream, "\n\tnop"); + break; + case 'O': + output_addr_const (stream, x); + break; + case 'R': + fputs (reg_names[REGNO (x) + LSW], (stream)); + break; + case 'S': + fputs (reg_names[REGNO (x) + MSW], (stream)); + break; + case 'T': + /* Next word of a double. */ + switch (GET_CODE (x)) + { + case REG: + fputs (reg_names[REGNO (x) + 1], (stream)); + break; + case MEM: + if (GET_CODE (XEXP (x, 0)) != PRE_DEC + && GET_CODE (XEXP (x, 0)) != POST_INC) + x = adj_offsettable_operand (x, 4); + print_operand_address (stream, XEXP (x, 0)); + break; + } + break; + case 'o': + switch (GET_CODE (x)) + { + case PLUS: fputs ("add", stream); break; + case MINUS: fputs ("sub", stream); break; + case MULT: fputs ("mul", stream); break; + case DIV: fputs ("div", stream); break; + } + break; + default: + switch (GET_CODE (x)) + { + case REG: + if (REGNO (x) >= FIRST_FP_REG && REGNO (x) <= LAST_FP_REG + && GET_MODE_SIZE (GET_MODE (x)) > 4) + fprintf ((stream), "d%s", reg_names[REGNO (x)]+1); + else + fputs (reg_names[REGNO (x)], (stream)); + break; + case MEM: + output_address (XEXP (x, 0)); + break; + default: + fputc ('#', stream); + output_addr_const (stream, x); + break; + } + break; + } +} + +/* Emit code to perform a block move. Choose the best method. + + OPERANDS[0] is the destination. + OPERANDS[1] is the source. + OPERANDS[2] is the size. + OPERANDS[3] is the alignment safe to use. */ + +int +expand_block_move (operands) + rtx *operands; +{ + int align = INTVAL (operands[3]); + int constp = (GET_CODE (operands[2]) == CONST_INT); + int bytes = (constp ? INTVAL (operands[2]) : 0); + + /* If it isn't a constant number of bytes, or if it doesn't have 4 byte + alignment, or if it isn't a multiple of 4 bytes, then fail. */ + if (! constp || align < 4 || (bytes % 4 != 0)) + return 0; + + if (TARGET_HARD_SH4) + { + if (bytes < 12) + return 0; + else if (bytes == 12) + { + tree entry_name; + rtx func_addr_rtx; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + + entry_name = get_identifier ("__movstrSI12_i4"); + + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + emit_insn (gen_block_move_real_i4 (func_addr_rtx)); + return 1; + } + else if (! TARGET_SMALLCODE) + { + tree entry_name; + rtx func_addr_rtx; + int dwords; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + rtx r6 = gen_rtx (REG, SImode, 6); + + entry_name = get_identifier (bytes & 4 + ? 
"__movstr_i4_odd" + : "__movstr_i4_even"); + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + + dwords = bytes >> 3; + emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1))); + emit_insn (gen_block_lump_real_i4 (func_addr_rtx)); + return 1; + } + else + return 0; + } + if (bytes < 64) + { + char entry[30]; + tree entry_name; + rtx func_addr_rtx; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + + sprintf (entry, "__movstrSI%d", bytes); + entry_name = get_identifier (entry); + + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx (SYMBOL_REF, Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + emit_insn (gen_block_move_real (func_addr_rtx)); + return 1; + } + + /* This is the same number of bytes as a memcpy call, but to a different + less common function name, so this will occasionally use more space. */ + if (! TARGET_SMALLCODE) + { + tree entry_name; + rtx func_addr_rtx; + int final_switch, while_loop; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + rtx r6 = gen_rtx (REG, SImode, 6); + + entry_name = get_identifier ("__movstr"); + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx (SYMBOL_REF, Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + + /* r6 controls the size of the move. 16 is decremented from it + for each 64 bytes moved. Then the negative bit left over is used + as an index into a list of move instructions. e.g., a 72 byte move + would be set up with size(r6) = 14, for one iteration through the + big while loop, and a switch of -2 for the last part. */ + + final_switch = 16 - ((bytes / 4) % 16); + while_loop = ((bytes / 4) / 16 - 1) * 16; + emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch))); + emit_insn (gen_block_lump_real (func_addr_rtx)); + return 1; + } + + return 0; +} + +/* Prepare operands for a move define_expand; specifically, one of the + operands must be in a register. */ + +int +prepare_move_operands (operands, mode) + rtx operands[]; + enum machine_mode mode; +{ + if (! reload_in_progress && ! reload_completed) + { + /* Copy the source to a register if both operands aren't registers. */ + if (! register_operand (operands[0], mode) + && ! register_operand (operands[1], mode)) + operands[1] = copy_to_mode_reg (mode, operands[1]); + + /* This case can happen while generating code to move the result + of a library call to the target. Reject `st r0,@(rX,rY)' because + reload will fail to find a spill register for rX, since r0 is already + being used for the source. */ + else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0 + && GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG) + operands[1] = copy_to_mode_reg (mode, operands[1]); + } + + return 0; +} + +/* Prepare the operands for an scc instruction; make sure that the + compare has been done. */ +rtx +prepare_scc_operands (code) + enum rtx_code code; +{ + rtx t_reg = gen_rtx (REG, SImode, T_REG); + enum rtx_code oldcode = code; + enum machine_mode mode; + + /* First need a compare insn. */ + switch (code) + { + case NE: + /* It isn't possible to handle this case. 
*/ + abort (); + case LT: + code = GT; + break; + case LE: + code = GE; + break; + case LTU: + code = GTU; + break; + case LEU: + code = GEU; + break; + } + if (code != oldcode) + { + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + } + + mode = GET_MODE (sh_compare_op0); + if (mode == VOIDmode) + mode = GET_MODE (sh_compare_op1); + + sh_compare_op0 = force_reg (mode, sh_compare_op0); + if ((code != EQ && code != NE + && (sh_compare_op1 != const0_rtx + || code == GTU || code == GEU || code == LTU || code == LEU)) + || TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT) + sh_compare_op1 = force_reg (mode, sh_compare_op1); + + if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) + (mode == SFmode ? emit_sf_insn : emit_df_insn) + (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, + gen_rtx (SET, VOIDmode, t_reg, + gen_rtx (code, SImode, + sh_compare_op0, sh_compare_op1)), + gen_rtx (USE, VOIDmode, get_fpscr_rtx ())))); + else + emit_insn (gen_rtx (SET, VOIDmode, t_reg, + gen_rtx (code, SImode, sh_compare_op0, + sh_compare_op1))); + + return t_reg; +} + +/* Called from the md file, set up the operands of a compare instruction. */ + +void +from_compare (operands, code) + rtx *operands; + int code; +{ + enum machine_mode mode = GET_MODE (sh_compare_op0); + rtx insn; + if (mode == VOIDmode) + mode = GET_MODE (sh_compare_op1); + if (code != EQ + || mode == DImode + || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)) + { + /* Force args into regs, since we can't use constants here. */ + sh_compare_op0 = force_reg (mode, sh_compare_op0); + if (sh_compare_op1 != const0_rtx + || code == GTU || code == GEU + || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)) + sh_compare_op1 = force_reg (mode, sh_compare_op1); + } + if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE) + { + from_compare (operands, GT); + insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1); + } + else + insn = gen_rtx (SET, VOIDmode, + gen_rtx (REG, SImode, 18), + gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1)); + if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + insn = gen_rtx (PARALLEL, VOIDmode, + gen_rtvec (2, insn, + gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))); + (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn); + } + else + emit_insn (insn); +} + +/* Functions to output assembly code. */ + +/* Return a sequence of instructions to perform DI or DF move. + + Since the SH cannot move a DI or DF in one instruction, we have + to take care when we see overlapping source and dest registers. */ + +char * +output_movedouble (insn, operands, mode) + rtx insn; + rtx operands[]; + enum machine_mode mode; +{ + rtx dst = operands[0]; + rtx src = operands[1]; + + if (GET_CODE (dst) == MEM + && GET_CODE (XEXP (dst, 0)) == PRE_DEC) + return "mov.l %T1,%0\n\tmov.l %1,%0"; + + if (register_operand (dst, mode) + && register_operand (src, mode)) + { + if (REGNO (src) == MACH_REG) + return "sts mach,%S0\n\tsts macl,%R0"; + + /* When mov.d r1,r2 do r2->r3 then r1->r2; + when mov.d r1,r0 do r1->r0 then r2->r1. 
*/ + + if (REGNO (src) + 1 == REGNO (dst)) + return "mov %T1,%T0\n\tmov %1,%0"; + else + return "mov %1,%0\n\tmov %T1,%T0"; + } + else if (GET_CODE (src) == CONST_INT) + { + if (INTVAL (src) < 0) + output_asm_insn ("mov #-1,%S0", operands); + else + output_asm_insn ("mov #0,%S0", operands); + + return "mov %1,%R0"; + } + else if (GET_CODE (src) == MEM) + { + int ptrreg = -1; + int dreg = REGNO (dst); + rtx inside = XEXP (src, 0); + + if (GET_CODE (inside) == REG) + ptrreg = REGNO (inside); + else if (GET_CODE (inside) == SUBREG) + ptrreg = REGNO (SUBREG_REG (inside)) + SUBREG_WORD (inside); + else if (GET_CODE (inside) == PLUS) + { + ptrreg = REGNO (XEXP (inside, 0)); + /* ??? A r0+REG address shouldn't be possible here, because it isn't + an offsettable address. Unfortunately, offsettable addresses use + QImode to check the offset, and a QImode offsettable address + requires r0 for the other operand, which is not currently + supported, so we can't use the 'o' constraint. + Thus we must check for and handle r0+REG addresses here. + We punt for now, since this is likely very rare. */ + if (GET_CODE (XEXP (inside, 1)) == REG) + abort (); + } + else if (GET_CODE (inside) == LABEL_REF) + return "mov.l %1,%0\n\tmov.l %1+4,%T0"; + else if (GET_CODE (inside) == POST_INC) + return "mov.l %1,%0\n\tmov.l %1,%T0"; + else + abort (); + + /* Work out the safe way to copy. Copy into the second half first. */ + if (dreg == ptrreg) + return "mov.l %T1,%T0\n\tmov.l %1,%0"; + } + + return "mov.l %1,%0\n\tmov.l %T1,%T0"; +} + +/* Print an instruction which would have gone into a delay slot after + another instruction, but couldn't because the other instruction expanded + into a sequence where putting the slot insn at the end wouldn't work. */ + +static void +print_slot (insn) + rtx insn; +{ + final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1); + + INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1; +} + +char * +output_far_jump (insn, op) + rtx insn; + rtx op; +{ + struct { rtx lab, reg, op; } this; + char *jump; + int far; + int offset = branch_dest (insn) - insn_addresses[INSN_UID (insn)]; + + this.lab = gen_label_rtx (); + + if (TARGET_SH2 + && offset >= -32764 + && offset - get_attr_length (insn) <= 32766) + { + far = 0; + jump = "mov.w %O0,%1;braf %1"; + } + else + { + far = 1; + jump = "mov.l %O0,%1;jmp @%1"; + } + /* If we have a scratch register available, use it. */ + if (GET_CODE (PREV_INSN (insn)) == INSN + && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch) + { + this.reg = SET_DEST (PATTERN (PREV_INSN (insn))); + output_asm_insn (jump, &this.lab); + if (dbr_sequence_length ()) + print_slot (final_sequence); + else + output_asm_insn ("nop", 0); + } + else + { + /* Output the delay slot insn first if any. */ + if (dbr_sequence_length ()) + print_slot (final_sequence); + + this.reg = gen_rtx (REG, SImode, 13); + output_asm_insn ("mov.l r13,@-r15", 0); + output_asm_insn (jump, &this.lab); + output_asm_insn ("mov.l @r15+,r13", 0); + } + if (far) + output_asm_insn (".align 2", 0); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab)); + this.op = op; + output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab); + return ""; +} + +/* Local label counter, used for constants in the pool and inside + pattern branches. */ + +static int lf = 100; + +/* Output code for ordinary branches. 
*/ + +char * +output_branch (logic, insn, operands) + int logic; + rtx insn; + rtx *operands; +{ + switch (get_attr_length (insn)) + { + case 6: + /* This can happen if filling the delay slot has caused a forward + branch to exceed its range (we could reverse it, but only + when we know we won't overextend other branches; this should + best be handled by relaxation). + It can also happen when other condbranches hoist delay slot insn + from their destination, thus leading to code size increase. + But the branch will still be in the range -4092..+4098 bytes. */ + + if (! TARGET_RELAX) + { + int label = lf++; + /* The call to print_slot will clobber the operands. */ + rtx op0 = operands[0]; + + /* If the instruction in the delay slot is annulled (true), then + there is no delay slot where we can put it now. The only safe + place for it is after the label. final will do that by default. */ + + if (final_sequence + && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) + { + asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", + ASSEMBLER_DIALECT ? "/" : ".", label); + print_slot (final_sequence); + } + else + asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label); + + output_asm_insn ("bra\t%l0", &op0); + fprintf (asm_out_file, "\tnop\n"); + ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label); + + return ""; + } + /* When relaxing, handle this like a short branch. The linker + will fix it up if it still doesn't fit after relaxation. */ + case 2: + return logic ? "bt%.\t%l0" : "bf%.\t%l0"; + default: + abort (); + } +} + +char * +output_branchy_insn (code, template, insn, operands) + char *template; + enum rtx_code code; + rtx insn; + rtx *operands; +{ + rtx next_insn = NEXT_INSN (insn); + int label_nr; + + if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn)) + { + rtx src = SET_SRC (PATTERN (next_insn)); + if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) + { + /* Following branch not taken */ + operands[9] = gen_label_rtx (); + emit_label_after (operands[9], next_insn); + return template; + } + else + { + int offset = (branch_dest (next_insn) + - insn_addresses[INSN_UID (next_insn)] + 4); + if (offset >= -252 && offset <= 258) + { + if (GET_CODE (src) == IF_THEN_ELSE) + /* branch_true */ + src = XEXP (src, 1); + operands[9] = src; + return template; + } + } + } + operands[9] = gen_label_rtx (); + emit_label_after (operands[9], insn); + return template; +} + +char * +output_ieee_ccmpeq (insn, operands) + rtx insn, operands; +{ + output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands); +} + +/* Output to FILE the start of the assembler file. */ + +void +output_file_start (file) + FILE *file; +{ + register int pos; + + output_file_directive (file, main_input_filename); + + /* Switch to the data section so that the coffsem symbol and the + gcc2_compiled. symbol aren't in the text section. */ + data_section (); + + if (TARGET_LITTLE_ENDIAN) + fprintf (file, "\t.little\n"); +} + +/* Actual number of instructions used to make a shift by N. */ +static char ashiftrt_insns[] = + { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2}; + +/* Left shift and logical right shift are the same. */ +static char shift_insns[] = + { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; + +/* Individual shift amounts needed to get the above length sequences. 
+ One bit right shifts clobber the T bit, so when possible, put one bit + shifts in the middle of the sequence, so the ends are eligible for + branch delay slots. */ +static short shift_amounts[32][5] = { + {0}, {1}, {2}, {2, 1}, + {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2}, + {8}, {8, 1}, {8, 2}, {8, 1, 2}, + {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8}, + {16}, {16, 1}, {16, 2}, {16, 1, 2}, + {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, + {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, + {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; + +/* Likewise, but for shift amounts < 16, up to three highmost bits + might be clobbered. This is typically used when combined with some + kind of sign or zero extension. */ + +static char ext_shift_insns[] = + { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; + +static short ext_shift_amounts[32][4] = { + {0}, {1}, {2}, {2, 1}, + {2, 2}, {2, 1, 2}, {8, -2}, {8, -1}, + {8}, {8, 1}, {8, 2}, {8, 1, 2}, + {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1}, + {16}, {16, 1}, {16, 2}, {16, 1, 2}, + {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, + {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, + {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; + +/* Assuming we have a value that has been sign-extended by at least one bit, + can we use the ext_shift_amounts with the last shift turned to an arithmetic shift + to shift it by N without data loss, and quicker than by other means? */ +#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15) + +/* This is used in length attributes in sh.md to help compute the length + of arbitrary constant shift instructions. */ + +int +shift_insns_rtx (insn) + rtx insn; +{ + rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + int shift_count = INTVAL (XEXP (set_src, 1)); + enum rtx_code shift_code = GET_CODE (set_src); + + switch (shift_code) + { + case ASHIFTRT: + return ashiftrt_insns[shift_count]; + case LSHIFTRT: + case ASHIFT: + return shift_insns[shift_count]; + default: + abort(); + } +} + +/* Return the cost of a shift. */ + +int +shiftcosts (x) + rtx x; +{ + int value = INTVAL (XEXP (x, 1)); + + /* If shift by a non constant, then this will be expensive. */ + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return SH_DYNAMIC_SHIFT_COST; + + /* Otherwise, return the true cost in instructions. */ + if (GET_CODE (x) == ASHIFTRT) + { + int cost = ashiftrt_insns[value]; + /* If SH3, then we put the constant in a reg and use shad. */ + if (cost > 1 + SH_DYNAMIC_SHIFT_COST) + cost = 1 + SH_DYNAMIC_SHIFT_COST; + return cost; + } + else + return shift_insns[value]; +} + +/* Return the cost of an AND operation. */ + +int +andcosts (x) + rtx x; +{ + int i; + + /* Anding with a register is a single cycle and instruction. */ + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return 1; + + i = INTVAL (XEXP (x, 1)); + /* These constants are single cycle extu.[bw] instructions. */ + if (i == 0xff || i == 0xffff) + return 1; + /* Constants that can be used in an and immediate instruction is a single + cycle, but this requires r0, so make it a little more expensive. */ + if (CONST_OK_FOR_L (i)) + return 2; + /* Constants that can be loaded with a mov immediate and an and. + This case is probably unnecessary. */ + if (CONST_OK_FOR_I (i)) + return 2; + /* Any other constants requires a 2 cycle pc-relative load plus an and. + This case is probably unnecessary. */ + return 3; +} + +/* Return the cost of a multiply. 
*/ +int +multcosts (x) + rtx x; +{ + if (TARGET_SH2) + { + /* We have a mul insn, so we can never take more than the mul and the + read of the mac reg, but count more because of the latency and extra + reg usage. */ + if (TARGET_SMALLCODE) + return 2; + return 3; + } + + /* If we're aiming at small code, then just count the number of + insns in a multiply call sequence. */ + if (TARGET_SMALLCODE) + return 5; + + /* Otherwise count all the insns in the routine we'd be calling too. */ + return 20; +} + +/* Code to expand a shift. */ + +void +gen_ashift (type, n, reg) + int type; + int n; + rtx reg; +{ + /* Negative values here come from the shift_amounts array. */ + if (n < 0) + { + if (type == ASHIFT) + type = LSHIFTRT; + else + type = ASHIFT; + n = -n; + } + + switch (type) + { + case ASHIFTRT: + emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n))); + break; + case LSHIFTRT: + if (n == 1) + emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n))); + else + emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n))); + break; + case ASHIFT: + emit_insn (gen_ashlsi3_k (reg, reg, GEN_INT (n))); + break; + } +} + +/* Same for HImode */ + +void +gen_ashift_hi (type, n, reg) + int type; + int n; + rtx reg; +{ + /* Negative values here come from the shift_amounts array. */ + if (n < 0) + { + if (type == ASHIFT) + type = LSHIFTRT; + else + type = ASHIFT; + n = -n; + } + + switch (type) + { + case ASHIFTRT: + case LSHIFTRT: + /* We don't have HImode right shift operations because using the + ordinary 32 bit shift instructions for that doesn't generate proper + zero/sign extension. + gen_ashift_hi is only called in contexts where we know that the + sign extension works out correctly. */ + { + int word = 0; + if (GET_CODE (reg) == SUBREG) + { + word = SUBREG_WORD (reg); + reg = SUBREG_REG (reg); + } + gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word)); + break; + } + case ASHIFT: + emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n))); + break; + } +} + +/* Output RTL to split a constant shift into its component SH constant + shift instructions. */ + +int +gen_shifty_op (code, operands) + int code; + rtx *operands; +{ + int value = INTVAL (operands[2]); + int max, i; + + /* Truncate the shift count in case it is out of bounds. */ + value = value & 0x1f; + + if (value == 31) + { + if (code == LSHIFTRT) + { + emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); + emit_insn (gen_movt (operands[0])); + return; + } + else if (code == ASHIFT) + { + /* There is a two instruction sequence for 31 bit left shifts, + but it requires r0. */ + if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0) + { + emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); + emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); + return; + } + } + } + else if (value == 0) + { + /* This can happen when not optimizing. We must output something here + to prevent the compiler from aborting in final.c after the try_split + call. */ + emit_insn (gen_nop ()); + return; + } + + max = shift_insns[value]; + for (i = 0; i < max; i++) + gen_ashift (code, shift_amounts[value][i], operands[0]); +} + +/* Same as above, but optimized for values where the topmost bits don't + matter. */ + +int +gen_shifty_hi_op (code, operands) + int code; + rtx *operands; +{ + int value = INTVAL (operands[2]); + int max, i; + void (*gen_fun)(); + + /* This operation is used by and_shl for SImode values with a few + high bits known to be cleared. 
*/ + value &= 31; + if (value == 0) + { + emit_insn (gen_nop ()); + return; + } + + gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift; + if (code == ASHIFT) + { + max = ext_shift_insns[value]; + for (i = 0; i < max; i++) + gen_fun (code, ext_shift_amounts[value][i], operands[0]); + } + else + /* When shifting right, emit the shifts in reverse order, so that + solitary negative values come first. */ + for (i = ext_shift_insns[value] - 1; i >= 0; i--) + gen_fun (code, ext_shift_amounts[value][i], operands[0]); +} + +/* Output RTL for an arithmetic right shift. */ + +/* ??? Rewrite to use super-optimizer sequences. */ + +int +expand_ashiftrt (operands) + rtx *operands; +{ + rtx wrk; + char func[18]; + tree func_name; + int value; + + if (TARGET_SH3) + { + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx count = copy_to_mode_reg (SImode, operands[2]); + emit_insn (gen_negsi2 (count, count)); + emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); + return 1; + } + else if (ashiftrt_insns[INTVAL (operands[2]) & 31] + > 1 + SH_DYNAMIC_SHIFT_COST) + { + rtx count + = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31))); + emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); + return 1; + } + } + if (GET_CODE (operands[2]) != CONST_INT) + return 0; + + value = INTVAL (operands[2]) & 31; + + if (value == 31) + { + emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); + return 1; + } + else if (value >= 16 && value <= 19) + { + wrk = gen_reg_rtx (SImode); + emit_insn (gen_ashrsi2_16 (wrk, operands[1])); + value -= 16; + while (value--) + gen_ashift (ASHIFTRT, 1, wrk); + emit_move_insn (operands[0], wrk); + return 1; + } + /* Expand a short sequence inline, longer call a magic routine. */ + else if (value <= 5) + { + wrk = gen_reg_rtx (SImode); + emit_move_insn (wrk, operands[1]); + while (value--) + gen_ashift (ASHIFTRT, 1, wrk); + emit_move_insn (operands[0], wrk); + return 1; + } + + wrk = gen_reg_rtx (Pmode); + + /* Load the value into an arg reg and call a helper. */ + emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]); + sprintf (func, "__ashiftrt_r4_%d", value); + func_name = get_identifier (func); + emit_move_insn (wrk, gen_rtx (SYMBOL_REF, Pmode, + IDENTIFIER_POINTER (func_name))); + emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk)); + emit_move_insn (operands[0], gen_rtx (REG, SImode, 4)); + return 1; +} + +int sh_dynamicalize_shift_p (count) + rtx count; +{ + return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST; +} + +/* Try to find a good way to implement the combiner pattern + [(set (match_operand:SI 0 "register_operand" "r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) . + LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3. + return 0 for simple right / left or left/right shift combination. + return 1 for a combination of shifts with zero_extend. + return 2 for a combination of shifts with an AND that needs r0. + return 3 for a combination of shifts with an AND that needs an extra + scratch register, when the three highmost bits of the AND mask are clear. + return 4 for a combination of shifts with an AND that needs an extra + scratch register, when any of the three highmost bits of the AND mask + is set. + If ATTRP is set, store an initial right shift width in ATTRP[0], + and the instruction length in ATTRP[1] . These values are not valid + when returning 0. 
+ When ATTRP is set and returning 1, ATTRP[2] gets set to the index into + shift_amounts for the last shift value that is to be used before the + sign extend. */ +int +shl_and_kind (left_rtx, mask_rtx, attrp) + rtx left_rtx, mask_rtx; + int *attrp; +{ + unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2; + int left = INTVAL (left_rtx), right; + int best = 0; + int cost, best_cost = 10000; + int best_right = 0, best_len = 0; + int i; + int can_ext; + + if (left < 0 || left > 31) + return 0; + if (GET_CODE (mask_rtx) == CONST_INT) + mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left; + else + mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left; + /* Can this be expressed as a right shift / left shift pair ? */ + lsb = ((mask ^ (mask - 1)) >> 1) + 1; + right = exact_log2 (lsb); + mask2 = ~(mask + lsb - 1); + lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1; + /* mask has no zeroes but trailing zeroes <==> ! mask2 */ + if (! mask2) + best_cost = shift_insns[right] + shift_insns[right + left]; + /* mask has no trailing zeroes <==> ! right */ + else if (! right && mask2 == ~(lsb2 - 1)) + { + int late_right = exact_log2 (lsb2); + best_cost = shift_insns[left + late_right] + shift_insns[late_right]; + } + /* Try to use zero extend */ + if (mask2 == ~(lsb2 - 1)) + { + int width, first; + + for (width = 8; width <= 16; width += 8) + { + /* Can we zero-extend right away? */ + if (lsb2 == (HOST_WIDE_INT)1 << width) + { + cost + = 1 + ext_shift_insns[right] + ext_shift_insns[left + right]; + if (cost < best_cost) + { + best = 1; + best_cost = cost; + best_right = right; + best_len = cost; + if (attrp) + attrp[2] = -1; + } + continue; + } + /* ??? Could try to put zero extend into initial right shift, + or even shift a bit left before the right shift. */ + /* Determine value of first part of left shift, to get to the + zero extend cut-off point. */ + first = width - exact_log2 (lsb2) + right; + if (first >= 0 && right + left - first >= 0) + { + cost = ext_shift_insns[right] + ext_shift_insns[first] + 1 + + ext_shift_insns[right + left - first]; + if (cost < best_cost) + { + best = 1; + best_cost = cost; + best_right = right; + best_len = cost; + if (attrp) + attrp[2] = first; + } + } + } + } + /* Try to use r0 AND pattern */ + for (i = 0; i <= 2; i++) + { + if (i > right) + break; + if (! CONST_OK_FOR_L (mask >> i)) + continue; + cost = (i != 0) + 2 + ext_shift_insns[left + i]; + if (cost < best_cost) + { + best = 2; + best_cost = cost; + best_right = i; + best_len = cost - 1; + } + } + /* Try to use a scratch register to hold the AND operand. */ + can_ext = ((mask << left) & 0xe0000000) == 0; + for (i = 0; i <= 2; i++) + { + if (i > right) + break; + cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3) + + (can_ext ? ext_shift_insns : shift_insns)[left + i]; + if (cost < best_cost) + { + best = 4 - can_ext; + best_cost = cost; + best_right = i; + best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i); + } + } + + if (attrp) + { + attrp[0] = best_right; + attrp[1] = best_len; + } + return best; +} + +/* This is used in length attributes of the unnamed instructions + corresponding to shl_and_kind return values of 1 and 2. */ +int +shl_and_length (insn) + rtx insn; +{ + rtx set_src, left_rtx, mask_rtx; + int attributes[3]; + + set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + left_rtx = XEXP (XEXP (set_src, 0), 1); + mask_rtx = XEXP (set_src, 1); + shl_and_kind (left_rtx, mask_rtx, attributes); + return attributes[1]; +} + +/* This is used in length attribute of the and_shl_scratch instruction. 
*/ + +int +shl_and_scr_length (insn) + rtx insn; +{ + rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + int len = shift_insns[INTVAL (XEXP (set_src, 1))]; + rtx op = XEXP (set_src, 0); + len += shift_insns[INTVAL (XEXP (op, 1))] + 1; + op = XEXP (XEXP (op, 0), 0); + return len + shift_insns[INTVAL (XEXP (op, 1))]; +} + +/* Generating rtl? */ +extern int rtx_equal_function_value_matters; + +/* Generate rtl for instructions for which shl_and_kind advised a particular + method of generating them, i.e. returned zero. */ + +int +gen_shl_and (dest, left_rtx, mask_rtx, source) + rtx dest, left_rtx, mask_rtx, source; +{ + int attributes[3]; + unsigned HOST_WIDE_INT mask; + int kind = shl_and_kind (left_rtx, mask_rtx, attributes); + int right, total_shift; + int (*shift_gen_fun) PROTO((int, rtx*)) = gen_shifty_hi_op; + + right = attributes[0]; + total_shift = INTVAL (left_rtx) + right; + mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift; + switch (kind) + { + default: + return -1; + case 1: + { + int first = attributes[2]; + rtx operands[3]; + + if (first < 0) + { + emit_insn ((mask << right) <= 0xff + ? gen_zero_extendqisi2(dest, + gen_lowpart (QImode, source)) + : gen_zero_extendhisi2(dest, + gen_lowpart (HImode, source))); + source = dest; + } + if (source != dest) + emit_insn (gen_movsi (dest, source)); + operands[0] = dest; + if (right) + { + operands[2] = GEN_INT (right); + gen_shifty_hi_op (LSHIFTRT, operands); + } + if (first > 0) + { + operands[2] = GEN_INT (first); + gen_shifty_hi_op (ASHIFT, operands); + total_shift -= first; + mask <<= first; + } + if (first >= 0) + emit_insn (mask <= 0xff + ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest)) + : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest))); + if (total_shift > 0) + { + operands[2] = GEN_INT (total_shift); + gen_shifty_hi_op (ASHIFT, operands); + } + break; + } + case 4: + shift_gen_fun = gen_shifty_op; + case 3: + /* If the topmost bit that matters is set, set the topmost bits + that don't matter. This way, we might be able to get a shorter + signed constant. */ + if (mask & ((HOST_WIDE_INT)1 << 31 - total_shift)) + mask |= (HOST_WIDE_INT)~0 << (31 - total_shift); + case 2: + /* Don't expand fine-grained when combining, because that will + make the pattern fail. 
*/ + if (rtx_equal_function_value_matters + || reload_in_progress || reload_completed) + { + rtx operands[3]; + + /* Cases 3 and 4 should be handled by this split + only while combining */ + if (kind > 2) + abort (); + if (right) + { + emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right))); + source = dest; + } + emit_insn (gen_andsi3 (dest, source, GEN_INT (mask))); + if (total_shift) + { + operands[0] = dest; + operands[1] = dest; + operands[2] = GEN_INT (total_shift); + shift_gen_fun (ASHIFT, operands); + } + break; + } + else + { + int neg = 0; + if (kind != 4 && total_shift < 16) + { + neg = -ext_shift_amounts[total_shift][1]; + if (neg > 0) + neg -= ext_shift_amounts[total_shift][2]; + else + neg = 0; + } + emit_insn (gen_and_shl_scratch (dest, source, + GEN_INT (right), + GEN_INT (mask), + GEN_INT (total_shift + neg), + GEN_INT (neg))); + emit_insn (gen_movsi (dest, dest)); + break; + } + } + return 0; +} + +/* Try to find a good way to implement the combiner pattern + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI 18))] + LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3. + return 0 for simple left / right shift combination. + return 1 for left shift / 8 bit sign extend / left shift. + return 2 for left shift / 16 bit sign extend / left shift. + return 3 for left shift / 8 bit sign extend / shift / sign extend. + return 4 for left shift / 16 bit sign extend / shift / sign extend. + return 5 for left shift / 16 bit sign extend / right shift + return 6 for < 8 bit sign extend / left shift. + return 7 for < 8 bit sign extend / left shift / single right shift. + If COSTP is nonzero, assign the calculated cost to *COSTP. */ + +int +shl_sext_kind (left_rtx, size_rtx, costp) + rtx left_rtx, size_rtx; + int *costp; +{ + int left, size, insize, ext; + int cost, best_cost; + int kind; + + left = INTVAL (left_rtx); + size = INTVAL (size_rtx); + insize = size - left; + if (insize <= 0) + abort (); + /* Default to left / right shift. */ + kind = 0; + best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size]; + if (size <= 16) + { + /* 16 bit shift / sign extend / 16 bit shift */ + cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size]; + /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden + below, by alternative 3 or something even better. */ + if (cost < best_cost) + { + kind = 5; + best_cost = cost; + } + } + /* Try a plain sign extend between two shifts. */ + for (ext = 16; ext >= insize; ext -= 8) + { + if (ext <= size) + { + cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext]; + if (cost < best_cost) + { + kind = ext / 8U; + best_cost = cost; + } + } + /* Check if we can do a sloppy shift with a final signed shift + restoring the sign. */ + if (EXT_SHIFT_SIGNED (size - ext)) + cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1; + /* If not, maybe it's still cheaper to do the second shift sloppy, + and do a final sign extend? */ + else if (size <= 16) + cost = ext_shift_insns[ext - insize] + 1 + + ext_shift_insns[size > ext ? 
size - ext : ext - size] + 1; + else + continue; + if (cost < best_cost) + { + kind = ext / 8U + 2; + best_cost = cost; + } + } + /* Check if we can sign extend in r0 */ + if (insize < 8) + { + cost = 3 + shift_insns[left]; + if (cost < best_cost) + { + kind = 6; + best_cost = cost; + } + /* Try the same with a final signed shift. */ + if (left < 31) + { + cost = 3 + ext_shift_insns[left + 1] + 1; + if (cost < best_cost) + { + kind = 7; + best_cost = cost; + } + } + } + if (TARGET_SH3) + { + /* Try to use a dynamic shift. */ + cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST; + if (cost < best_cost) + { + kind = 0; + best_cost = cost; + } + } + if (costp) + *costp = cost; + return kind; +} + +/* Function to be used in the length attribute of the instructions + implementing this pattern. */ + +int +shl_sext_length (insn) + rtx insn; +{ + rtx set_src, left_rtx, size_rtx; + int cost; + + set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + left_rtx = XEXP (XEXP (set_src, 0), 1); + size_rtx = XEXP (set_src, 1); + shl_sext_kind (left_rtx, size_rtx, &cost); + return cost; +} + +/* Generate rtl for this pattern */ + +int +gen_shl_sext (dest, left_rtx, size_rtx, source) + rtx dest, left_rtx, size_rtx, source; +{ + int kind; + int left, size, insize, cost; + rtx operands[3]; + + kind = shl_sext_kind (left_rtx, size_rtx, &cost); + left = INTVAL (left_rtx); + size = INTVAL (size_rtx); + insize = size - left; + switch (kind) + { + case 1: + case 2: + case 3: + case 4: + { + int ext = kind & 1 ? 8 : 16; + int shift2 = size - ext; + + /* Don't expand fine-grained when combining, because that will + make the pattern fail. */ + if (! rtx_equal_function_value_matters + && ! reload_in_progress && ! reload_completed) + { + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + emit_insn (gen_movsi (dest, source)); + break; + } + if (dest != source) + emit_insn (gen_movsi (dest, source)); + operands[0] = dest; + if (ext - insize) + { + operands[2] = GEN_INT (ext - insize); + gen_shifty_hi_op (ASHIFT, operands); + } + emit_insn (kind & 1 + ? gen_extendqisi2(dest, gen_lowpart (QImode, dest)) + : gen_extendhisi2(dest, gen_lowpart (HImode, dest))); + if (kind <= 2) + { + if (shift2) + { + operands[2] = GEN_INT (shift2); + gen_shifty_op (ASHIFT, operands); + } + } + else + { + if (shift2 > 0) + { + if (EXT_SHIFT_SIGNED (shift2)) + { + operands[2] = GEN_INT (shift2 + 1); + gen_shifty_op (ASHIFT, operands); + operands[2] = GEN_INT (1); + gen_shifty_op (ASHIFTRT, operands); + break; + } + operands[2] = GEN_INT (shift2); + gen_shifty_hi_op (ASHIFT, operands); + } + else if (shift2) + { + operands[2] = GEN_INT (-shift2); + gen_shifty_hi_op (LSHIFTRT, operands); + } + emit_insn (size <= 8 + ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) + : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); + } + break; + } + case 5: + { + int i = 16 - size; + if (! rtx_equal_function_value_matters + && ! reload_in_progress && ! reload_completed) + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + else + { + operands[0] = dest; + operands[2] = GEN_INT (16 - insize); + gen_shifty_hi_op (ASHIFT, operands); + emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); + } + /* Don't use gen_ashrsi3 because it generates new pseudos. */ + while (--i >= 0) + gen_ashift (ASHIFTRT, 1, dest); + break; + } + case 6: + case 7: + /* Don't expand fine-grained when combining, because that will + make the pattern fail. */ + if (! rtx_equal_function_value_matters + && ! reload_in_progress && ! 
reload_completed) + { + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + emit_insn (gen_movsi (dest, source)); + break; + } + emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1))); + emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1)))); + emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1)))); + operands[0] = dest; + operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx; + gen_shifty_op (ASHIFT, operands); + if (kind == 7) + emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1))); + break; + default: + return -1; + } + return 0; +} + +/* The SH cannot load a large constant into a register, constants have to + come from a pc relative load. The reference of a pc relative load + instruction must be less than 1k infront of the instruction. This + means that we often have to dump a constant inside a function, and + generate code to branch around it. + + It is important to minimize this, since the branches will slow things + down and make things bigger. + + Worst case code looks like: + + mov.l L1,rn + bra L2 + nop + align + L1: .long value + L2: + .. + + mov.l L3,rn + bra L4 + nop + align + L3: .long value + L4: + .. + + We fix this by performing a scan before scheduling, which notices which + instructions need to have their operands fetched from the constant table + and builds the table. + + The algorithm is: + + scan, find an instruction which needs a pcrel move. Look forward, find the + last barrier which is within MAX_COUNT bytes of the requirement. + If there isn't one, make one. Process all the instructions between + the find and the barrier. + + In the above example, we can tell that L3 is within 1k of L1, so + the first move can be shrunk from the 3 insn+constant sequence into + just 1 insn, and the constant moved to L3 to make: + + mov.l L1,rn + .. + mov.l L3,rn + bra L4 + nop + align + L3:.long value + L4:.long value + + Then the second move becomes the target for the shortening process. */ + +typedef struct +{ + rtx value; /* Value in table. */ + rtx label; /* Label of value. */ + enum machine_mode mode; /* Mode of value. */ +} pool_node; + +/* The maximum number of constants that can fit into one pool, since + the pc relative range is 0...1020 bytes and constants are at least 4 + bytes long. */ + +#define MAX_POOL_SIZE (1020/4) +static pool_node pool_vector[MAX_POOL_SIZE]; +static int pool_size; + +/* ??? If we need a constant in HImode which is the truncated value of a + constant we need in SImode, we could combine the two entries thus saving + two bytes. Is this common enough to be worth the effort of implementing + it? */ + +/* ??? This stuff should be done at the same time that we shorten branches. + As it is now, we must assume that all branches are the maximum size, and + this causes us to almost always output constant pools sooner than + necessary. */ + +/* Add a constant to the pool and return its label. */ + +static rtx +add_constant (x, mode, last_value) + rtx last_value; + rtx x; + enum machine_mode mode; +{ + int i; + rtx lab; + + /* First see if we've already got it. */ + for (i = 0; i < pool_size; i++) + { + if (x->code == pool_vector[i].value->code + && mode == pool_vector[i].mode) + { + if (x->code == CODE_LABEL) + { + if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) + continue; + } + if (rtx_equal_p (x, pool_vector[i].value)) + { + lab = 0; + if (! last_value + || ! i + || ! rtx_equal_p (last_value, pool_vector[i-1].value)) + { + lab = pool_vector[i].label; + if (! 
lab) + pool_vector[i].label = lab = gen_label_rtx (); + } + return lab; + } + } + } + + /* Need a new one. */ + pool_vector[pool_size].value = x; + if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value)) + lab = 0; + else + lab = gen_label_rtx (); + pool_vector[pool_size].mode = mode; + pool_vector[pool_size].label = lab; + pool_size++; + return lab; +} + +/* Output the literal table. */ + +static void +dump_table (scan) + rtx scan; +{ + int i; + int need_align = 1; + + /* Do two passes, first time dump out the HI sized constants. */ + + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + if (p->mode == HImode) + { + if (need_align) + { + scan = emit_insn_after (gen_align_2 (), scan); + need_align = 0; + } + scan = emit_label_after (p->label, scan); + scan = emit_insn_after (gen_consttable_2 (p->value), scan); + } + } + + need_align = 1; + + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + switch (p->mode) + { + case HImode: + break; + case SImode: + case SFmode: + if (need_align) + { + need_align = 0; + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (gen_align_4 (), scan); + } + if (p->label) + scan = emit_label_after (p->label, scan); + scan = emit_insn_after (gen_consttable_4 (p->value), scan); + break; + case DFmode: + case DImode: + if (need_align) + { + need_align = 0; + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (gen_align_4 (), scan); + } + if (p->label) + scan = emit_label_after (p->label, scan); + scan = emit_insn_after (gen_consttable_8 (p->value), scan); + break; + default: + abort (); + break; + } + } + + scan = emit_insn_after (gen_consttable_end (), scan); + scan = emit_barrier_after (scan); + pool_size = 0; +} + +/* Return non-zero if constant would be an ok source for a + mov.w instead of a mov.l. */ + +static int +hi_const (src) + rtx src; +{ + return (GET_CODE (src) == CONST_INT + && INTVAL (src) >= -32768 + && INTVAL (src) <= 32767); +} + +/* Non-zero if the insn is a move instruction which needs to be fixed. */ + +/* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the + CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't + need to fix it if the input value is CONST_OK_FOR_I. */ + +static int +broken_move (insn) + rtx insn; +{ + if (GET_CODE (insn) == INSN) + { + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET + /* We can load any 8 bit value if we don't care what the high + order bits end up as. */ + && GET_MODE (SET_DEST (pat)) != QImode + && CONSTANT_P (SET_SRC (pat)) + && ! (TARGET_SH3E + && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE + && (fp_zero_operand (SET_SRC (pat)) + || fp_one_operand (SET_SRC (pat))) + && GET_CODE (SET_DEST (pat)) == REG + && REGNO (SET_DEST (pat)) >= FIRST_FP_REG + && REGNO (SET_DEST (pat)) <= LAST_FP_REG) + && (GET_CODE (SET_SRC (pat)) != CONST_INT + || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat))))) + return 1; + } + + return 0; +} + +static int +mova_p (insn) + rtx insn; +{ + return (GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) == SET + && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC + && XINT (SET_SRC (PATTERN (insn)), 1) == 1); +} + +/* Find the last barrier from insn FROM which is close enough to hold the + constant pool. If we can't find one, then create one near the end of + the range. 
*/ + +static rtx +find_barrier (num_mova, mova, from) + int num_mova; + rtx mova, from; +{ + int count_si = 0; + int count_hi = 0; + int found_hi = 0; + int found_si = 0; + int hi_align = 2; + int si_align = 2; + int leading_mova = num_mova; + rtx barrier_before_mova, found_barrier = 0, good_barrier = 0; + int si_limit; + int hi_limit; + + /* For HImode: range is 510, add 4 because pc counts from address of + second instruction after this one, subtract 2 for the jump instruction + that we may need to emit before the table, subtract 2 for the instruction + that fills the jump delay slot (in very rare cases, reorg will take an + instruction from after the constant pool or will leave the delay slot + empty). This gives 510. + For SImode: range is 1020, add 4 because pc counts from address of + second instruction after this one, subtract 2 in case pc is 2 byte + aligned, subtract 2 for the jump instruction that we may need to emit + before the table, subtract 2 for the instruction that fills the jump + delay slot. This gives 1018. */ + + /* The branch will always be shortened now that the reference address for + forward branches is the successor address, thus we need no longer make + adjustments to the [sh]i_limit for -O0. */ + + si_limit = 1018; + hi_limit = 510; + + while (from && count_si < si_limit && count_hi < hi_limit) + { + int inc = get_attr_length (from); + int new_align = 1; + + if (GET_CODE (from) == CODE_LABEL) + { + if (optimize) + new_align = 1 << label_to_alignment (from); + else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER) + new_align = 1 << barrier_align (from); + else + new_align = 1; + inc = 0; + } + + if (GET_CODE (from) == BARRIER) + { + + found_barrier = from; + + /* If we are at the end of the function, or in front of an alignment + instruction, we need not insert an extra alignment. We prefer + this kind of barrier. */ + if (barrier_align (from) > 2) + good_barrier = from; + } + + if (broken_move (from)) + { + rtx pat, src, dst; + enum machine_mode mode; + + pat = PATTERN (from); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + src = SET_SRC (pat); + dst = SET_DEST (pat); + mode = GET_MODE (dst); + + /* We must explicitly check the mode, because sometimes the + front end will generate code to load unsigned constants into + HImode targets without properly sign extending them. */ + if (mode == HImode + || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG)) + { + found_hi += 2; + /* We put the short constants before the long constants, so + we must count the length of short constants in the range + for the long constants. */ + /* ??? This isn't optimal, but is easy to do. */ + si_limit -= 2; + } + else + { + while (si_align > 2 && found_si + si_align - 2 > count_si) + si_align >>= 1; + if (found_si > count_si) + count_si = found_si; + found_si += GET_MODE_SIZE (mode); + if (num_mova) + si_limit -= GET_MODE_SIZE (mode); + } + } + + if (mova_p (from)) + { + if (! num_mova++) + { + leading_mova = 0; + mova = from; + barrier_before_mova = good_barrier ? good_barrier : found_barrier; + } + if (found_si > count_si) + count_si = found_si; + } + else if (GET_CODE (from) == JUMP_INSN + && (GET_CODE (PATTERN (from)) == ADDR_VEC + || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)) + { + if (num_mova) + num_mova--; + if (barrier_align (next_real_insn (from)) == CACHE_LOG) + { + /* We have just passed the barrier in front of the + ADDR_DIFF_VEC, which is stored in found_barrier. 
Since + the ADDR_DIFF_VEC is accessed as data, just like our pool + constants, this is a good opportunity to accommodate what + we have gathered so far. + If we waited any longer, we could end up at a barrier in + front of code, which gives worse cache usage for separated + instruction / data caches. */ + good_barrier = found_barrier; + break; + } + else + { + rtx body = PATTERN (from); + inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body)); + } + } + + if (found_si) + { + if (new_align > si_align) + { + si_limit -= count_si - 1 & new_align - si_align; + si_align = new_align; + } + count_si = count_si + new_align - 1 & -new_align; + count_si += inc; + } + if (found_hi) + { + if (new_align > hi_align) + { + hi_limit -= count_hi - 1 & new_align - hi_align; + hi_align = new_align; + } + count_hi = count_hi + new_align - 1 & -new_align; + count_hi += inc; + } + from = NEXT_INSN (from); + } + + if (num_mova) + if (leading_mova) + { + /* Try as we might, the leading mova is out of range. Change + it into a load (which will become a pcload) and retry. */ + SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); + INSN_CODE (mova) = -1; + return find_barrier (0, 0, mova); + } + else + { + /* Insert the constant pool table before the mova instruction, + to prevent the mova label reference from going out of range. */ + from = mova; + good_barrier = found_barrier = barrier_before_mova; + } + + if (found_barrier) + { + if (good_barrier && next_real_insn (found_barrier)) + found_barrier = good_barrier; + } + else + { + /* We didn't find a barrier in time to dump our stuff, + so we'll make one. */ + rtx label = gen_label_rtx (); + + /* If we exceeded the range, then we must back up over the last + instruction we looked at. Otherwise, we just need to undo the + NEXT_INSN at the end of the loop. */ + if (count_hi > hi_limit || count_si > si_limit) + from = PREV_INSN (PREV_INSN (from)); + else + from = PREV_INSN (from); + + /* Walk back to be just before any jump or label. + Putting it before a label reduces the number of times the branch + around the constant pool table will be hit. Putting it before + a jump makes it more likely that the bra delay slot will be + filled. */ + while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE + || GET_CODE (from) == CODE_LABEL) + from = PREV_INSN (from); + + from = emit_jump_insn_after (gen_jump (label), from); + JUMP_LABEL (from) = label; + LABEL_NUSES (label) = 1; + found_barrier = emit_barrier_after (from); + emit_label_after (label, found_barrier); + } + + return found_barrier; +} + +/* If the instruction INSN is implemented by a special function, and we can + positively find the register that is used to call the sfunc, and this + register is not used anywhere else in this instruction - except as the + destination of a set, return this register; else, return 0. */ +rtx +sfunc_uses_reg (insn) + rtx insn; +{ + int i; + rtx pattern, part, reg_part, reg; + + if (GET_CODE (insn) != INSN) + return 0; + pattern = PATTERN (insn); + if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC) + return 0; + + for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--) + { + part = XVECEXP (pattern, 0, i); + if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode) + reg_part = part; + } + if (! 
reg_part) + return 0; + reg = XEXP (reg_part, 0); + for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--) + { + part = XVECEXP (pattern, 0, i); + if (part == reg_part || GET_CODE (part) == CLOBBER) + continue; + if (reg_mentioned_p (reg, ((GET_CODE (part) == SET + && GET_CODE (SET_DEST (part)) == REG) + ? SET_SRC (part) : part))) + return 0; + } + return reg; +} + +/* See if the only way in which INSN uses REG is by calling it, or by + setting it while calling it. Set *SET to a SET rtx if the register + is set by INSN. */ + +static int +noncall_uses_reg (reg, insn, set) + rtx reg; + rtx insn; + rtx *set; +{ + rtx pattern, reg2; + + *set = NULL_RTX; + + reg2 = sfunc_uses_reg (insn); + if (reg2 && REGNO (reg2) == REGNO (reg)) + { + pattern = single_set (insn); + if (pattern + && GET_CODE (SET_DEST (pattern)) == REG + && REGNO (reg) == REGNO (SET_DEST (pattern))) + *set = pattern; + return 0; + } + if (GET_CODE (insn) != CALL_INSN) + { + /* We don't use rtx_equal_p because we don't care if the mode is + different. */ + pattern = single_set (insn); + if (pattern + && GET_CODE (SET_DEST (pattern)) == REG + && REGNO (reg) == REGNO (SET_DEST (pattern))) + { + rtx par, part; + int i; + + *set = pattern; + par = PATTERN (insn); + if (GET_CODE (par) == PARALLEL) + for (i = XVECLEN (par, 0) - 1; i >= 0; i--) + { + part = XVECEXP (par, 0, i); + if (GET_CODE (part) != SET && reg_mentioned_p (reg, part)) + return 1; + } + return reg_mentioned_p (reg, SET_SRC (pattern)); + } + + return 1; + } + + pattern = PATTERN (insn); + + if (GET_CODE (pattern) == PARALLEL) + { + int i; + + for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--) + if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i))) + return 1; + pattern = XVECEXP (pattern, 0, 0); + } + + if (GET_CODE (pattern) == SET) + { + if (reg_mentioned_p (reg, SET_DEST (pattern))) + { + /* We don't use rtx_equal_p, because we don't care if the + mode is different. */ + if (GET_CODE (SET_DEST (pattern)) != REG + || REGNO (reg) != REGNO (SET_DEST (pattern))) + return 1; + + *set = pattern; + } + + pattern = SET_SRC (pattern); + } + + if (GET_CODE (pattern) != CALL + || GET_CODE (XEXP (pattern, 0)) != MEM + || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0))) + return 1; + + return 0; +} + +/* Given a X, a pattern of an insn or a part of it, return a mask of used + general registers. Bits 0..15 mean that the respective registers + are used as inputs in the instruction. Bits 16..31 mean that the + registers 0..15, respectively, are used as outputs, or are clobbered. + IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */ +int +regs_used (x, is_dest) + rtx x; int is_dest; +{ + enum rtx_code code; + char *fmt; + int i, used = 0; + + if (! x) + return used; + code = GET_CODE (x); + switch (code) + { + case REG: + if (REGNO (x) < 16) + return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) + << (REGNO (x) + is_dest)); + return 0; + case SUBREG: + { + rtx y = SUBREG_REG (x); + + if (GET_CODE (y) != REG) + break; + if (REGNO (y) < 16) + return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) + << (REGNO (y) + SUBREG_WORD (x) + is_dest)); + return 0; + } + case SET: + return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16); + case RETURN: + /* If there was a return value, it must have been indicated with USE. 
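+       With the bit layout described above, 0x00ffff00 marks r8-r15 as
+       used as inputs (bits 8..15) and r0-r7 as set or clobbered
+       (bits 16..23).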
*/ + return 0x00ffff00; + case CLOBBER: + is_dest = 1; + break; + case MEM: + is_dest = 0; + break; + case CALL: + used |= 0x00ff00f0; + break; + } + + fmt = GET_RTX_FORMAT (code); + + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + used |= regs_used (XVECEXP (x, i, j), is_dest); + } + else if (fmt[i] == 'e') + used |= regs_used (XEXP (x, i), is_dest); + } + return used; +} + +/* Create an instruction that prevents redirection of a conditional branch + to the destination of the JUMP with address ADDR. + If the branch needs to be implemented as an indirect jump, try to find + a scratch register for it. + If NEED_BLOCK is 0, don't do anything unless we need a scratch register. + If any preceding insn that doesn't fit into a delay slot is good enough, + pass 1. Pass 2 if a definite blocking insn is needed. + -1 is used internally to avoid deep recursion. + If a blocking instruction is made or recognized, return it. */ + +static rtx +gen_block_redirect (jump, addr, need_block) + rtx jump; + int addr, need_block; +{ + int dead = 0; + rtx prev = prev_nonnote_insn (jump); + rtx dest; + + /* First, check if we already have an instruction that satisfies our need. */ + if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev)) + { + if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) + return prev; + if (GET_CODE (PATTERN (prev)) == USE + || GET_CODE (PATTERN (prev)) == CLOBBER + || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) + prev = jump; + else if ((need_block &= ~1) < 0) + return prev; + else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect) + need_block = 0; + } + /* We can't use JUMP_LABEL here because it might be undefined + when not optimizing. */ + dest = XEXP (SET_SRC (PATTERN (jump)), 0); + /* If the branch is out of range, try to find a scratch register for it. */ + if (optimize + && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)) + { + rtx scan; + /* Don't look for the stack pointer as a scratch register, + it would cause trouble if an interrupt occurred. */ + unsigned try = 0x7fff, used; + int jump_left = flag_expensive_optimizations + 1; + + /* It is likely that the most recent eligible instruction is wanted for + the delay slot. Therefore, find out which registers it uses, and + try to avoid using them. */ + + for (scan = jump; scan = PREV_INSN (scan); ) + { + enum rtx_code code; + + if (INSN_DELETED_P (scan)) + continue; + code = GET_CODE (scan); + if (code == CODE_LABEL || code == JUMP_INSN) + break; + if (code == INSN + && GET_CODE (PATTERN (scan)) != USE + && GET_CODE (PATTERN (scan)) != CLOBBER + && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES) + { + try &= ~regs_used (PATTERN (scan), 0); + break; + } + } + for (used = dead = 0, scan = JUMP_LABEL (jump); scan = NEXT_INSN (scan); ) + { + enum rtx_code code; + + if (INSN_DELETED_P (scan)) + continue; + code = GET_CODE (scan); + if (GET_RTX_CLASS (code) == 'i') + { + used |= regs_used (PATTERN (scan), 0); + if (code == CALL_INSN) + used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0); + dead |= (used >> 16) & ~used; + if (dead & try) + { + dead &= try; + break; + } + if (code == JUMP_INSN) + if (jump_left-- && simplejump_p (scan)) + scan = JUMP_LABEL (scan); + else + break; + } + } + /* Mask out the stack pointer again, in case it was + the only 'free' register we have found. 
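+     Bit 15 is the stack pointer (r15); masking with 0x7fff keeps only
+     r0-r14.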
*/ + dead &= 0x7fff; + } + /* If the immediate destination is still in range, check for possible + threading with a jump beyond the delay slot insn. + Don't check if we are called recursively; the jump has been or will be + checked in a different invocation then. */ + + else if (optimize && need_block >= 0) + { + rtx next = next_active_insn (next_active_insn (dest)); + if (next && GET_CODE (next) == JUMP_INSN + && GET_CODE (PATTERN (next)) == SET + && recog_memoized (next) == CODE_FOR_jump) + { + dest = JUMP_LABEL (next); + if (dest + && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098) + gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1); + } + } + + if (dead) + { + rtx reg = gen_rtx (REG, SImode, exact_log2 (dead & -dead)); + + /* It would be nice if we could convert the jump into an indirect + jump / far branch right now, and thus exposing all constituent + instructions to further optimization. However, reorg uses + simplejump_p to determine if there is an unconditional jump where + it should try to schedule instructions from the target of the + branch; simplejump_p fails for indirect jumps even if they have + a JUMP_LABEL. */ + rtx insn = emit_insn_before (gen_indirect_jump_scratch + (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump)))) + , jump); + INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch; + return insn; + } + else if (need_block) + /* We can't use JUMP_LABEL here because it might be undefined + when not optimizing. */ + return emit_insn_before (gen_block_branch_redirect + (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0)))) + , jump); + return prev; +} + +#define CONDJUMP_MIN -252 +#define CONDJUMP_MAX 262 +struct far_branch +{ + /* A label (to be placed) in front of the jump + that jumps to our ultimate destination. */ + rtx near_label; + /* Where we are going to insert it if we cannot move the jump any farther, + or the jump itself if we have picked up an existing jump. */ + rtx insert_place; + /* The ultimate destination. */ + rtx far_label; + struct far_branch *prev; + /* If the branch has already been created, its address; + else the address of its first prospective user. */ + int address; +}; + +enum mdep_reorg_phase_e mdep_reorg_phase; +void +gen_far_branch (bp) + struct far_branch *bp; +{ + rtx insn = bp->insert_place; + rtx jump; + rtx label = gen_label_rtx (); + + emit_label_after (label, insn); + if (bp->far_label) + { + jump = emit_jump_insn_after (gen_jump (bp->far_label), insn); + LABEL_NUSES (bp->far_label)++; + } + else + jump = emit_jump_insn_after (gen_return (), insn); + /* Emit a barrier so that reorg knows that any following instructions + are not reachable via a fall-through path. + But don't do this when not optimizing, since we wouldn't supress the + alignment for the barrier then, and could end up with out-of-range + pc-relative loads. */ + if (optimize) + emit_barrier_after (jump); + emit_label_after (bp->near_label, insn); + JUMP_LABEL (jump) = bp->far_label; + if (! invert_jump (insn, label)) + abort (); + /* Prevent reorg from undoing our splits. */ + gen_block_redirect (jump, bp->address += 2, 2); +} + +/* Fix up ADDR_DIFF_VECs. */ +void +fixup_addr_diff_vecs (first) + rtx first; +{ + rtx insn; + + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx vec_lab, pat, prev, prevpat, x, braf_label; + + if (GET_CODE (insn) != JUMP_INSN + || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) + continue; + pat = PATTERN (insn); + vec_lab = XEXP (XEXP (pat, 0), 0); + + /* Search the matching casesi_jump_2. 
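+     That is, walk backwards from the vector's own label until we find
+     the jump whose USE operand refers to VEC_LAB.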
*/ + for (prev = vec_lab; ; prev = PREV_INSN (prev)) + { + if (GET_CODE (prev) != JUMP_INSN) + continue; + prevpat = PATTERN (prev); + if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2) + continue; + x = XVECEXP (prevpat, 0, 1); + if (GET_CODE (x) != USE) + continue; + x = XEXP (x, 0); + if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab) + break; + } + + /* Emit the reference label of the braf where it belongs, right after + the casesi_jump_2 (i.e. braf). */ + braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0); + emit_label_after (braf_label, prev); + + /* Fix up the ADDR_DIF_VEC to be relative + to the reference address of the braf. */ + XEXP (XEXP (pat, 0), 0) = braf_label; + } +} + +/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following + a barrier. Return the base 2 logarithm of the desired alignment. */ +int +barrier_align (barrier_or_label) + rtx barrier_or_label; +{ + rtx next = next_real_insn (barrier_or_label), pat, prev; + int slot, credit; + + if (! next) + return 0; + + pat = PATTERN (next); + + if (GET_CODE (pat) == ADDR_DIFF_VEC) + return 2; + + if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1) + /* This is a barrier in front of a constant table. */ + return 0; + + prev = prev_real_insn (barrier_or_label); + if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC) + { + pat = PATTERN (prev); + /* If this is a very small table, we want to keep the alignment after + the table to the minimum for proper code alignment. */ + return ((TARGET_SMALLCODE + || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat)) + <= 1 << (CACHE_LOG - 2))) + ? 1 : CACHE_LOG); + } + + if (TARGET_SMALLCODE) + return 0; + + if (! TARGET_SH3 || ! optimize) + return CACHE_LOG; + + /* When fixing up pcloads, a constant table might be inserted just before + the basic block that ends with the barrier. Thus, we can't trust the + instruction lengths before that. */ + if (mdep_reorg_phase > SH_FIXUP_PCLOAD) + { + /* Check if there is an immediately preceding branch to the insn beyond + the barrier. We must weight the cost of discarding useful information + from the current cache line when executing this branch and there is + an alignment, against that of fetching unneeded insn in front of the + branch target when there is no alignment. */ + + /* PREV is presumed to be the JUMP_INSN for the barrier under + investigation. Skip to the insn before it. */ + prev = prev_real_insn (prev); + + for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2; + credit >= 0 && prev && GET_CODE (prev) == INSN; + prev = prev_real_insn (prev)) + { + if (GET_CODE (PATTERN (prev)) == USE + || GET_CODE (PATTERN (prev)) == CLOBBER) + continue; + if (GET_CODE (PATTERN (prev)) == SEQUENCE) + prev = XVECEXP (PATTERN (prev), 0, 1); + if (slot && + get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) + slot = 0; + credit -= get_attr_length (prev); + } + if (prev + && GET_CODE (prev) == JUMP_INSN + && JUMP_LABEL (prev) + && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label) + && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0))) + return 0; + } + + return CACHE_LOG; +} + +/* If we are inside a phony loop, lmost any kind of label can turn up as the + first one in the loop. Aligning a braf label causes incorrect switch + destination addresses; we can detect braf labels because they are + followed by a BARRIER. + Applying loop alignment to small constant or switch tables is a waste + of space, so we suppress this too. 
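+   Like barrier_align, the value returned is the base 2 logarithm of the
+   desired alignment.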
*/ +int +sh_loop_align (label) + rtx label; +{ + rtx next = label; + + do + next = next_nonnote_insn (next); + while (next && GET_CODE (next) == CODE_LABEL); + + if (! next + || GET_RTX_CLASS (GET_CODE (next)) != 'i' + || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC + || recog_memoized (next) == CODE_FOR_consttable_2) + return 0; + return 2; +} + +/* Exported to toplev.c. + + Do a final pass over the function, just before delayed branch + scheduling. */ + +void +machine_dependent_reorg (first) + rtx first; +{ + rtx insn, mova; + int num_mova; + rtx r0_rtx = gen_rtx (REG, Pmode, 0); + rtx r0_inc_rtx = gen_rtx (POST_INC, Pmode, r0_rtx); + + /* If relaxing, generate pseudo-ops to associate function calls with + the symbols they call. It does no harm to not generate these + pseudo-ops. However, when we can generate them, it enables to + linker to potentially relax the jsr to a bsr, and eliminate the + register load and, possibly, the constant pool entry. */ + + mdep_reorg_phase = SH_INSERT_USES_LABELS; + if (TARGET_RELAX) + { + /* Remove all REG_LABEL notes. We want to use them for our own + purposes. This works because none of the remaining passes + need to look at them. + + ??? But it may break in the future. We should use a machine + dependent REG_NOTE, or some other approach entirely. */ + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + rtx note; + + while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0) + remove_note (insn, note); + } + } + + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx pattern, reg, link, set, scan, dies, label; + int rescan = 0, foundinsn = 0; + + if (GET_CODE (insn) == CALL_INSN) + { + pattern = PATTERN (insn); + + if (GET_CODE (pattern) == PARALLEL) + pattern = XVECEXP (pattern, 0, 0); + if (GET_CODE (pattern) == SET) + pattern = SET_SRC (pattern); + + if (GET_CODE (pattern) != CALL + || GET_CODE (XEXP (pattern, 0)) != MEM) + continue; + + reg = XEXP (XEXP (pattern, 0), 0); + } + else + { + reg = sfunc_uses_reg (insn); + if (! reg) + continue; + } + + if (GET_CODE (reg) != REG) + continue; + + /* This is a function call via REG. If the only uses of REG + between the time that it is set and the time that it dies + are in function calls, then we can associate all the + function calls with the setting of REG. */ + + for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) + { + if (REG_NOTE_KIND (link) != 0) + continue; + set = single_set (XEXP (link, 0)); + if (set && rtx_equal_p (reg, SET_DEST (set))) + { + link = XEXP (link, 0); + break; + } + } + + if (! link) + { + /* ??? Sometimes global register allocation will have + deleted the insn pointed to by LOG_LINKS. Try + scanning backward to find where the register is set. */ + for (scan = PREV_INSN (insn); + scan && GET_CODE (scan) != CODE_LABEL; + scan = PREV_INSN (scan)) + { + if (GET_RTX_CLASS (GET_CODE (scan)) != 'i') + continue; + + if (! reg_mentioned_p (reg, scan)) + continue; + + if (noncall_uses_reg (reg, scan, &set)) + break; + + if (set) + { + link = scan; + break; + } + } + } + + if (! link) + continue; + + /* The register is set at LINK. */ + + /* We can only optimize the function call if the register is + being set to a symbol. In theory, we could sometimes + optimize calls to a constant location, but the assembler + and linker do not support that at present. 
*/ + if (GET_CODE (SET_SRC (set)) != SYMBOL_REF + && GET_CODE (SET_SRC (set)) != LABEL_REF) + continue; + + /* Scan forward from LINK to the place where REG dies, and + make sure that the only insns which use REG are + themselves function calls. */ + + /* ??? This doesn't work for call targets that were allocated + by reload, since there may not be a REG_DEAD note for the + register. */ + + dies = NULL_RTX; + for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan)) + { + rtx scanset; + + /* Don't try to trace forward past a CODE_LABEL if we haven't + seen INSN yet. Ordinarily, we will only find the setting insn + in LOG_LINKS if it is in the same basic block. However, + cross-jumping can insert code labels in between the load and + the call, and can result in situations where a single call + insn may have two targets depending on where we came from. */ + + if (GET_CODE (scan) == CODE_LABEL && ! foundinsn) + break; + + if (GET_RTX_CLASS (GET_CODE (scan)) != 'i') + continue; + + /* Don't try to trace forward past a JUMP. To optimize + safely, we would have to check that all the + instructions at the jump destination did not use REG. */ + + if (GET_CODE (scan) == JUMP_INSN) + break; + + if (! reg_mentioned_p (reg, scan)) + continue; + + if (noncall_uses_reg (reg, scan, &scanset)) + break; + + if (scan == insn) + foundinsn = 1; + + if (scan != insn + && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan))) + { + /* There is a function call to this register other + than the one we are checking. If we optimize + this call, we need to rescan again below. */ + rescan = 1; + } + + /* ??? We shouldn't have to worry about SCANSET here. + We should just be able to check for a REG_DEAD note + on a function call. However, the REG_DEAD notes are + apparently not dependable around libcalls; c-torture + execute/920501-2 is a test case. If SCANSET is set, + then this insn sets the register, so it must have + died earlier. Unfortunately, this will only handle + the cases in which the register is, in fact, set in a + later insn. */ + + /* ??? We shouldn't have to use FOUNDINSN here. + However, the LOG_LINKS fields are apparently not + entirely reliable around libcalls; + newlib/libm/math/e_pow.c is a test case. Sometimes + an insn will appear in LOG_LINKS even though it is + not the most recent insn which sets the register. */ + + if (foundinsn + && (scanset + || find_reg_note (scan, REG_DEAD, reg))) + { + dies = scan; + break; + } + } + + if (! dies) + { + /* Either there was a branch, or some insn used REG + other than as a function call address. */ + continue; + } + + /* Create a code label, and put it in a REG_LABEL note on + the insn which sets the register, and on each call insn + which uses the register. In final_prescan_insn we look + for the REG_LABEL notes, and output the appropriate label + or pseudo-op. 
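+        For a call, final_prescan_insn emits a .uses pseudo-op naming this
+        label, which is what lets the linker relax the jsr into a bsr and
+        eliminate the register load.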
*/ + + label = gen_label_rtx (); + REG_NOTES (link) = gen_rtx (EXPR_LIST, REG_LABEL, label, + REG_NOTES (link)); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, label, + REG_NOTES (insn)); + if (rescan) + { + scan = link; + do + { + rtx reg2; + + scan = NEXT_INSN (scan); + if (scan != insn + && ((GET_CODE (scan) == CALL_INSN + && reg_mentioned_p (reg, scan)) + || ((reg2 = sfunc_uses_reg (scan)) + && REGNO (reg2) == REGNO (reg)))) + REG_NOTES (scan) = gen_rtx (EXPR_LIST, REG_LABEL, + label, REG_NOTES (scan)); + } + while (scan != dies); + } + } + } + + if (TARGET_SH2) + fixup_addr_diff_vecs (first); + + if (optimize) + { + mdep_reorg_phase = SH_SHORTEN_BRANCHES0; + shorten_branches (first); + } + /* Scan the function looking for move instructions which have to be + changed to pc-relative loads and insert the literal tables. */ + + mdep_reorg_phase = SH_FIXUP_PCLOAD; + for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn)) + { + if (mova_p (insn)) + { + if (! num_mova++) + mova = insn; + } + else if (GET_CODE (insn) == JUMP_INSN + && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC + && num_mova) + { + rtx scan; + int total; + + num_mova--; + + /* Some code might have been inserted between the mova and + its ADDR_DIFF_VEC. Check if the mova is still in range. */ + for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan)) + total += get_attr_length (scan); + + /* range of mova is 1020, add 4 because pc counts from address of + second instruction after this one, subtract 2 in case pc is 2 + byte aligned. Possible alignment needed for the ADDR_DIFF_VEC + cancels out with alignment effects of the mova itself. */ + if (total > 1022) + { + /* Change the mova into a load, and restart scanning + there. broken_move will then return true for mova. */ + SET_SRC (PATTERN (mova)) + = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); + INSN_CODE (mova) = -1; + insn = mova; + } + } + if (broken_move (insn)) + { + rtx scan; + /* Scan ahead looking for a barrier to stick the constant table + behind. */ + rtx barrier = find_barrier (num_mova, mova, insn); + rtx last_float_move, last_float = 0, *last_float_addr; + + if (num_mova && ! mova_p (mova)) + { + /* find_barrier had to change the first mova into a + pcload; thus, we have to start with this new pcload. */ + insn = mova; + num_mova = 0; + } + /* Now find all the moves between the points and modify them. */ + for (scan = insn; scan != barrier; scan = NEXT_INSN (scan)) + { + if (GET_CODE (scan) == CODE_LABEL) + last_float = 0; + if (broken_move (scan)) + { + rtx *patp = &PATTERN (scan), pat = *patp; + rtx src, dst; + rtx lab; + rtx newinsn; + rtx newsrc; + enum machine_mode mode; + + if (GET_CODE (pat) == PARALLEL) + patp = &XVECEXP (pat, 0, 0), pat = *patp; + src = SET_SRC (pat); + dst = SET_DEST (pat); + mode = GET_MODE (dst); + + if (mode == SImode && hi_const (src) + && REGNO (dst) != FPUL_REG) + { + int offset = 0; + + mode = HImode; + while (GET_CODE (dst) == SUBREG) + { + offset += SUBREG_WORD (dst); + dst = SUBREG_REG (dst); + } + dst = gen_rtx (REG, HImode, REGNO (dst) + offset); + } + + if (GET_CODE (dst) == REG + && ((REGNO (dst) >= FIRST_FP_REG + && REGNO (dst) <= LAST_XD_REG) + || REGNO (dst) == FPUL_REG)) + { + if (last_float + && reg_set_between_p (r0_rtx, last_float_move, scan)) + last_float = 0; + lab = add_constant (src, mode, last_float); + if (lab) + emit_insn_before (gen_mova (lab), scan); + else + *last_float_addr = r0_inc_rtx; + last_float_move = scan; + last_float = src; + newsrc = gen_rtx (MEM, mode, + ((TARGET_SH4 && ! 
TARGET_FMOVD + || REGNO (dst) == FPUL_REG) + ? r0_inc_rtx + : r0_rtx)); + last_float_addr = &XEXP (newsrc, 0); + } + else + { + lab = add_constant (src, mode, 0); + newsrc = gen_rtx (MEM, mode, + gen_rtx (LABEL_REF, VOIDmode, lab)); + } + RTX_UNCHANGING_P (newsrc) = 1; + *patp = gen_rtx (SET, VOIDmode, dst, newsrc); + INSN_CODE (scan) = -1; + } + } + dump_table (barrier); + insn = barrier; + } + } + + mdep_reorg_phase = SH_SHORTEN_BRANCHES1; + insn_addresses = 0; + split_branches (first); + + /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it + also has an effect on the register that holds the addres of the sfunc. + Insert an extra dummy insn in front of each sfunc that pretends to + use this register. */ + if (flag_delayed_branch) + { + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx reg = sfunc_uses_reg (insn); + + if (! reg) + continue; + emit_insn_before (gen_use_sfunc_addr (reg), insn); + } + } +#if 0 + /* fpscr is not actually a user variable, but we pretend it is for the + sake of the previous optimization passes, since we want it handled like + one. However, we don't have eny debugging information for it, so turn + it into a non-user variable now. */ + if (TARGET_SH4) + REG_USERVAR_P (get_fpscr_rtx ()) = 0; +#endif + if (optimize) + sh_flag_remove_dead_before_cse = 1; + mdep_reorg_phase = SH_AFTER_MDEP_REORG; +} + +int +get_dest_uid (label, max_uid) + rtx label; + int max_uid; +{ + rtx dest = next_real_insn (label); + int dest_uid; + if (! dest) + /* This can happen for an undefined label. */ + return 0; + dest_uid = INSN_UID (dest); + /* If this is a newly created branch redirection blocking instruction, + we cannot index the branch_uid or insn_addresses arrays with its + uid. But then, we won't need to, because the actual destination is + the following branch. */ + while (dest_uid >= max_uid) + { + dest = NEXT_INSN (dest); + dest_uid = INSN_UID (dest); + } + if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN) + return 0; + return dest_uid; +} + +/* Split condbranches that are out of range. Also add clobbers for + scratch registers that are needed in far jumps. + We do this before delay slot scheduling, so that it can take our + newly created instructions into account. It also allows us to + find branches with common targets more easily. */ + +static void +split_branches (first) + rtx first; +{ + rtx insn; + struct far_branch **uid_branch, *far_branch_list = 0; + int max_uid = get_max_uid (); + + /* Find out which branches are out of range. */ + shorten_branches (first); + + uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch); + bzero ((char *) uid_branch, max_uid * sizeof *uid_branch); + + for (insn = first; insn; insn = NEXT_INSN (insn)) + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + continue; + else if (INSN_DELETED_P (insn)) + { + /* Shorten_branches would split this instruction again, + so transform it into a note. 
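+     Turning it into a NOTE_INSN_DELETED note keeps the insn chain intact
+     while hiding the insn from later passes.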
*/ + PUT_CODE (insn, NOTE); + NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (insn) = 0; + } + else if (GET_CODE (insn) == JUMP_INSN + /* Don't mess with ADDR_DIFF_VEC */ + && (GET_CODE (PATTERN (insn)) == SET + || GET_CODE (PATTERN (insn)) == RETURN)) + { + enum attr_type type = get_attr_type (insn); + if (type == TYPE_CBRANCH) + { + rtx next, beyond; + + if (get_attr_length (insn) > 4) + { + rtx src = SET_SRC (PATTERN (insn)); + rtx cond = XEXP (src, 0); + rtx olabel = XEXP (XEXP (src, 1), 0); + rtx jump; + int addr = insn_addresses[INSN_UID (insn)]; + rtx label = 0; + int dest_uid = get_dest_uid (olabel, max_uid); + struct far_branch *bp = uid_branch[dest_uid]; + + /* redirect_jump needs a valid JUMP_LABEL, and it might delete + the label if the LABEL_NUSES count drops to zero. There is + always a jump_optimize pass that sets these values, but it + proceeds to delete unreferenced code, and then if not + optimizing, to un-delete the deleted instructions, thus + leaving labels with too low uses counts. */ + if (! optimize) + { + JUMP_LABEL (insn) = olabel; + LABEL_NUSES (olabel)++; + } + if (! bp) + { + bp = (struct far_branch *) alloca (sizeof *bp); + uid_branch[dest_uid] = bp; + bp->prev = far_branch_list; + far_branch_list = bp; + bp->far_label + = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0); + LABEL_NUSES (bp->far_label)++; + } + else + { + label = bp->near_label; + if (! label && bp->address - addr >= CONDJUMP_MIN) + { + rtx block = bp->insert_place; + + if (GET_CODE (PATTERN (block)) == RETURN) + block = PREV_INSN (block); + else + block = gen_block_redirect (block, + bp->address, 2); + label = emit_label_after (gen_label_rtx (), + PREV_INSN (block)); + bp->near_label = label; + } + else if (label && ! NEXT_INSN (label)) + if (addr + 2 - bp->address <= CONDJUMP_MAX) + bp->insert_place = insn; + else + gen_far_branch (bp); + } + if (! label + || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN) + { + bp->near_label = label = gen_label_rtx (); + bp->insert_place = insn; + bp->address = addr; + } + if (! redirect_jump (insn, label)) + abort (); + } + else + { + /* get_attr_length (insn) == 2 */ + /* Check if we have a pattern where reorg wants to redirect + the branch to a label from an unconditional branch that + is too far away. */ + /* We can't use JUMP_LABEL here because it might be undefined + when not optimizing. */ + /* A syntax error might cause beyond to be NULL_RTX. 
*/ + beyond + = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), + 0)); + + if (beyond + && (GET_CODE (beyond) == JUMP_INSN + || (GET_CODE (beyond = next_active_insn (beyond)) + == JUMP_INSN)) + && GET_CODE (PATTERN (beyond)) == SET + && recog_memoized (beyond) == CODE_FOR_jump + && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))] + - insn_addresses[INSN_UID (insn)] + 252U) + > 252 + 258 + 2)) + gen_block_redirect (beyond, + insn_addresses[INSN_UID (beyond)], 1); + } + + next = next_active_insn (insn); + + if ((GET_CODE (next) == JUMP_INSN + || GET_CODE (next = next_active_insn (next)) == JUMP_INSN) + && GET_CODE (PATTERN (next)) == SET + && recog_memoized (next) == CODE_FOR_jump + && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))] + - insn_addresses[INSN_UID (insn)] + 252U) + > 252 + 258 + 2)) + gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1); + } + else if (type == TYPE_JUMP || type == TYPE_RETURN) + { + int addr = insn_addresses[INSN_UID (insn)]; + rtx far_label = 0; + int dest_uid = 0; + struct far_branch *bp; + + if (type == TYPE_JUMP) + { + far_label = XEXP (SET_SRC (PATTERN (insn)), 0); + dest_uid = get_dest_uid (far_label, max_uid); + if (! dest_uid) + { + /* Parse errors can lead to labels outside + the insn stream. */ + if (! NEXT_INSN (far_label)) + continue; + + if (! optimize) + { + JUMP_LABEL (insn) = far_label; + LABEL_NUSES (far_label)++; + } + redirect_jump (insn, NULL_RTX); + far_label = 0; + } + } + bp = uid_branch[dest_uid]; + if (! bp) + { + bp = (struct far_branch *) alloca (sizeof *bp); + uid_branch[dest_uid] = bp; + bp->prev = far_branch_list; + far_branch_list = bp; + bp->near_label = 0; + bp->far_label = far_label; + if (far_label) + LABEL_NUSES (far_label)++; + } + else if (bp->near_label && ! NEXT_INSN (bp->near_label)) + if (addr - bp->address <= CONDJUMP_MAX) + emit_label_after (bp->near_label, PREV_INSN (insn)); + else + { + gen_far_branch (bp); + bp->near_label = 0; + } + else + bp->near_label = 0; + bp->address = addr; + bp->insert_place = insn; + if (! far_label) + emit_insn_before (gen_block_branch_redirect (const0_rtx), insn); + else + gen_block_redirect (insn, addr, bp->near_label ? 2 : 0); + } + } + /* Generate all pending far branches, + and free our references to the far labels. */ + while (far_branch_list) + { + if (far_branch_list->near_label + && ! NEXT_INSN (far_branch_list->near_label)) + gen_far_branch (far_branch_list); + if (optimize + && far_branch_list->far_label + && ! --LABEL_NUSES (far_branch_list->far_label)) + delete_insn (far_branch_list->far_label); + far_branch_list = far_branch_list->prev; + } + + /* Instruction length information is no longer valid due to the new + instructions that have been generated. */ + init_insn_lengths (); +} + +/* Dump out instruction addresses, which is useful for debugging the + constant pool table stuff. + + If relaxing, output the label and pseudo-ops used to link together + calls and the instruction which set the registers. */ + +/* ??? This is unnecessary, and probably should be deleted. This makes + the insn_addresses declaration above unnecessary. */ + +/* ??? The addresses printed by this routine for insns are nonsense for + insns which are inside of a sequence where none of the inner insns have + variable length. This is because the second pass of shorten_branches + does not bother to update them. 
*/ + +void +final_prescan_insn (insn, opvec, noperands) + rtx insn; + rtx *opvec; + int noperands; +{ + if (TARGET_DUMPISIZE) + fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]); + + if (TARGET_RELAX) + { + rtx note; + + note = find_reg_note (insn, REG_LABEL, NULL_RTX); + if (note) + { + rtx pattern; + + pattern = PATTERN (insn); + if (GET_CODE (pattern) == PARALLEL) + pattern = XVECEXP (pattern, 0, 0); + if (GET_CODE (pattern) == CALL + || (GET_CODE (pattern) == SET + && (GET_CODE (SET_SRC (pattern)) == CALL + || get_attr_type (insn) == TYPE_SFUNC))) + asm_fprintf (asm_out_file, "\t.uses %LL%d\n", + CODE_LABEL_NUMBER (XEXP (note, 0))); + else if (GET_CODE (pattern) == SET) + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (XEXP (note, 0))); + else + abort (); + } + } +} + +/* Dump out any constants accumulated in the final pass. These will + only be labels. */ + +char * +output_jump_label_table () +{ + int i; + + if (pool_size) + { + fprintf (asm_out_file, "\t.align 2\n"); + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (p->label)); + output_asm_insn (".long %O0", &p->value); + } + pool_size = 0; + } + + return ""; +} + +/* A full frame looks like: + + arg-5 + arg-4 + [ if current_function_anonymous_args + arg-3 + arg-2 + arg-1 + arg-0 ] + saved-fp + saved-r10 + saved-r11 + saved-r12 + saved-pr + local-n + .. + local-1 + local-0 <- fp points here. */ + +/* Number of bytes pushed for anonymous args, used to pass information + between expand_prologue and expand_epilogue. */ + +static int extra_push; + +/* Adjust the stack by SIZE bytes. REG holds the rtl of the register + to be adjusted, and TEMP, if nonnegative, holds the register number + of a general register that we may clobber. */ + +static void +output_stack_adjust (size, reg, temp) + int size; + rtx reg; + int temp; +{ + if (size) + { + if (CONST_OK_FOR_I (size)) + emit_insn (gen_addsi3 (reg, reg, GEN_INT (size))); + /* Try to do it with two partial adjustments; however, we must make + sure that the stack is properly aligned at all times, in case + an interrupt occurs between the two partial adjustments. */ + else if (CONST_OK_FOR_I (size / 2 & -4) + && CONST_OK_FOR_I (size - (size / 2 & -4))) + { + emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4))); + emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4)))); + } + else + { + rtx const_reg; + + /* If TEMP is invalid, we could temporarily save a general + register to MACL. However, there is currently no need + to handle this case, so just abort when we see it. */ + if (temp < 0) + abort (); + const_reg = gen_rtx (REG, SImode, temp); + + /* If SIZE is negative, subtract the positive value. + This sometimes allows a constant pool entry to be shared + between prologue and epilogue code. */ + if (size < 0) + { + emit_insn (gen_movsi (const_reg, GEN_INT (-size))); + emit_insn (gen_subsi3 (reg, reg, const_reg)); + } + else + { + emit_insn (gen_movsi (const_reg, GEN_INT (size))); + emit_insn (gen_addsi3 (reg, reg, const_reg)); + } + } + } +} + +/* Output RTL to push register RN onto the stack. */ + +static void +push (rn) + int rn; +{ + rtx x; + if (rn == FPUL_REG) + x = gen_push_fpul (); + else if (TARGET_SH4 && TARGET_FMOVD && ! 
TARGET_FPU_SINGLE + && rn >= FIRST_FP_REG && rn <= LAST_XD_REG) + { + if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG) + return; + x = gen_push_4 (gen_rtx (REG, DFmode, rn)); + } + else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG) + x = gen_push_e (gen_rtx (REG, SFmode, rn)); + else + x = gen_push (gen_rtx (REG, SImode, rn)); + + x = emit_insn (x); + REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC, + gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0); +} + +/* Output RTL to pop register RN from the stack. */ + +static void +pop (rn) + int rn; +{ + rtx x; + if (rn == FPUL_REG) + x = gen_pop_fpul (); + else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE + && rn >= FIRST_FP_REG && rn <= LAST_XD_REG) + { + if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG) + return; + x = gen_pop_4 (gen_rtx (REG, DFmode, rn)); + } + else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG) + x = gen_pop_e (gen_rtx (REG, SFmode, rn)); + else + x = gen_pop (gen_rtx (REG, SImode, rn)); + + x = emit_insn (x); + REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC, + gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0); +} + +/* Generate code to push the regs specified in the mask. */ + +static void +push_regs (mask, mask2) + int mask, mask2; +{ + int i; + + /* Push PR last; this gives better latencies after the prologue, and + candidates for the return delay slot when there are no general + registers pushed. */ + for (i = 0; i < 32; i++) + if (mask & (1 << i) && i != PR_REG) + push (i); + for (i = 32; i < FIRST_PSEUDO_REGISTER; i++) + if (mask2 & (1 << (i - 32))) + push (i); + if (mask & (1 << PR_REG)) + push (PR_REG); +} + +/* Work out the registers which need to be saved, both as a mask and a + count of saved words. + + If doing a pragma interrupt function, then push all regs used by the + function, and if we call another function (we can tell by looking at PR), + make sure that all the regs it clobbers are safe too. */ + +static int +calc_live_regs (count_ptr, live_regs_mask2) + int *count_ptr; + int *live_regs_mask2; +{ + int reg; + int live_regs_mask = 0; + int count; + int interrupt_handler; + + if ((lookup_attribute + ("interrupt_handler", + DECL_MACHINE_ATTRIBUTES (current_function_decl))) + != NULL_TREE) + interrupt_handler = 1; + else + interrupt_handler = 0; + + *live_regs_mask2 = 0; + /* If we can save a lot of saves by switching to double mode, do that. */ + if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE) + for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2) + if (regs_ever_live[reg] && regs_ever_live[reg+1] + && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa)) + && ++count > 2) + { + target_flags &= ~FPU_SINGLE_BIT; + break; + } + for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--) + { + if ((interrupt_handler && ! pragma_trapa) + ? (/* Need to save all the regs ever live. */ + (regs_ever_live[reg] + || (call_used_regs[reg] + && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG) + && regs_ever_live[PR_REG])) + && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM + && reg != RETURN_ADDRESS_POINTER_REGNUM + && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG) + : (/* Only push those regs which are used and need to be saved. */ + regs_ever_live[reg] && ! call_used_regs[reg])) + { + if (reg >= 32) + *live_regs_mask2 |= 1 << (reg - 32); + else + live_regs_mask |= 1 << reg; + count++; + if (TARGET_SH4 && TARGET_FMOVD && reg >= FIRST_FP_REG) + if (reg <= LAST_FP_REG) + { + if (! TARGET_FPU_SINGLE && ! 
regs_ever_live[reg ^ 1]) + { + if (reg >= 32) + *live_regs_mask2 |= 1 << ((reg ^ 1) - 32); + else + live_regs_mask |= 1 << (reg ^ 1); + count++; + } + } + else if (reg <= LAST_XD_REG) + { + /* Must switch to double mode to access these registers. */ + target_flags &= ~FPU_SINGLE_BIT; + count++; + } + } + } + + *count_ptr = count; + return live_regs_mask; +} + +/* Code to generate prologue and epilogue sequences */ + +void +sh_expand_prologue () +{ + int live_regs_mask; + int d, i; + int live_regs_mask2; + int save_flags = target_flags; + int double_align = 0; + + /* We have pretend args if we had an object sent partially in registers + and partially on the stack, e.g. a large structure. */ + output_stack_adjust (-current_function_pretend_args_size, + stack_pointer_rtx, 3); + + extra_push = 0; + + /* This is set by SETUP_VARARGS to indicate that this is a varargs + routine. Clear it here so that the next function isn't affected. */ + if (current_function_anonymous_args) + { + current_function_anonymous_args = 0; + + /* This is not used by the SH3E calling convention */ + if (!TARGET_SH3E) + { + /* Push arg regs as if they'd been provided by caller in stack. */ + for (i = 0; i < NPARM_REGS(SImode); i++) + { + int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1; + if (i >= (NPARM_REGS(SImode) + - current_function_args_info.arg_count[(int) SH_ARG_INT] + )) + break; + push (rn); + extra_push += 4; + } + } + } + + /* If we're supposed to switch stacks at function entry, do so now. */ + if (sp_switch) + emit_insn (gen_sp_switch_1 ()); + + live_regs_mask = calc_live_regs (&d, &live_regs_mask2); + /* ??? Maybe we could save some switching if we can move a mode switch + that already happens to be at the function start into the prologue. */ + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + push_regs (live_regs_mask, live_regs_mask2); + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + + if (TARGET_ALIGN_DOUBLE && d & 1) + double_align = 4; + + target_flags = save_flags; + + output_stack_adjust (-get_frame_size () - double_align, + stack_pointer_rtx, 3); + + if (frame_pointer_needed) + emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx)); +} + +void +sh_expand_epilogue () +{ + int live_regs_mask; + int d, i; + + int live_regs_mask2; + int save_flags = target_flags; + int frame_size = get_frame_size (); + + live_regs_mask = calc_live_regs (&d, &live_regs_mask2); + + if (TARGET_ALIGN_DOUBLE && d & 1) + frame_size += 4; + + if (frame_pointer_needed) + { + output_stack_adjust (frame_size, frame_pointer_rtx, 7); + + /* We must avoid moving the stack pointer adjustment past code + which reads from the local frame, else an interrupt could + occur after the SP adjustment and clobber data in the local + frame. */ + emit_insn (gen_blockage ()); + emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx)); + } + else if (frame_size) + { + /* We must avoid moving the stack pointer adjustment past code + which reads from the local frame, else an interrupt could + occur after the SP adjustment and clobber data in the local + frame. */ + emit_insn (gen_blockage ()); + output_stack_adjust (frame_size, stack_pointer_rtx, 7); + } + + /* Pop all the registers. 
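+     PR was pushed last by push_regs, so it is popped first here; the
+     remaining registers are popped in decreasing register number, undoing
+     the pushes in reverse order.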
*/ + + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + if (live_regs_mask & (1 << PR_REG)) + pop (PR_REG); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + int j = (FIRST_PSEUDO_REGISTER - 1) - i; + if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG) + pop (j); + else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32)))) + pop (j); + } + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + target_flags = save_flags; + + output_stack_adjust (extra_push + current_function_pretend_args_size, + stack_pointer_rtx, 7); + + /* Switch back to the normal stack if necessary. */ + if (sp_switch) + emit_insn (gen_sp_switch_2 ()); +} + +/* Clear variables at function end. */ + +void +function_epilogue (stream, size) + FILE *stream; + int size; +{ + trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0; + sp_switch = NULL_RTX; +} + +rtx +sh_builtin_saveregs (arglist) + tree arglist; +{ + tree fntype = TREE_TYPE (current_function_decl); + /* First unnamed integer register. */ + int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT]; + /* Number of integer registers we need to save. */ + int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg); + /* First unnamed SFmode float reg */ + int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT]; + /* Number of SFmode float regs to save. */ + int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg); + int ptrsize = GET_MODE_SIZE (Pmode); + rtx valist, regbuf, fpregs; + int bufsize, regno; + + /* Allocate block of memory for the regs. */ + /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte? + Or can assign_stack_local accept a 0 SIZE argument? */ + bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD); + + regbuf = assign_stack_local (BLKmode, bufsize, 0); + MEM_SET_IN_STRUCT_P (regbuf, 1); + + /* Save int args. + This is optimized to only save the regs that are necessary. Explicitly + named args need not be saved. */ + if (n_intregs > 0) + move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg, + gen_rtx (MEM, BLKmode, + plus_constant (XEXP (regbuf, 0), + n_floatregs * UNITS_PER_WORD)), + n_intregs, n_intregs * UNITS_PER_WORD); + + /* Save float args. + This is optimized to only save the regs that are necessary. Explicitly + named args need not be saved. + We explicitly build a pointer to the buffer because it halves the insn + count when not optimizing (otherwise the pointer is built for each reg + saved). + We emit the moves in reverse order so that we can use predecrement. 
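+     The pointer starts just past the float save area, at regbuf plus
+     n_floatregs * UNITS_PER_WORD, and is stepped backwards before each
+     store.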
*/ + + fpregs = gen_reg_rtx (Pmode); + emit_move_insn (fpregs, XEXP (regbuf, 0)); + emit_insn (gen_addsi3 (fpregs, fpregs, + GEN_INT (n_floatregs * UNITS_PER_WORD))); + if (TARGET_SH4) + { + for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2) + { + emit_insn (gen_addsi3 (fpregs, fpregs, + GEN_INT (-2 * UNITS_PER_WORD))); + emit_move_insn (gen_rtx (MEM, DFmode, fpregs), + gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno)); + } + regno = first_floatreg; + if (regno & 1) + { + emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD))); + emit_move_insn (gen_rtx (MEM, SFmode, fpregs), + gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno + - (TARGET_LITTLE_ENDIAN != 0))); + } + } + else + for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--) + { + emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD))); + emit_move_insn (gen_rtx (MEM, SFmode, fpregs), + gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno)); + } + + /* Return the address of the regbuf. */ + return XEXP (regbuf, 0); +} + +/* Define the offset between two registers, one to be eliminated, and + the other its replacement, at the start of a routine. */ + +int +initial_elimination_offset (from, to) + int from; + int to; +{ + int regs_saved; + int total_saved_regs_space; + int total_auto_space = get_frame_size (); + int save_flags = target_flags; + + int live_regs_mask, live_regs_mask2; + live_regs_mask = calc_live_regs (®s_saved, &live_regs_mask2); + if (TARGET_ALIGN_DOUBLE && regs_saved & 1) + total_auto_space += 4; + target_flags = save_flags; + + total_saved_regs_space = (regs_saved) * 4; + + if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) + return total_saved_regs_space + total_auto_space; + + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return total_saved_regs_space + total_auto_space; + + /* Initial gap between fp and sp is 0. */ + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return 0; + + if (from == RETURN_ADDRESS_POINTER_REGNUM + && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM)) + { + int i, n = total_saved_regs_space; + for (i = PR_REG-1; i >= 0; i--) + if (live_regs_mask & (1 << i)) + n -= 4; + return n + total_auto_space; + } + + abort (); +} + +/* Handle machine specific pragmas to be semi-compatible with Hitachi + compiler. */ + +int +sh_handle_pragma (p_getc, p_ungetc, pname) + int (* p_getc) PROTO((void)); + void (* p_ungetc) PROTO((int)); + char * pname; +{ + int retval = 0; + + if (strcmp (pname, "interrupt") == 0) + pragma_interrupt = retval = 1; + else if (strcmp (pname, "trapa") == 0) + pragma_interrupt = pragma_trapa = retval = 1; + else if (strcmp (pname, "nosave_low_regs") == 0) + pragma_nosave_low_regs = retval = 1; + + return retval; +} + +/* Generate 'handle_interrupt' attribute for decls */ + +void +sh_pragma_insert_attributes (node, attributes, prefix) + tree node; + tree * attributes; + tree * prefix; +{ + tree a; + + if (! pragma_interrupt + || TREE_CODE (node) != FUNCTION_DECL) + return; + + /* We are only interested in fields. */ + if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd') + return; + + /* Add a 'handle_interrupt' attribute. */ + * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes); + + return; +} + +/* Return nonzero if ATTR is a valid attribute for DECL. + ATTRIBUTES are any existing attributes and ARGS are the arguments + supplied with ATTR. 
+ + Supported attributes: + + interrupt_handler -- specifies this function is an interrupt handler. + + sp_switch -- specifies an alternate stack for an interrupt handler + to run on. + + trap_exit -- use a trapa to exit an interrupt function instead of + an rte instruction. */ + +int +sh_valid_machine_decl_attribute (decl, attributes, attr, args) + tree decl; + tree attributes; + tree attr; + tree args; +{ + int retval = 0; + + if (TREE_CODE (decl) != FUNCTION_DECL) + return 0; + + if (is_attribute_p ("interrupt_handler", attr)) + { + return 1; + } + + if (is_attribute_p ("sp_switch", attr)) + { + /* The sp_switch attribute only has meaning for interrupt functions. */ + if (!pragma_interrupt) + return 0; + + /* sp_switch must have an argument. */ + if (!args || TREE_CODE (args) != TREE_LIST) + return 0; + + /* The argument must be a constant string. */ + if (TREE_CODE (TREE_VALUE (args)) != STRING_CST) + return 0; + + sp_switch = gen_rtx (SYMBOL_REF, VOIDmode, + TREE_STRING_POINTER (TREE_VALUE (args))); + return 1; + } + + if (is_attribute_p ("trap_exit", attr)) + { + /* The trap_exit attribute only has meaning for interrupt functions. */ + if (!pragma_interrupt) + return 0; + + /* trap_exit must have an argument. */ + if (!args || TREE_CODE (args) != TREE_LIST) + return 0; + + /* The argument must be a constant integer. */ + if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) + return 0; + + trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args)); + return 1; + } +} + + +/* Predicates used by the templates. */ + +/* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx. + Used only in general_movsrc_operand. */ + +int +system_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + switch (REGNO (op)) + { + case PR_REG: + case MACL_REG: + case MACH_REG: + return 1; + } + return 0; +} + +/* Returns 1 if OP can be source of a simple move operation. + Same as general_operand, but a LABEL_REF is valid, PRE_DEC is + invalid as are subregs of system registers. */ + +int +general_movsrc_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == MEM) + { + rtx inside = XEXP (op, 0); + if (GET_CODE (inside) == CONST) + inside = XEXP (inside, 0); + + if (GET_CODE (inside) == LABEL_REF) + return 1; + + if (GET_CODE (inside) == PLUS + && GET_CODE (XEXP (inside, 0)) == LABEL_REF + && GET_CODE (XEXP (inside, 1)) == CONST_INT) + return 1; + + /* Only post inc allowed. */ + if (GET_CODE (inside) == PRE_DEC) + return 0; + } + + if ((mode == QImode || mode == HImode) + && (GET_CODE (op) == SUBREG + && GET_CODE (XEXP (op, 0)) == REG + && system_reg_operand (XEXP (op, 0), mode))) + return 0; + + return general_operand (op, mode); +} + +/* Returns 1 if OP can be a destination of a move. + Same as general_operand, but no preinc allowed. */ + +int +general_movdst_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + /* Only pre dec allowed. */ + if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC) + return 0; + + return general_operand (op, mode); +} + +/* Returns 1 if OP is a normal arithmetic register. 
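+   That is, a register other than T, PR, MACH, MACL, and, except on SH4,
+   FPUL.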
*/ + +int +arith_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (register_operand (op, mode)) + { + int regno; + + if (GET_CODE (op) == REG) + regno = REGNO (op); + else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) + regno = REGNO (SUBREG_REG (op)); + else + return 1; + + return (regno != T_REG && regno != PR_REG + && (regno != FPUL_REG || TARGET_SH4) + && regno != MACH_REG && regno != MACL_REG); + } + return 0; +} + +int +fp_arith_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (register_operand (op, mode)) + { + int regno; + + if (GET_CODE (op) == REG) + regno = REGNO (op); + else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) + regno = REGNO (SUBREG_REG (op)); + else + return 1; + + return (regno != T_REG && regno != PR_REG && regno > 15 + && regno != MACH_REG && regno != MACL_REG); + } + return 0; +} + +int +fp_extended_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == FLOAT_EXTEND && GET_MODE (op) == mode) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + return fp_arith_reg_operand (op, mode); +} + +/* Returns 1 if OP is a valid source operand for an arithmetic insn. */ + +int +arith_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op))) + return 1; + + return 0; +} + +/* Returns 1 if OP is a valid source operand for a compare insn. */ + +int +arith_reg_or_0_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op))) + return 1; + + return 0; +} + +/* Returns 1 if OP is a valid source operand for a logical operation. */ + +int +logical_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op))) + return 1; + + return 0; +} + +/* Nonzero if OP is a floating point value with value 0.0. */ + +int +fp_zero_operand (op) + rtx op; +{ + REAL_VALUE_TYPE r; + + if (GET_MODE (op) != SFmode) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r); +} + +/* Nonzero if OP is a floating point value with value 1.0. 
*/ + +int +fp_one_operand (op) + rtx op; +{ + REAL_VALUE_TYPE r; + + if (GET_MODE (op) != SFmode) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + return REAL_VALUES_EQUAL (r, dconst1); +} + +int +tertiary_reload_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE); +} + +int +fpscr_operand (op) + rtx op; +{ + return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG + && GET_MODE (op) == PSImode); +} + +int +commutative_float_operator (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_MODE (op) != mode) + return 0; + switch (GET_CODE (op)) + { + case PLUS: + case MULT: + return 1; + } + return 0; +} + +int +noncommutative_float_operator (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_MODE (op) != mode) + return 0; + switch (GET_CODE (op)) + { + case MINUS: + case DIV: + return 1; + } + return 0; +} + +int +binary_float_operator (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_MODE (op) != mode) + return 0; + switch (GET_CODE (op)) + { + case PLUS: + case MINUS: + case MULT: + case DIV: + return 1; + } + return 0; +} + +/* Return the destination address of a branch. */ + +int +branch_dest (branch) + rtx branch; +{ + rtx dest = SET_SRC (PATTERN (branch)); + int dest_uid; + + if (GET_CODE (dest) == IF_THEN_ELSE) + dest = XEXP (dest, 1); + dest = XEXP (dest, 0); + dest_uid = INSN_UID (dest); + return insn_addresses[dest_uid]; +} + +/* Return non-zero if REG is not used after INSN. + We assume REG is a reload reg, and therefore does + not live past labels. It may live past calls or jumps though. */ +int +reg_unused_after (reg, insn) + rtx reg; + rtx insn; +{ + enum rtx_code code; + rtx set; + + /* If the reg is set by this instruction, then it is safe for our + case. Disregard the case where this is a store to memory, since + we are checking a register used in the store address. */ + set = single_set (insn); + if (set && GET_CODE (SET_DEST (set)) != MEM + && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return 1; + + while (insn = NEXT_INSN (insn)) + { + code = GET_CODE (insn); + +#if 0 + /* If this is a label that existed before reload, then the register + if dead here. However, if this is a label added by reorg, then + the register may still be live here. We can't tell the difference, + so we just ignore labels completely. */ + if (code == CODE_LABEL) + return 1; + /* else */ +#endif + + if (code == JUMP_INSN) + return 0; + + /* If this is a sequence, we must handle them all at once. + We could have for instance a call that sets the target register, + and a insn in a delay slot that uses the register. In this case, + we must return 0. 
*/ + else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) + { + int i; + int retval = 0; + + for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) + { + rtx this_insn = XVECEXP (PATTERN (insn), 0, i); + rtx set = single_set (this_insn); + + if (GET_CODE (this_insn) == CALL_INSN) + code = CALL_INSN; + else if (GET_CODE (this_insn) == JUMP_INSN) + { + if (INSN_ANNULLED_BRANCH_P (this_insn)) + return 0; + code = JUMP_INSN; + } + + if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + { + if (GET_CODE (SET_DEST (set)) != MEM) + retval = 1; + else + return 0; + } + if (set == 0 + && reg_overlap_mentioned_p (reg, PATTERN (this_insn))) + return 0; + } + if (retval == 1) + return 1; + else if (code == JUMP_INSN) + return 0; + } + else if (GET_RTX_CLASS (code) == 'i') + { + rtx set = single_set (insn); + + if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return GET_CODE (SET_DEST (set)) != MEM; + if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn))) + return 0; + } + + if (code == CALL_INSN && call_used_regs[REGNO (reg)]) + return 1; + } + return 1; +} + +extern struct obstack permanent_obstack; + +rtx +get_fpscr_rtx () +{ + static rtx fpscr_rtx; + + if (! fpscr_rtx) + { + push_obstacks (&permanent_obstack, &permanent_obstack); + fpscr_rtx = gen_rtx (REG, PSImode, 48); + REG_USERVAR_P (fpscr_rtx) = 1; + pop_obstacks (); + mark_user_reg (fpscr_rtx); + } + if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG) + mark_user_reg (fpscr_rtx); + return fpscr_rtx; +} + +void +emit_sf_insn (pat) + rtx pat; +{ + rtx addr; + /* When generating reload insns, we must not create new registers. FPSCR + should already have the correct value, so do nothing to change it. */ + if (! TARGET_FPU_SINGLE && ! reload_in_progress) + { + addr = gen_reg_rtx (SImode); + emit_insn (gen_fpu_switch0 (addr)); + } + emit_insn (pat); + if (! TARGET_FPU_SINGLE && ! reload_in_progress) + { + addr = gen_reg_rtx (SImode); + emit_insn (gen_fpu_switch1 (addr)); + } +} + +void +emit_df_insn (pat) + rtx pat; +{ + rtx addr; + if (TARGET_FPU_SINGLE && ! reload_in_progress) + { + addr = gen_reg_rtx (SImode); + emit_insn (gen_fpu_switch0 (addr)); + } + emit_insn (pat); + if (TARGET_FPU_SINGLE && ! reload_in_progress) + { + addr = gen_reg_rtx (SImode); + emit_insn (gen_fpu_switch1 (addr)); + } +} + +void +expand_sf_unop (fun, operands) + rtx (*fun)(); + rtx *operands; +{ + emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ())); +} + +void +expand_sf_binop (fun, operands) + rtx (*fun)(); + rtx *operands; +{ + emit_sf_insn ((*fun) (operands[0], operands[1], operands[2], + get_fpscr_rtx ())); +} + +void +expand_df_unop (fun, operands) + rtx (*fun)(); + rtx *operands; +{ + emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ())); +} + +void +expand_df_binop (fun, operands) + rtx (*fun)(); + rtx *operands; +{ + emit_df_insn ((*fun) (operands[0], operands[1], operands[2], + get_fpscr_rtx ())); +} + +void +expand_fp_branch (compare, branch) + rtx (*compare) (), (*branch) (); +{ + (GET_MODE (sh_compare_op0) == SFmode ? emit_sf_insn : emit_df_insn) + ((*compare) ()); + emit_jump_insn ((*branch) ()); +} + +/* We don't want to make fpscr call-saved, because that would prevent + channging it, and it would also cost an exstra instruction to save it. + We don't want it to be known as a global register either, because + that disables all flow analysis. 
But it has to be live at the function return.  Thus, we need to insert a USE at the end of the function.  */
+/* This should best be called at about the time FINALIZE_PIC is called,
+   but not dependent on flag_pic.  Alas, there is no suitable hook there,
+   so this gets called from HAVE_RETURN.  */
+int
+emit_fpscr_use ()
+{
+  static int fpscr_uses = 0;
+
+  if (rtx_equal_function_value_matters)
+    {
+      emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ()));
+      fpscr_uses++;
+    }
+  else
+    {
+      if (fpscr_uses > 1)
+        {
+          /* Due to the crude way we emit the USEs, we might end up with
+             some extra ones.  Delete all but the last one.  */
+          rtx insn;
+
+          for (insn = get_last_insn(); insn; insn = PREV_INSN (insn))
+            if (GET_CODE (insn) == INSN
+                && GET_CODE (PATTERN (insn)) == USE
+                && GET_CODE (XEXP (PATTERN (insn), 0)) == REG
+                && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
+              {
+                insn = PREV_INSN (insn);
+                break;
+              }
+          for (; insn; insn = PREV_INSN (insn))
+            if (GET_CODE (insn) == INSN
+                && GET_CODE (PATTERN (insn)) == USE
+                && GET_CODE (XEXP (PATTERN (insn), 0)) == REG
+                && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
+              {
+                PUT_CODE (insn, NOTE);
+                NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+                NOTE_SOURCE_FILE (insn) = 0;
+              }
+        }
+      fpscr_uses = 0;
+    }
+}
+
+/* ??? gcc does flow analysis strictly after common subexpression
+   elimination.  As a result, common subexpression elimination fails
+   when there are some intervening statements setting the same register.
+   If we did nothing about this, this would hurt the precision switching
+   for SH4 badly.  There is some cse after reload, but it is unable to
+   undo the extra register pressure from the unused instructions, and
+   it cannot remove auto-increment loads.
+
+   A C code example that shows this flow/cse weakness for (at least) SH
+   and sparc (as of gcc ss-970706) is this:
+
+double
+f(double a)
+{
+  double d;
+  d = 0.1;
+  a += d;
+  d = 1.1;
+  d = 0.1;
+  a *= d;
+  return a;
+}
+
+   So we add another pass before common subexpression elimination, to
+   remove assignments that are dead due to a following assignment in the
+   same basic block.  */
+
+int sh_flag_remove_dead_before_cse;
+
+static void
+mark_use (x, reg_set_block)
+     rtx x, *reg_set_block;
+{
+  enum rtx_code code;
+
+  if (! x)
+    return;
+  code = GET_CODE (x);
+  switch (code)
+    {
+    case REG:
+      {
+        int regno = REGNO (x);
+        int nregs = (regno < FIRST_PSEUDO_REGISTER
+                     ? HARD_REGNO_NREGS (regno, GET_MODE (x))
+                     : 1);
+        do
+          {
+            reg_set_block[regno + nregs - 1] = 0;
+          }
+        while (--nregs);
+        break;
+      }
+    case SET:
+      {
+        rtx dest = SET_DEST (x);
+
+        if (GET_CODE (dest) == SUBREG)
+          dest = SUBREG_REG (dest);
+        if (GET_CODE (dest) != REG)
+          mark_use (dest, reg_set_block);
+        mark_use (SET_SRC (x), reg_set_block);
+        break;
+      }
+    case CLOBBER:
+      break;
+    default:
+      {
+        char *fmt = GET_RTX_FORMAT (code);
+        int i, j;
+        for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+          {
+            if (fmt[i] == 'e')
+              mark_use (XEXP (x, i), reg_set_block);
+            else if (fmt[i] == 'E')
+              for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+                mark_use (XVECEXP (x, i, j), reg_set_block);
+          }
+        break;
+      }
+    }
+}
+
+int
+remove_dead_before_cse ()
+{
+  rtx *reg_set_block, last, last_call, insn, set;
+  int in_libcall = 0;
+
+  /* This pass should run just once, after rtl generation.  */
+
+  if (! sh_flag_remove_dead_before_cse
+      || rtx_equal_function_value_matters
+      || reload_completed)
+    return 0;
+
+  sh_flag_remove_dead_before_cse = 0;
+
+  reg_set_block = (rtx *)alloca (max_reg_num () * sizeof (rtx));
+  bzero ((char *)reg_set_block, max_reg_num () * sizeof (rtx));
+  last_call = last = get_last_insn ();
+  for (insn = last; insn; insn = PREV_INSN (insn))
+    {
+      if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+        continue;
+      if (GET_CODE (insn) == JUMP_INSN)
+        {
+          last_call = last = insn;
+          continue;
+        }
+      set = single_set (insn);
+
+      /* Don't delete parts of libcalls, since that would confuse cse, loop
+         and flow.  */
+      if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
+        in_libcall = 1;
+      else if (in_libcall)
+        {
+          if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
+            in_libcall = 0;
+        }
+      else if (set && GET_CODE (SET_DEST (set)) == REG)
+        {
+          int regno = REGNO (SET_DEST (set));
+          rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno]
+                          ? last_call
+                          : last);
+          if (reg_set_block[regno] == ref_insn
+              && (regno >= FIRST_PSEUDO_REGISTER
+                  || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1)
+              && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn)))
+            {
+              PUT_CODE (insn, NOTE);
+              NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+              NOTE_SOURCE_FILE (insn) = 0;
+              continue;
+            }
+          else
+            reg_set_block[REGNO (SET_DEST (set))] = ref_insn;
+        }
+      if (GET_CODE (insn) == CALL_INSN)
+        {
+          last_call = insn;
+          mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block);
+        }
+      mark_use (PATTERN (insn), reg_set_block);
+    }
+  return 0;
+}
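
For reference, the three machine-specific attributes validated by sh_valid_machine_decl_attribute earlier in this file are written in user code roughly as follows. This is an illustrative sketch, not part of sh.c: the variable name alt_stack and the trap number 12 are made up; per the comments above, sp_switch names (as a string) a variable holding the alternate stack address, and trap_exit gives the trapa number used at function exit instead of rte.

/* Hypothetical user code -- illustrative only, not part of this file.  */
extern char *alt_stack;   /* assumed variable holding the alternate stack address */

void isr (void)
     __attribute__ ((interrupt_handler,          /* treat isr as an interrupt handler */
                     sp_switch ("alt_stack"),    /* run the handler on alt_stack */
                     trap_exit (12)));           /* exit with trapa #12 instead of rte */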
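To make the intent of remove_dead_before_cse concrete, here is the same example quoted in the comment above, annotated with one reading of what the pass does. A sketch only, assuming a single basic block with no intervening use of the register.

/* Same function as in the comment above, annotated (illustrative only).
   Scanning the block backwards, the pass sees that d is assigned again
   before it is ever read, so the middle store is turned into a
   NOTE_INSN_DELETED note and never reaches cse.  */
double
f (double a)
{
  double d;
  d = 0.1;
  a += d;
  d = 1.1;   /* dead: overwritten by the next statement, removed by the pass */
  d = 0.1;
  a *= d;
  return a;
}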