diff options
Diffstat (limited to 'gcc/config/sh/sh.c')
-rwxr-xr-x | gcc/config/sh/sh.c | 4786 |
1 files changed, 0 insertions, 4786 deletions
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c deleted file mode 100755 index 4d4b5cd..0000000 --- a/gcc/config/sh/sh.c +++ /dev/null @@ -1,4786 +0,0 @@ -/* Output routines for GCC for Hitachi Super-H. - Copyright (C) 1993-1998 Free Software Foundation, Inc. - -This file is part of GNU CC. - -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -/* Contributed by Steve Chamberlain (sac@cygnus.com). - Improved by Jim Wilson (wilson@cygnus.com). */ - -#include "config.h" - -#include <stdio.h> - -#include "rtl.h" -#include "tree.h" -#include "flags.h" -#include "insn-flags.h" -#include "expr.h" -#include "regs.h" -#include "hard-reg-set.h" -#include "output.h" -#include "insn-attr.h" - -int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch; - -#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0) -#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1) - -/* ??? The pragma interrupt support will not work for SH3. */ -/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to - output code for the next function appropriate for an interrupt handler. */ -int pragma_interrupt; - -/* This is set by the trap_exit attribute for functions. It specifies - a trap number to be used in a trapa instruction at function exit - (instead of an rte instruction). */ -int trap_exit; - -/* This is used by the sp_switch attribute for functions. It specifies - a variable holding the address of the stack the interrupt function - should switch to/from at entry/exit. */ -rtx sp_switch; - -/* This is set by #pragma trapa, and is similar to the above, except that - the compiler doesn't emit code to preserve all registers. */ -static int pragma_trapa; - -/* This is set by #pragma nosave_low_regs. This is useful on the SH3, - which has a separate set of low regs for User and Supervisor modes. - This should only be used for the lowest level of interrupts. Higher levels - of interrupts must save the registers in case they themselves are - interrupted. */ -int pragma_nosave_low_regs; - -/* This is used for communication between SETUP_INCOMING_VARARGS and - sh_expand_prologue. */ -int current_function_anonymous_args; - -/* Global variables from toplev.c and final.c that are used within, but - not declared in any header file. */ -extern char *version_string; -extern int *insn_addresses; - -/* Global variables for machine-dependent things. */ - -/* Which cpu are we scheduling for. */ -enum processor_type sh_cpu; - -/* Saved operands from the last compare to use when we generate an scc - or bcc insn. */ - -rtx sh_compare_op0; -rtx sh_compare_op1; - -enum machine_mode sh_addr_diff_vec_mode; - -/* Provides the class number of the smallest class containing - reg number. */ - -int regno_reg_class[FIRST_PSEUDO_REGISTER] = -{ - R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, - GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, - GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, - GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, - GENERAL_REGS, PR_REGS, T_REGS, NO_REGS, - MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS, - FP0_REGS,FP_REGS, FP_REGS, FP_REGS, - FP_REGS, FP_REGS, FP_REGS, FP_REGS, - FP_REGS, FP_REGS, FP_REGS, FP_REGS, - FP_REGS, FP_REGS, FP_REGS, FP_REGS, - DF_REGS, DF_REGS, DF_REGS, DF_REGS, - DF_REGS, DF_REGS, DF_REGS, DF_REGS, - FPSCR_REGS, -}; - -char fp_reg_names[][5] = -{ - "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", - "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", - "fpul", - "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14", -}; - -/* Provide reg_class from a letter such as appears in the machine - description. */ - -enum reg_class reg_class_from_letter[] = -{ - /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS, - /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS, - /* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS, - /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS, - /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS, - /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS, - /* y */ FPUL_REGS, /* z */ R0_REGS -}; - -int assembler_dialect; - -rtx get_fpscr_rtx (); -void emit_sf_insn (); -void emit_df_insn (); - -static void split_branches PROTO ((rtx)); - -/* Print the operand address in x to the stream. */ - -void -print_operand_address (stream, x) - FILE *stream; - rtx x; -{ - switch (GET_CODE (x)) - { - case REG: - case SUBREG: - fprintf (stream, "@%s", reg_names[true_regnum (x)]); - break; - - case PLUS: - { - rtx base = XEXP (x, 0); - rtx index = XEXP (x, 1); - - switch (GET_CODE (index)) - { - case CONST_INT: - fprintf (stream, "@(%d,%s)", INTVAL (index), - reg_names[true_regnum (base)]); - break; - - case REG: - case SUBREG: - { - int base_num = true_regnum (base); - int index_num = true_regnum (index); - - fprintf (stream, "@(r0,%s)", - reg_names[MAX (base_num, index_num)]); - break; - } - - default: - debug_rtx (x); - abort (); - } - } - break; - - case PRE_DEC: - fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]); - break; - - case POST_INC: - fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]); - break; - - default: - output_addr_const (stream, x); - break; - } -} - -/* Print operand x (an rtx) in assembler syntax to file stream - according to modifier code. - - '.' print a .s if insn needs delay slot - ',' print LOCAL_LABEL_PREFIX - '@' print trap, rte or rts depending upon pragma interruptness - '#' output a nop if there is nothing to put in the delay slot - 'O' print a constant without the # - 'R' print the LSW of a dp value - changes if in little endian - 'S' print the MSW of a dp value - changes if in little endian - 'T' print the next word of a dp value - same as 'R' in big endian mode. - 'o' output an operator. */ - -void -print_operand (stream, x, code) - FILE *stream; - rtx x; - int code; -{ - switch (code) - { - case '.': - if (final_sequence - && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) - fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s"); - break; - case ',': - fprintf (stream, "%s", LOCAL_LABEL_PREFIX); - break; - case '@': - { - int interrupt_handler; - - if ((lookup_attribute - ("interrupt_handler", - DECL_MACHINE_ATTRIBUTES (current_function_decl))) - != NULL_TREE) - interrupt_handler = 1; - else - interrupt_handler = 0; - - if (trap_exit) - fprintf (stream, "trapa #%d", trap_exit); - else if (interrupt_handler) - fprintf (stream, "rte"); - else - fprintf (stream, "rts"); - break; - } - case '#': - /* Output a nop if there's nothing in the delay slot. */ - if (dbr_sequence_length () == 0) - fprintf (stream, "\n\tnop"); - break; - case 'O': - output_addr_const (stream, x); - break; - case 'R': - fputs (reg_names[REGNO (x) + LSW], (stream)); - break; - case 'S': - fputs (reg_names[REGNO (x) + MSW], (stream)); - break; - case 'T': - /* Next word of a double. */ - switch (GET_CODE (x)) - { - case REG: - fputs (reg_names[REGNO (x) + 1], (stream)); - break; - case MEM: - if (GET_CODE (XEXP (x, 0)) != PRE_DEC - && GET_CODE (XEXP (x, 0)) != POST_INC) - x = adj_offsettable_operand (x, 4); - print_operand_address (stream, XEXP (x, 0)); - break; - } - break; - case 'o': - switch (GET_CODE (x)) - { - case PLUS: fputs ("add", stream); break; - case MINUS: fputs ("sub", stream); break; - case MULT: fputs ("mul", stream); break; - case DIV: fputs ("div", stream); break; - } - break; - default: - switch (GET_CODE (x)) - { - case REG: - if (REGNO (x) >= FIRST_FP_REG && REGNO (x) <= LAST_FP_REG - && GET_MODE_SIZE (GET_MODE (x)) > 4) - fprintf ((stream), "d%s", reg_names[REGNO (x)]+1); - else - fputs (reg_names[REGNO (x)], (stream)); - break; - case MEM: - output_address (XEXP (x, 0)); - break; - default: - fputc ('#', stream); - output_addr_const (stream, x); - break; - } - break; - } -} - -/* Emit code to perform a block move. Choose the best method. - - OPERANDS[0] is the destination. - OPERANDS[1] is the source. - OPERANDS[2] is the size. - OPERANDS[3] is the alignment safe to use. */ - -int -expand_block_move (operands) - rtx *operands; -{ - int align = INTVAL (operands[3]); - int constp = (GET_CODE (operands[2]) == CONST_INT); - int bytes = (constp ? INTVAL (operands[2]) : 0); - - /* If it isn't a constant number of bytes, or if it doesn't have 4 byte - alignment, or if it isn't a multiple of 4 bytes, then fail. */ - if (! constp || align < 4 || (bytes % 4 != 0)) - return 0; - - if (TARGET_HARD_SH4) - { - if (bytes < 12) - return 0; - else if (bytes == 12) - { - tree entry_name; - rtx func_addr_rtx; - rtx r4 = gen_rtx (REG, SImode, 4); - rtx r5 = gen_rtx (REG, SImode, 5); - - entry_name = get_identifier ("__movstrSI12_i4"); - - func_addr_rtx - = copy_to_mode_reg (Pmode, - gen_rtx_SYMBOL_REF (Pmode, - IDENTIFIER_POINTER (entry_name))); - emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); - emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); - emit_insn (gen_block_move_real_i4 (func_addr_rtx)); - return 1; - } - else if (! TARGET_SMALLCODE) - { - tree entry_name; - rtx func_addr_rtx; - int dwords; - rtx r4 = gen_rtx (REG, SImode, 4); - rtx r5 = gen_rtx (REG, SImode, 5); - rtx r6 = gen_rtx (REG, SImode, 6); - - entry_name = get_identifier (bytes & 4 - ? "__movstr_i4_odd" - : "__movstr_i4_even"); - func_addr_rtx - = copy_to_mode_reg (Pmode, - gen_rtx_SYMBOL_REF (Pmode, - IDENTIFIER_POINTER (entry_name))); - emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); - emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); - - dwords = bytes >> 3; - emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1))); - emit_insn (gen_block_lump_real_i4 (func_addr_rtx)); - return 1; - } - else - return 0; - } - if (bytes < 64) - { - char entry[30]; - tree entry_name; - rtx func_addr_rtx; - rtx r4 = gen_rtx (REG, SImode, 4); - rtx r5 = gen_rtx (REG, SImode, 5); - - sprintf (entry, "__movstrSI%d", bytes); - entry_name = get_identifier (entry); - - func_addr_rtx - = copy_to_mode_reg (Pmode, - gen_rtx (SYMBOL_REF, Pmode, - IDENTIFIER_POINTER (entry_name))); - emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); - emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); - emit_insn (gen_block_move_real (func_addr_rtx)); - return 1; - } - - /* This is the same number of bytes as a memcpy call, but to a different - less common function name, so this will occasionally use more space. */ - if (! TARGET_SMALLCODE) - { - tree entry_name; - rtx func_addr_rtx; - int final_switch, while_loop; - rtx r4 = gen_rtx (REG, SImode, 4); - rtx r5 = gen_rtx (REG, SImode, 5); - rtx r6 = gen_rtx (REG, SImode, 6); - - entry_name = get_identifier ("__movstr"); - func_addr_rtx - = copy_to_mode_reg (Pmode, - gen_rtx (SYMBOL_REF, Pmode, - IDENTIFIER_POINTER (entry_name))); - emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); - emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); - - /* r6 controls the size of the move. 16 is decremented from it - for each 64 bytes moved. Then the negative bit left over is used - as an index into a list of move instructions. e.g., a 72 byte move - would be set up with size(r6) = 14, for one iteration through the - big while loop, and a switch of -2 for the last part. */ - - final_switch = 16 - ((bytes / 4) % 16); - while_loop = ((bytes / 4) / 16 - 1) * 16; - emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch))); - emit_insn (gen_block_lump_real (func_addr_rtx)); - return 1; - } - - return 0; -} - -/* Prepare operands for a move define_expand; specifically, one of the - operands must be in a register. */ - -int -prepare_move_operands (operands, mode) - rtx operands[]; - enum machine_mode mode; -{ - if (! reload_in_progress && ! reload_completed) - { - /* Copy the source to a register if both operands aren't registers. */ - if (! register_operand (operands[0], mode) - && ! register_operand (operands[1], mode)) - operands[1] = copy_to_mode_reg (mode, operands[1]); - - /* This case can happen while generating code to move the result - of a library call to the target. Reject `st r0,@(rX,rY)' because - reload will fail to find a spill register for rX, since r0 is already - being used for the source. */ - else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (XEXP (operands[0], 0)) == PLUS - && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG) - operands[1] = copy_to_mode_reg (mode, operands[1]); - } - - return 0; -} - -/* Prepare the operands for an scc instruction; make sure that the - compare has been done. */ -rtx -prepare_scc_operands (code) - enum rtx_code code; -{ - rtx t_reg = gen_rtx (REG, SImode, T_REG); - enum rtx_code oldcode = code; - enum machine_mode mode; - - /* First need a compare insn. */ - switch (code) - { - case NE: - /* It isn't possible to handle this case. */ - abort (); - case LT: - code = GT; - break; - case LE: - code = GE; - break; - case LTU: - code = GTU; - break; - case LEU: - code = GEU; - break; - } - if (code != oldcode) - { - rtx tmp = sh_compare_op0; - sh_compare_op0 = sh_compare_op1; - sh_compare_op1 = tmp; - } - - mode = GET_MODE (sh_compare_op0); - if (mode == VOIDmode) - mode = GET_MODE (sh_compare_op1); - - sh_compare_op0 = force_reg (mode, sh_compare_op0); - if ((code != EQ && code != NE - && (sh_compare_op1 != const0_rtx - || code == GTU || code == GEU || code == LTU || code == LEU)) - || TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT) - sh_compare_op1 = force_reg (mode, sh_compare_op1); - - if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) - (mode == SFmode ? emit_sf_insn : emit_df_insn) - (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, - gen_rtx (SET, VOIDmode, t_reg, - gen_rtx (code, SImode, - sh_compare_op0, sh_compare_op1)), - gen_rtx (USE, VOIDmode, get_fpscr_rtx ())))); - else - emit_insn (gen_rtx (SET, VOIDmode, t_reg, - gen_rtx (code, SImode, sh_compare_op0, - sh_compare_op1))); - - return t_reg; -} - -/* Called from the md file, set up the operands of a compare instruction. */ - -void -from_compare (operands, code) - rtx *operands; - int code; -{ - enum machine_mode mode = GET_MODE (sh_compare_op0); - rtx insn; - if (mode == VOIDmode) - mode = GET_MODE (sh_compare_op1); - if (code != EQ - || mode == DImode - || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)) - { - /* Force args into regs, since we can't use constants here. */ - sh_compare_op0 = force_reg (mode, sh_compare_op0); - if (sh_compare_op1 != const0_rtx - || code == GTU || code == GEU - || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)) - sh_compare_op1 = force_reg (mode, sh_compare_op1); - } - if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE) - { - from_compare (operands, GT); - insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1); - } - else - insn = gen_rtx (SET, VOIDmode, - gen_rtx (REG, SImode, 18), - gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1)); - if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) - { - insn = gen_rtx (PARALLEL, VOIDmode, - gen_rtvec (2, insn, - gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))); - (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn); - } - else - emit_insn (insn); -} - -/* Functions to output assembly code. */ - -/* Return a sequence of instructions to perform DI or DF move. - - Since the SH cannot move a DI or DF in one instruction, we have - to take care when we see overlapping source and dest registers. */ - -char * -output_movedouble (insn, operands, mode) - rtx insn; - rtx operands[]; - enum machine_mode mode; -{ - rtx dst = operands[0]; - rtx src = operands[1]; - - if (GET_CODE (dst) == MEM - && GET_CODE (XEXP (dst, 0)) == PRE_DEC) - return "mov.l %T1,%0\n\tmov.l %1,%0"; - - if (register_operand (dst, mode) - && register_operand (src, mode)) - { - if (REGNO (src) == MACH_REG) - return "sts mach,%S0\n\tsts macl,%R0"; - - /* When mov.d r1,r2 do r2->r3 then r1->r2; - when mov.d r1,r0 do r1->r0 then r2->r1. */ - - if (REGNO (src) + 1 == REGNO (dst)) - return "mov %T1,%T0\n\tmov %1,%0"; - else - return "mov %1,%0\n\tmov %T1,%T0"; - } - else if (GET_CODE (src) == CONST_INT) - { - if (INTVAL (src) < 0) - output_asm_insn ("mov #-1,%S0", operands); - else - output_asm_insn ("mov #0,%S0", operands); - - return "mov %1,%R0"; - } - else if (GET_CODE (src) == MEM) - { - int ptrreg = -1; - int dreg = REGNO (dst); - rtx inside = XEXP (src, 0); - - if (GET_CODE (inside) == REG) - ptrreg = REGNO (inside); - else if (GET_CODE (inside) == SUBREG) - ptrreg = REGNO (SUBREG_REG (inside)) + SUBREG_WORD (inside); - else if (GET_CODE (inside) == PLUS) - { - ptrreg = REGNO (XEXP (inside, 0)); - /* ??? A r0+REG address shouldn't be possible here, because it isn't - an offsettable address. Unfortunately, offsettable addresses use - QImode to check the offset, and a QImode offsettable address - requires r0 for the other operand, which is not currently - supported, so we can't use the 'o' constraint. - Thus we must check for and handle r0+REG addresses here. - We punt for now, since this is likely very rare. */ - if (GET_CODE (XEXP (inside, 1)) == REG) - abort (); - } - else if (GET_CODE (inside) == LABEL_REF) - return "mov.l %1,%0\n\tmov.l %1+4,%T0"; - else if (GET_CODE (inside) == POST_INC) - return "mov.l %1,%0\n\tmov.l %1,%T0"; - else - abort (); - - /* Work out the safe way to copy. Copy into the second half first. */ - if (dreg == ptrreg) - return "mov.l %T1,%T0\n\tmov.l %1,%0"; - } - - return "mov.l %1,%0\n\tmov.l %T1,%T0"; -} - -/* Print an instruction which would have gone into a delay slot after - another instruction, but couldn't because the other instruction expanded - into a sequence where putting the slot insn at the end wouldn't work. */ - -static void -print_slot (insn) - rtx insn; -{ - final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1); - - INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1; -} - -char * -output_far_jump (insn, op) - rtx insn; - rtx op; -{ - struct { rtx lab, reg, op; } this; - char *jump; - int far; - int offset = branch_dest (insn) - insn_addresses[INSN_UID (insn)]; - - this.lab = gen_label_rtx (); - - if (TARGET_SH2 - && offset >= -32764 - && offset - get_attr_length (insn) <= 32766) - { - far = 0; - jump = "mov.w %O0,%1;braf %1"; - } - else - { - far = 1; - jump = "mov.l %O0,%1;jmp @%1"; - } - /* If we have a scratch register available, use it. */ - if (GET_CODE (PREV_INSN (insn)) == INSN - && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch) - { - this.reg = SET_DEST (PATTERN (PREV_INSN (insn))); - output_asm_insn (jump, &this.lab); - if (dbr_sequence_length ()) - print_slot (final_sequence); - else - output_asm_insn ("nop", 0); - } - else - { - /* Output the delay slot insn first if any. */ - if (dbr_sequence_length ()) - print_slot (final_sequence); - - this.reg = gen_rtx (REG, SImode, 13); - output_asm_insn ("mov.l r13,@-r15", 0); - output_asm_insn (jump, &this.lab); - output_asm_insn ("mov.l @r15+,r13", 0); - } - if (far) - output_asm_insn (".align 2", 0); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab)); - this.op = op; - output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab); - return ""; -} - -/* Local label counter, used for constants in the pool and inside - pattern branches. */ - -static int lf = 100; - -/* Output code for ordinary branches. */ - -char * -output_branch (logic, insn, operands) - int logic; - rtx insn; - rtx *operands; -{ - switch (get_attr_length (insn)) - { - case 6: - /* This can happen if filling the delay slot has caused a forward - branch to exceed its range (we could reverse it, but only - when we know we won't overextend other branches; this should - best be handled by relaxation). - It can also happen when other condbranches hoist delay slot insn - from their destination, thus leading to code size increase. - But the branch will still be in the range -4092..+4098 bytes. */ - - if (! TARGET_RELAX) - { - int label = lf++; - /* The call to print_slot will clobber the operands. */ - rtx op0 = operands[0]; - - /* If the instruction in the delay slot is annulled (true), then - there is no delay slot where we can put it now. The only safe - place for it is after the label. final will do that by default. */ - - if (final_sequence - && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) - { - asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", - ASSEMBLER_DIALECT ? "/" : ".", label); - print_slot (final_sequence); - } - else - asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label); - - output_asm_insn ("bra\t%l0", &op0); - fprintf (asm_out_file, "\tnop\n"); - ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label); - - return ""; - } - /* When relaxing, handle this like a short branch. The linker - will fix it up if it still doesn't fit after relaxation. */ - case 2: - return logic ? "bt%.\t%l0" : "bf%.\t%l0"; - default: - abort (); - } -} - -char * -output_branchy_insn (code, template, insn, operands) - char *template; - enum rtx_code code; - rtx insn; - rtx *operands; -{ - rtx next_insn = NEXT_INSN (insn); - int label_nr; - - if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn)) - { - rtx src = SET_SRC (PATTERN (next_insn)); - if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) - { - /* Following branch not taken */ - operands[9] = gen_label_rtx (); - emit_label_after (operands[9], next_insn); - return template; - } - else - { - int offset = (branch_dest (next_insn) - - insn_addresses[INSN_UID (next_insn)] + 4); - if (offset >= -252 && offset <= 258) - { - if (GET_CODE (src) == IF_THEN_ELSE) - /* branch_true */ - src = XEXP (src, 1); - operands[9] = src; - return template; - } - } - } - operands[9] = gen_label_rtx (); - emit_label_after (operands[9], insn); - return template; -} - -char * -output_ieee_ccmpeq (insn, operands) - rtx insn, operands; -{ - output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands); -} - -/* Output to FILE the start of the assembler file. */ - -void -output_file_start (file) - FILE *file; -{ - register int pos; - - output_file_directive (file, main_input_filename); - - /* Switch to the data section so that the coffsem symbol and the - gcc2_compiled. symbol aren't in the text section. */ - data_section (); - - if (TARGET_LITTLE_ENDIAN) - fprintf (file, "\t.little\n"); -} - -/* Actual number of instructions used to make a shift by N. */ -static char ashiftrt_insns[] = - { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2}; - -/* Left shift and logical right shift are the same. */ -static char shift_insns[] = - { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; - -/* Individual shift amounts needed to get the above length sequences. - One bit right shifts clobber the T bit, so when possible, put one bit - shifts in the middle of the sequence, so the ends are eligible for - branch delay slots. */ -static short shift_amounts[32][5] = { - {0}, {1}, {2}, {2, 1}, - {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2}, - {8}, {8, 1}, {8, 2}, {8, 1, 2}, - {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8}, - {16}, {16, 1}, {16, 2}, {16, 1, 2}, - {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, - {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, - {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; - -/* Likewise, but for shift amounts < 16, up to three highmost bits - might be clobbered. This is typically used when combined with some - kind of sign or zero extension. */ - -static char ext_shift_insns[] = - { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; - -static short ext_shift_amounts[32][4] = { - {0}, {1}, {2}, {2, 1}, - {2, 2}, {2, 1, 2}, {8, -2}, {8, -1}, - {8}, {8, 1}, {8, 2}, {8, 1, 2}, - {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1}, - {16}, {16, 1}, {16, 2}, {16, 1, 2}, - {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, - {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, - {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; - -/* Assuming we have a value that has been sign-extended by at least one bit, - can we use the ext_shift_amounts with the last shift turned to an arithmetic shift - to shift it by N without data loss, and quicker than by other means? */ -#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15) - -/* This is used in length attributes in sh.md to help compute the length - of arbitrary constant shift instructions. */ - -int -shift_insns_rtx (insn) - rtx insn; -{ - rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); - int shift_count = INTVAL (XEXP (set_src, 1)); - enum rtx_code shift_code = GET_CODE (set_src); - - switch (shift_code) - { - case ASHIFTRT: - return ashiftrt_insns[shift_count]; - case LSHIFTRT: - case ASHIFT: - return shift_insns[shift_count]; - default: - abort(); - } -} - -/* Return the cost of a shift. */ - -int -shiftcosts (x) - rtx x; -{ - int value = INTVAL (XEXP (x, 1)); - - /* If shift by a non constant, then this will be expensive. */ - if (GET_CODE (XEXP (x, 1)) != CONST_INT) - return SH_DYNAMIC_SHIFT_COST; - - /* Otherwise, return the true cost in instructions. */ - if (GET_CODE (x) == ASHIFTRT) - { - int cost = ashiftrt_insns[value]; - /* If SH3, then we put the constant in a reg and use shad. */ - if (cost > 1 + SH_DYNAMIC_SHIFT_COST) - cost = 1 + SH_DYNAMIC_SHIFT_COST; - return cost; - } - else - return shift_insns[value]; -} - -/* Return the cost of an AND operation. */ - -int -andcosts (x) - rtx x; -{ - int i; - - /* Anding with a register is a single cycle and instruction. */ - if (GET_CODE (XEXP (x, 1)) != CONST_INT) - return 1; - - i = INTVAL (XEXP (x, 1)); - /* These constants are single cycle extu.[bw] instructions. */ - if (i == 0xff || i == 0xffff) - return 1; - /* Constants that can be used in an and immediate instruction is a single - cycle, but this requires r0, so make it a little more expensive. */ - if (CONST_OK_FOR_L (i)) - return 2; - /* Constants that can be loaded with a mov immediate and an and. - This case is probably unnecessary. */ - if (CONST_OK_FOR_I (i)) - return 2; - /* Any other constants requires a 2 cycle pc-relative load plus an and. - This case is probably unnecessary. */ - return 3; -} - -/* Return the cost of a multiply. */ -int -multcosts (x) - rtx x; -{ - if (TARGET_SH2) - { - /* We have a mul insn, so we can never take more than the mul and the - read of the mac reg, but count more because of the latency and extra - reg usage. */ - if (TARGET_SMALLCODE) - return 2; - return 3; - } - - /* If we're aiming at small code, then just count the number of - insns in a multiply call sequence. */ - if (TARGET_SMALLCODE) - return 5; - - /* Otherwise count all the insns in the routine we'd be calling too. */ - return 20; -} - -/* Code to expand a shift. */ - -void -gen_ashift (type, n, reg) - int type; - int n; - rtx reg; -{ - /* Negative values here come from the shift_amounts array. */ - if (n < 0) - { - if (type == ASHIFT) - type = LSHIFTRT; - else - type = ASHIFT; - n = -n; - } - - switch (type) - { - case ASHIFTRT: - emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n))); - break; - case LSHIFTRT: - if (n == 1) - emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n))); - else - emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n))); - break; - case ASHIFT: - emit_insn (gen_ashlsi3_k (reg, reg, GEN_INT (n))); - break; - } -} - -/* Same for HImode */ - -void -gen_ashift_hi (type, n, reg) - int type; - int n; - rtx reg; -{ - /* Negative values here come from the shift_amounts array. */ - if (n < 0) - { - if (type == ASHIFT) - type = LSHIFTRT; - else - type = ASHIFT; - n = -n; - } - - switch (type) - { - case ASHIFTRT: - case LSHIFTRT: - /* We don't have HImode right shift operations because using the - ordinary 32 bit shift instructions for that doesn't generate proper - zero/sign extension. - gen_ashift_hi is only called in contexts where we know that the - sign extension works out correctly. */ - { - int word = 0; - if (GET_CODE (reg) == SUBREG) - { - word = SUBREG_WORD (reg); - reg = SUBREG_REG (reg); - } - gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word)); - break; - } - case ASHIFT: - emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n))); - break; - } -} - -/* Output RTL to split a constant shift into its component SH constant - shift instructions. */ - -int -gen_shifty_op (code, operands) - int code; - rtx *operands; -{ - int value = INTVAL (operands[2]); - int max, i; - - /* Truncate the shift count in case it is out of bounds. */ - value = value & 0x1f; - - if (value == 31) - { - if (code == LSHIFTRT) - { - emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); - emit_insn (gen_movt (operands[0])); - return; - } - else if (code == ASHIFT) - { - /* There is a two instruction sequence for 31 bit left shifts, - but it requires r0. */ - if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0) - { - emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); - emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); - return; - } - } - } - else if (value == 0) - { - /* This can happen when not optimizing. We must output something here - to prevent the compiler from aborting in final.c after the try_split - call. */ - emit_insn (gen_nop ()); - return; - } - - max = shift_insns[value]; - for (i = 0; i < max; i++) - gen_ashift (code, shift_amounts[value][i], operands[0]); -} - -/* Same as above, but optimized for values where the topmost bits don't - matter. */ - -int -gen_shifty_hi_op (code, operands) - int code; - rtx *operands; -{ - int value = INTVAL (operands[2]); - int max, i; - void (*gen_fun)(); - - /* This operation is used by and_shl for SImode values with a few - high bits known to be cleared. */ - value &= 31; - if (value == 0) - { - emit_insn (gen_nop ()); - return; - } - - gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift; - if (code == ASHIFT) - { - max = ext_shift_insns[value]; - for (i = 0; i < max; i++) - gen_fun (code, ext_shift_amounts[value][i], operands[0]); - } - else - /* When shifting right, emit the shifts in reverse order, so that - solitary negative values come first. */ - for (i = ext_shift_insns[value] - 1; i >= 0; i--) - gen_fun (code, ext_shift_amounts[value][i], operands[0]); -} - -/* Output RTL for an arithmetic right shift. */ - -/* ??? Rewrite to use super-optimizer sequences. */ - -int -expand_ashiftrt (operands) - rtx *operands; -{ - rtx wrk; - char func[18]; - tree func_name; - int value; - - if (TARGET_SH3) - { - if (GET_CODE (operands[2]) != CONST_INT) - { - rtx count = copy_to_mode_reg (SImode, operands[2]); - emit_insn (gen_negsi2 (count, count)); - emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); - return 1; - } - else if (ashiftrt_insns[INTVAL (operands[2]) & 31] - > 1 + SH_DYNAMIC_SHIFT_COST) - { - rtx count - = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31))); - emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); - return 1; - } - } - if (GET_CODE (operands[2]) != CONST_INT) - return 0; - - value = INTVAL (operands[2]) & 31; - - if (value == 31) - { - emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); - return 1; - } - else if (value >= 16 && value <= 19) - { - wrk = gen_reg_rtx (SImode); - emit_insn (gen_ashrsi2_16 (wrk, operands[1])); - value -= 16; - while (value--) - gen_ashift (ASHIFTRT, 1, wrk); - emit_move_insn (operands[0], wrk); - return 1; - } - /* Expand a short sequence inline, longer call a magic routine. */ - else if (value <= 5) - { - wrk = gen_reg_rtx (SImode); - emit_move_insn (wrk, operands[1]); - while (value--) - gen_ashift (ASHIFTRT, 1, wrk); - emit_move_insn (operands[0], wrk); - return 1; - } - - wrk = gen_reg_rtx (Pmode); - - /* Load the value into an arg reg and call a helper. */ - emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]); - sprintf (func, "__ashiftrt_r4_%d", value); - func_name = get_identifier (func); - emit_move_insn (wrk, gen_rtx (SYMBOL_REF, Pmode, - IDENTIFIER_POINTER (func_name))); - emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk)); - emit_move_insn (operands[0], gen_rtx (REG, SImode, 4)); - return 1; -} - -int sh_dynamicalize_shift_p (count) - rtx count; -{ - return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST; -} - -/* Try to find a good way to implement the combiner pattern - [(set (match_operand:SI 0 "register_operand" "r") - (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "const_int_operand" "n")) - (match_operand:SI 3 "const_int_operand" "n"))) . - LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3. - return 0 for simple right / left or left/right shift combination. - return 1 for a combination of shifts with zero_extend. - return 2 for a combination of shifts with an AND that needs r0. - return 3 for a combination of shifts with an AND that needs an extra - scratch register, when the three highmost bits of the AND mask are clear. - return 4 for a combination of shifts with an AND that needs an extra - scratch register, when any of the three highmost bits of the AND mask - is set. - If ATTRP is set, store an initial right shift width in ATTRP[0], - and the instruction length in ATTRP[1] . These values are not valid - when returning 0. - When ATTRP is set and returning 1, ATTRP[2] gets set to the index into - shift_amounts for the last shift value that is to be used before the - sign extend. */ -int -shl_and_kind (left_rtx, mask_rtx, attrp) - rtx left_rtx, mask_rtx; - int *attrp; -{ - unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2; - int left = INTVAL (left_rtx), right; - int best = 0; - int cost, best_cost = 10000; - int best_right = 0, best_len = 0; - int i; - int can_ext; - - if (left < 0 || left > 31) - return 0; - if (GET_CODE (mask_rtx) == CONST_INT) - mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left; - else - mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left; - /* Can this be expressed as a right shift / left shift pair ? */ - lsb = ((mask ^ (mask - 1)) >> 1) + 1; - right = exact_log2 (lsb); - mask2 = ~(mask + lsb - 1); - lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1; - /* mask has no zeroes but trailing zeroes <==> ! mask2 */ - if (! mask2) - best_cost = shift_insns[right] + shift_insns[right + left]; - /* mask has no trailing zeroes <==> ! right */ - else if (! right && mask2 == ~(lsb2 - 1)) - { - int late_right = exact_log2 (lsb2); - best_cost = shift_insns[left + late_right] + shift_insns[late_right]; - } - /* Try to use zero extend */ - if (mask2 == ~(lsb2 - 1)) - { - int width, first; - - for (width = 8; width <= 16; width += 8) - { - /* Can we zero-extend right away? */ - if (lsb2 == (HOST_WIDE_INT)1 << width) - { - cost - = 1 + ext_shift_insns[right] + ext_shift_insns[left + right]; - if (cost < best_cost) - { - best = 1; - best_cost = cost; - best_right = right; - best_len = cost; - if (attrp) - attrp[2] = -1; - } - continue; - } - /* ??? Could try to put zero extend into initial right shift, - or even shift a bit left before the right shift. */ - /* Determine value of first part of left shift, to get to the - zero extend cut-off point. */ - first = width - exact_log2 (lsb2) + right; - if (first >= 0 && right + left - first >= 0) - { - cost = ext_shift_insns[right] + ext_shift_insns[first] + 1 - + ext_shift_insns[right + left - first]; - if (cost < best_cost) - { - best = 1; - best_cost = cost; - best_right = right; - best_len = cost; - if (attrp) - attrp[2] = first; - } - } - } - } - /* Try to use r0 AND pattern */ - for (i = 0; i <= 2; i++) - { - if (i > right) - break; - if (! CONST_OK_FOR_L (mask >> i)) - continue; - cost = (i != 0) + 2 + ext_shift_insns[left + i]; - if (cost < best_cost) - { - best = 2; - best_cost = cost; - best_right = i; - best_len = cost - 1; - } - } - /* Try to use a scratch register to hold the AND operand. */ - can_ext = ((mask << left) & 0xe0000000) == 0; - for (i = 0; i <= 2; i++) - { - if (i > right) - break; - cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3) - + (can_ext ? ext_shift_insns : shift_insns)[left + i]; - if (cost < best_cost) - { - best = 4 - can_ext; - best_cost = cost; - best_right = i; - best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i); - } - } - - if (attrp) - { - attrp[0] = best_right; - attrp[1] = best_len; - } - return best; -} - -/* This is used in length attributes of the unnamed instructions - corresponding to shl_and_kind return values of 1 and 2. */ -int -shl_and_length (insn) - rtx insn; -{ - rtx set_src, left_rtx, mask_rtx; - int attributes[3]; - - set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); - left_rtx = XEXP (XEXP (set_src, 0), 1); - mask_rtx = XEXP (set_src, 1); - shl_and_kind (left_rtx, mask_rtx, attributes); - return attributes[1]; -} - -/* This is used in length attribute of the and_shl_scratch instruction. */ - -int -shl_and_scr_length (insn) - rtx insn; -{ - rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); - int len = shift_insns[INTVAL (XEXP (set_src, 1))]; - rtx op = XEXP (set_src, 0); - len += shift_insns[INTVAL (XEXP (op, 1))] + 1; - op = XEXP (XEXP (op, 0), 0); - return len + shift_insns[INTVAL (XEXP (op, 1))]; -} - -/* Generating rtl? */ -extern int rtx_equal_function_value_matters; - -/* Generate rtl for instructions for which shl_and_kind advised a particular - method of generating them, i.e. returned zero. */ - -int -gen_shl_and (dest, left_rtx, mask_rtx, source) - rtx dest, left_rtx, mask_rtx, source; -{ - int attributes[3]; - unsigned HOST_WIDE_INT mask; - int kind = shl_and_kind (left_rtx, mask_rtx, attributes); - int right, total_shift; - int (*shift_gen_fun) PROTO((int, rtx*)) = gen_shifty_hi_op; - - right = attributes[0]; - total_shift = INTVAL (left_rtx) + right; - mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift; - switch (kind) - { - default: - return -1; - case 1: - { - int first = attributes[2]; - rtx operands[3]; - - if (first < 0) - { - emit_insn ((mask << right) <= 0xff - ? gen_zero_extendqisi2(dest, - gen_lowpart (QImode, source)) - : gen_zero_extendhisi2(dest, - gen_lowpart (HImode, source))); - source = dest; - } - if (source != dest) - emit_insn (gen_movsi (dest, source)); - operands[0] = dest; - if (right) - { - operands[2] = GEN_INT (right); - gen_shifty_hi_op (LSHIFTRT, operands); - } - if (first > 0) - { - operands[2] = GEN_INT (first); - gen_shifty_hi_op (ASHIFT, operands); - total_shift -= first; - mask <<= first; - } - if (first >= 0) - emit_insn (mask <= 0xff - ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest)) - : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest))); - if (total_shift > 0) - { - operands[2] = GEN_INT (total_shift); - gen_shifty_hi_op (ASHIFT, operands); - } - break; - } - case 4: - shift_gen_fun = gen_shifty_op; - case 3: - /* If the topmost bit that matters is set, set the topmost bits - that don't matter. This way, we might be able to get a shorter - signed constant. */ - if (mask & ((HOST_WIDE_INT)1 << 31 - total_shift)) - mask |= (HOST_WIDE_INT)~0 << (31 - total_shift); - case 2: - /* Don't expand fine-grained when combining, because that will - make the pattern fail. */ - if (rtx_equal_function_value_matters - || reload_in_progress || reload_completed) - { - rtx operands[3]; - - /* Cases 3 and 4 should be handled by this split - only while combining */ - if (kind > 2) - abort (); - if (right) - { - emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right))); - source = dest; - } - emit_insn (gen_andsi3 (dest, source, GEN_INT (mask))); - if (total_shift) - { - operands[0] = dest; - operands[1] = dest; - operands[2] = GEN_INT (total_shift); - shift_gen_fun (ASHIFT, operands); - } - break; - } - else - { - int neg = 0; - if (kind != 4 && total_shift < 16) - { - neg = -ext_shift_amounts[total_shift][1]; - if (neg > 0) - neg -= ext_shift_amounts[total_shift][2]; - else - neg = 0; - } - emit_insn (gen_and_shl_scratch (dest, source, - GEN_INT (right), - GEN_INT (mask), - GEN_INT (total_shift + neg), - GEN_INT (neg))); - emit_insn (gen_movsi (dest, dest)); - break; - } - } - return 0; -} - -/* Try to find a good way to implement the combiner pattern - [(set (match_operand:SI 0 "register_operand" "=r") - (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "const_int_operand" "n") - (match_operand:SI 3 "const_int_operand" "n") - (const_int 0))) - (clobber (reg:SI 18))] - LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3. - return 0 for simple left / right shift combination. - return 1 for left shift / 8 bit sign extend / left shift. - return 2 for left shift / 16 bit sign extend / left shift. - return 3 for left shift / 8 bit sign extend / shift / sign extend. - return 4 for left shift / 16 bit sign extend / shift / sign extend. - return 5 for left shift / 16 bit sign extend / right shift - return 6 for < 8 bit sign extend / left shift. - return 7 for < 8 bit sign extend / left shift / single right shift. - If COSTP is nonzero, assign the calculated cost to *COSTP. */ - -int -shl_sext_kind (left_rtx, size_rtx, costp) - rtx left_rtx, size_rtx; - int *costp; -{ - int left, size, insize, ext; - int cost, best_cost; - int kind; - - left = INTVAL (left_rtx); - size = INTVAL (size_rtx); - insize = size - left; - if (insize <= 0) - abort (); - /* Default to left / right shift. */ - kind = 0; - best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size]; - if (size <= 16) - { - /* 16 bit shift / sign extend / 16 bit shift */ - cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size]; - /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden - below, by alternative 3 or something even better. */ - if (cost < best_cost) - { - kind = 5; - best_cost = cost; - } - } - /* Try a plain sign extend between two shifts. */ - for (ext = 16; ext >= insize; ext -= 8) - { - if (ext <= size) - { - cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext]; - if (cost < best_cost) - { - kind = ext / 8U; - best_cost = cost; - } - } - /* Check if we can do a sloppy shift with a final signed shift - restoring the sign. */ - if (EXT_SHIFT_SIGNED (size - ext)) - cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1; - /* If not, maybe it's still cheaper to do the second shift sloppy, - and do a final sign extend? */ - else if (size <= 16) - cost = ext_shift_insns[ext - insize] + 1 - + ext_shift_insns[size > ext ? size - ext : ext - size] + 1; - else - continue; - if (cost < best_cost) - { - kind = ext / 8U + 2; - best_cost = cost; - } - } - /* Check if we can sign extend in r0 */ - if (insize < 8) - { - cost = 3 + shift_insns[left]; - if (cost < best_cost) - { - kind = 6; - best_cost = cost; - } - /* Try the same with a final signed shift. */ - if (left < 31) - { - cost = 3 + ext_shift_insns[left + 1] + 1; - if (cost < best_cost) - { - kind = 7; - best_cost = cost; - } - } - } - if (TARGET_SH3) - { - /* Try to use a dynamic shift. */ - cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST; - if (cost < best_cost) - { - kind = 0; - best_cost = cost; - } - } - if (costp) - *costp = cost; - return kind; -} - -/* Function to be used in the length attribute of the instructions - implementing this pattern. */ - -int -shl_sext_length (insn) - rtx insn; -{ - rtx set_src, left_rtx, size_rtx; - int cost; - - set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); - left_rtx = XEXP (XEXP (set_src, 0), 1); - size_rtx = XEXP (set_src, 1); - shl_sext_kind (left_rtx, size_rtx, &cost); - return cost; -} - -/* Generate rtl for this pattern */ - -int -gen_shl_sext (dest, left_rtx, size_rtx, source) - rtx dest, left_rtx, size_rtx, source; -{ - int kind; - int left, size, insize, cost; - rtx operands[3]; - - kind = shl_sext_kind (left_rtx, size_rtx, &cost); - left = INTVAL (left_rtx); - size = INTVAL (size_rtx); - insize = size - left; - switch (kind) - { - case 1: - case 2: - case 3: - case 4: - { - int ext = kind & 1 ? 8 : 16; - int shift2 = size - ext; - - /* Don't expand fine-grained when combining, because that will - make the pattern fail. */ - if (! rtx_equal_function_value_matters - && ! reload_in_progress && ! reload_completed) - { - emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); - emit_insn (gen_movsi (dest, source)); - break; - } - if (dest != source) - emit_insn (gen_movsi (dest, source)); - operands[0] = dest; - if (ext - insize) - { - operands[2] = GEN_INT (ext - insize); - gen_shifty_hi_op (ASHIFT, operands); - } - emit_insn (kind & 1 - ? gen_extendqisi2(dest, gen_lowpart (QImode, dest)) - : gen_extendhisi2(dest, gen_lowpart (HImode, dest))); - if (kind <= 2) - { - if (shift2) - { - operands[2] = GEN_INT (shift2); - gen_shifty_op (ASHIFT, operands); - } - } - else - { - if (shift2 > 0) - { - if (EXT_SHIFT_SIGNED (shift2)) - { - operands[2] = GEN_INT (shift2 + 1); - gen_shifty_op (ASHIFT, operands); - operands[2] = GEN_INT (1); - gen_shifty_op (ASHIFTRT, operands); - break; - } - operands[2] = GEN_INT (shift2); - gen_shifty_hi_op (ASHIFT, operands); - } - else if (shift2) - { - operands[2] = GEN_INT (-shift2); - gen_shifty_hi_op (LSHIFTRT, operands); - } - emit_insn (size <= 8 - ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) - : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); - } - break; - } - case 5: - { - int i = 16 - size; - if (! rtx_equal_function_value_matters - && ! reload_in_progress && ! reload_completed) - emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); - else - { - operands[0] = dest; - operands[2] = GEN_INT (16 - insize); - gen_shifty_hi_op (ASHIFT, operands); - emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); - } - /* Don't use gen_ashrsi3 because it generates new pseudos. */ - while (--i >= 0) - gen_ashift (ASHIFTRT, 1, dest); - break; - } - case 6: - case 7: - /* Don't expand fine-grained when combining, because that will - make the pattern fail. */ - if (! rtx_equal_function_value_matters - && ! reload_in_progress && ! reload_completed) - { - emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); - emit_insn (gen_movsi (dest, source)); - break; - } - emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1))); - emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1)))); - emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1)))); - operands[0] = dest; - operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx; - gen_shifty_op (ASHIFT, operands); - if (kind == 7) - emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1))); - break; - default: - return -1; - } - return 0; -} - -/* The SH cannot load a large constant into a register, constants have to - come from a pc relative load. The reference of a pc relative load - instruction must be less than 1k infront of the instruction. This - means that we often have to dump a constant inside a function, and - generate code to branch around it. - - It is important to minimize this, since the branches will slow things - down and make things bigger. - - Worst case code looks like: - - mov.l L1,rn - bra L2 - nop - align - L1: .long value - L2: - .. - - mov.l L3,rn - bra L4 - nop - align - L3: .long value - L4: - .. - - We fix this by performing a scan before scheduling, which notices which - instructions need to have their operands fetched from the constant table - and builds the table. - - The algorithm is: - - scan, find an instruction which needs a pcrel move. Look forward, find the - last barrier which is within MAX_COUNT bytes of the requirement. - If there isn't one, make one. Process all the instructions between - the find and the barrier. - - In the above example, we can tell that L3 is within 1k of L1, so - the first move can be shrunk from the 3 insn+constant sequence into - just 1 insn, and the constant moved to L3 to make: - - mov.l L1,rn - .. - mov.l L3,rn - bra L4 - nop - align - L3:.long value - L4:.long value - - Then the second move becomes the target for the shortening process. */ - -typedef struct -{ - rtx value; /* Value in table. */ - rtx label; /* Label of value. */ - enum machine_mode mode; /* Mode of value. */ -} pool_node; - -/* The maximum number of constants that can fit into one pool, since - the pc relative range is 0...1020 bytes and constants are at least 4 - bytes long. */ - -#define MAX_POOL_SIZE (1020/4) -static pool_node pool_vector[MAX_POOL_SIZE]; -static int pool_size; - -/* ??? If we need a constant in HImode which is the truncated value of a - constant we need in SImode, we could combine the two entries thus saving - two bytes. Is this common enough to be worth the effort of implementing - it? */ - -/* ??? This stuff should be done at the same time that we shorten branches. - As it is now, we must assume that all branches are the maximum size, and - this causes us to almost always output constant pools sooner than - necessary. */ - -/* Add a constant to the pool and return its label. */ - -static rtx -add_constant (x, mode, last_value) - rtx last_value; - rtx x; - enum machine_mode mode; -{ - int i; - rtx lab; - - /* First see if we've already got it. */ - for (i = 0; i < pool_size; i++) - { - if (x->code == pool_vector[i].value->code - && mode == pool_vector[i].mode) - { - if (x->code == CODE_LABEL) - { - if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) - continue; - } - if (rtx_equal_p (x, pool_vector[i].value)) - { - lab = 0; - if (! last_value - || ! i - || ! rtx_equal_p (last_value, pool_vector[i-1].value)) - { - lab = pool_vector[i].label; - if (! lab) - pool_vector[i].label = lab = gen_label_rtx (); - } - return lab; - } - } - } - - /* Need a new one. */ - pool_vector[pool_size].value = x; - if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value)) - lab = 0; - else - lab = gen_label_rtx (); - pool_vector[pool_size].mode = mode; - pool_vector[pool_size].label = lab; - pool_size++; - return lab; -} - -/* Output the literal table. */ - -static void -dump_table (scan) - rtx scan; -{ - int i; - int need_align = 1; - - /* Do two passes, first time dump out the HI sized constants. */ - - for (i = 0; i < pool_size; i++) - { - pool_node *p = &pool_vector[i]; - - if (p->mode == HImode) - { - if (need_align) - { - scan = emit_insn_after (gen_align_2 (), scan); - need_align = 0; - } - scan = emit_label_after (p->label, scan); - scan = emit_insn_after (gen_consttable_2 (p->value), scan); - } - } - - need_align = 1; - - for (i = 0; i < pool_size; i++) - { - pool_node *p = &pool_vector[i]; - - switch (p->mode) - { - case HImode: - break; - case SImode: - case SFmode: - if (need_align) - { - need_align = 0; - scan = emit_label_after (gen_label_rtx (), scan); - scan = emit_insn_after (gen_align_4 (), scan); - } - if (p->label) - scan = emit_label_after (p->label, scan); - scan = emit_insn_after (gen_consttable_4 (p->value), scan); - break; - case DFmode: - case DImode: - if (need_align) - { - need_align = 0; - scan = emit_label_after (gen_label_rtx (), scan); - scan = emit_insn_after (gen_align_4 (), scan); - } - if (p->label) - scan = emit_label_after (p->label, scan); - scan = emit_insn_after (gen_consttable_8 (p->value), scan); - break; - default: - abort (); - break; - } - } - - scan = emit_insn_after (gen_consttable_end (), scan); - scan = emit_barrier_after (scan); - pool_size = 0; -} - -/* Return non-zero if constant would be an ok source for a - mov.w instead of a mov.l. */ - -static int -hi_const (src) - rtx src; -{ - return (GET_CODE (src) == CONST_INT - && INTVAL (src) >= -32768 - && INTVAL (src) <= 32767); -} - -/* Non-zero if the insn is a move instruction which needs to be fixed. */ - -/* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the - CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't - need to fix it if the input value is CONST_OK_FOR_I. */ - -static int -broken_move (insn) - rtx insn; -{ - if (GET_CODE (insn) == INSN) - { - rtx pat = PATTERN (insn); - if (GET_CODE (pat) == PARALLEL) - pat = XVECEXP (pat, 0, 0); - if (GET_CODE (pat) == SET - /* We can load any 8 bit value if we don't care what the high - order bits end up as. */ - && GET_MODE (SET_DEST (pat)) != QImode - && CONSTANT_P (SET_SRC (pat)) - && ! (TARGET_SH3E - && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE - && (fp_zero_operand (SET_SRC (pat)) - || fp_one_operand (SET_SRC (pat))) - && GET_CODE (SET_DEST (pat)) == REG - && REGNO (SET_DEST (pat)) >= FIRST_FP_REG - && REGNO (SET_DEST (pat)) <= LAST_FP_REG) - && (GET_CODE (SET_SRC (pat)) != CONST_INT - || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat))))) - return 1; - } - - return 0; -} - -static int -mova_p (insn) - rtx insn; -{ - return (GET_CODE (insn) == INSN - && GET_CODE (PATTERN (insn)) == SET - && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC - && XINT (SET_SRC (PATTERN (insn)), 1) == 1); -} - -/* Find the last barrier from insn FROM which is close enough to hold the - constant pool. If we can't find one, then create one near the end of - the range. */ - -static rtx -find_barrier (num_mova, mova, from) - int num_mova; - rtx mova, from; -{ - int count_si = 0; - int count_hi = 0; - int found_hi = 0; - int found_si = 0; - int hi_align = 2; - int si_align = 2; - int leading_mova = num_mova; - rtx barrier_before_mova, found_barrier = 0, good_barrier = 0; - int si_limit; - int hi_limit; - - /* For HImode: range is 510, add 4 because pc counts from address of - second instruction after this one, subtract 2 for the jump instruction - that we may need to emit before the table, subtract 2 for the instruction - that fills the jump delay slot (in very rare cases, reorg will take an - instruction from after the constant pool or will leave the delay slot - empty). This gives 510. - For SImode: range is 1020, add 4 because pc counts from address of - second instruction after this one, subtract 2 in case pc is 2 byte - aligned, subtract 2 for the jump instruction that we may need to emit - before the table, subtract 2 for the instruction that fills the jump - delay slot. This gives 1018. */ - - /* The branch will always be shortened now that the reference address for - forward branches is the successor address, thus we need no longer make - adjustments to the [sh]i_limit for -O0. */ - - si_limit = 1018; - hi_limit = 510; - - while (from && count_si < si_limit && count_hi < hi_limit) - { - int inc = get_attr_length (from); - int new_align = 1; - - if (GET_CODE (from) == CODE_LABEL) - { - if (optimize) - new_align = 1 << label_to_alignment (from); - else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER) - new_align = 1 << barrier_align (from); - else - new_align = 1; - inc = 0; - } - - if (GET_CODE (from) == BARRIER) - { - - found_barrier = from; - - /* If we are at the end of the function, or in front of an alignment - instruction, we need not insert an extra alignment. We prefer - this kind of barrier. */ - if (barrier_align (from) > 2) - good_barrier = from; - } - - if (broken_move (from)) - { - rtx pat, src, dst; - enum machine_mode mode; - - pat = PATTERN (from); - if (GET_CODE (pat) == PARALLEL) - pat = XVECEXP (pat, 0, 0); - src = SET_SRC (pat); - dst = SET_DEST (pat); - mode = GET_MODE (dst); - - /* We must explicitly check the mode, because sometimes the - front end will generate code to load unsigned constants into - HImode targets without properly sign extending them. */ - if (mode == HImode - || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG)) - { - found_hi += 2; - /* We put the short constants before the long constants, so - we must count the length of short constants in the range - for the long constants. */ - /* ??? This isn't optimal, but is easy to do. */ - si_limit -= 2; - } - else - { - while (si_align > 2 && found_si + si_align - 2 > count_si) - si_align >>= 1; - if (found_si > count_si) - count_si = found_si; - found_si += GET_MODE_SIZE (mode); - if (num_mova) - si_limit -= GET_MODE_SIZE (mode); - } - } - - if (mova_p (from)) - { - if (! num_mova++) - { - leading_mova = 0; - mova = from; - barrier_before_mova = good_barrier ? good_barrier : found_barrier; - } - if (found_si > count_si) - count_si = found_si; - } - else if (GET_CODE (from) == JUMP_INSN - && (GET_CODE (PATTERN (from)) == ADDR_VEC - || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)) - { - if (num_mova) - num_mova--; - if (barrier_align (next_real_insn (from)) == CACHE_LOG) - { - /* We have just passed the barrier in front of the - ADDR_DIFF_VEC, which is stored in found_barrier. Since - the ADDR_DIFF_VEC is accessed as data, just like our pool - constants, this is a good opportunity to accommodate what - we have gathered so far. - If we waited any longer, we could end up at a barrier in - front of code, which gives worse cache usage for separated - instruction / data caches. */ - good_barrier = found_barrier; - break; - } - else - { - rtx body = PATTERN (from); - inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body)); - } - } - - if (found_si) - { - if (new_align > si_align) - { - si_limit -= count_si - 1 & new_align - si_align; - si_align = new_align; - } - count_si = count_si + new_align - 1 & -new_align; - count_si += inc; - } - if (found_hi) - { - if (new_align > hi_align) - { - hi_limit -= count_hi - 1 & new_align - hi_align; - hi_align = new_align; - } - count_hi = count_hi + new_align - 1 & -new_align; - count_hi += inc; - } - from = NEXT_INSN (from); - } - - if (num_mova) - if (leading_mova) - { - /* Try as we might, the leading mova is out of range. Change - it into a load (which will become a pcload) and retry. */ - SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); - INSN_CODE (mova) = -1; - return find_barrier (0, 0, mova); - } - else - { - /* Insert the constant pool table before the mova instruction, - to prevent the mova label reference from going out of range. */ - from = mova; - good_barrier = found_barrier = barrier_before_mova; - } - - if (found_barrier) - { - if (good_barrier && next_real_insn (found_barrier)) - found_barrier = good_barrier; - } - else - { - /* We didn't find a barrier in time to dump our stuff, - so we'll make one. */ - rtx label = gen_label_rtx (); - - /* If we exceeded the range, then we must back up over the last - instruction we looked at. Otherwise, we just need to undo the - NEXT_INSN at the end of the loop. */ - if (count_hi > hi_limit || count_si > si_limit) - from = PREV_INSN (PREV_INSN (from)); - else - from = PREV_INSN (from); - - /* Walk back to be just before any jump or label. - Putting it before a label reduces the number of times the branch - around the constant pool table will be hit. Putting it before - a jump makes it more likely that the bra delay slot will be - filled. */ - while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE - || GET_CODE (from) == CODE_LABEL) - from = PREV_INSN (from); - - from = emit_jump_insn_after (gen_jump (label), from); - JUMP_LABEL (from) = label; - LABEL_NUSES (label) = 1; - found_barrier = emit_barrier_after (from); - emit_label_after (label, found_barrier); - } - - return found_barrier; -} - -/* If the instruction INSN is implemented by a special function, and we can - positively find the register that is used to call the sfunc, and this - register is not used anywhere else in this instruction - except as the - destination of a set, return this register; else, return 0. */ -rtx -sfunc_uses_reg (insn) - rtx insn; -{ - int i; - rtx pattern, part, reg_part, reg; - - if (GET_CODE (insn) != INSN) - return 0; - pattern = PATTERN (insn); - if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC) - return 0; - - for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--) - { - part = XVECEXP (pattern, 0, i); - if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode) - reg_part = part; - } - if (! reg_part) - return 0; - reg = XEXP (reg_part, 0); - for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--) - { - part = XVECEXP (pattern, 0, i); - if (part == reg_part || GET_CODE (part) == CLOBBER) - continue; - if (reg_mentioned_p (reg, ((GET_CODE (part) == SET - && GET_CODE (SET_DEST (part)) == REG) - ? SET_SRC (part) : part))) - return 0; - } - return reg; -} - -/* See if the only way in which INSN uses REG is by calling it, or by - setting it while calling it. Set *SET to a SET rtx if the register - is set by INSN. */ - -static int -noncall_uses_reg (reg, insn, set) - rtx reg; - rtx insn; - rtx *set; -{ - rtx pattern, reg2; - - *set = NULL_RTX; - - reg2 = sfunc_uses_reg (insn); - if (reg2 && REGNO (reg2) == REGNO (reg)) - { - pattern = single_set (insn); - if (pattern - && GET_CODE (SET_DEST (pattern)) == REG - && REGNO (reg) == REGNO (SET_DEST (pattern))) - *set = pattern; - return 0; - } - if (GET_CODE (insn) != CALL_INSN) - { - /* We don't use rtx_equal_p because we don't care if the mode is - different. */ - pattern = single_set (insn); - if (pattern - && GET_CODE (SET_DEST (pattern)) == REG - && REGNO (reg) == REGNO (SET_DEST (pattern))) - { - rtx par, part; - int i; - - *set = pattern; - par = PATTERN (insn); - if (GET_CODE (par) == PARALLEL) - for (i = XVECLEN (par, 0) - 1; i >= 0; i--) - { - part = XVECEXP (par, 0, i); - if (GET_CODE (part) != SET && reg_mentioned_p (reg, part)) - return 1; - } - return reg_mentioned_p (reg, SET_SRC (pattern)); - } - - return 1; - } - - pattern = PATTERN (insn); - - if (GET_CODE (pattern) == PARALLEL) - { - int i; - - for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--) - if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i))) - return 1; - pattern = XVECEXP (pattern, 0, 0); - } - - if (GET_CODE (pattern) == SET) - { - if (reg_mentioned_p (reg, SET_DEST (pattern))) - { - /* We don't use rtx_equal_p, because we don't care if the - mode is different. */ - if (GET_CODE (SET_DEST (pattern)) != REG - || REGNO (reg) != REGNO (SET_DEST (pattern))) - return 1; - - *set = pattern; - } - - pattern = SET_SRC (pattern); - } - - if (GET_CODE (pattern) != CALL - || GET_CODE (XEXP (pattern, 0)) != MEM - || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0))) - return 1; - - return 0; -} - -/* Given a X, a pattern of an insn or a part of it, return a mask of used - general registers. Bits 0..15 mean that the respective registers - are used as inputs in the instruction. Bits 16..31 mean that the - registers 0..15, respectively, are used as outputs, or are clobbered. - IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */ -int -regs_used (x, is_dest) - rtx x; int is_dest; -{ - enum rtx_code code; - char *fmt; - int i, used = 0; - - if (! x) - return used; - code = GET_CODE (x); - switch (code) - { - case REG: - if (REGNO (x) < 16) - return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) - << (REGNO (x) + is_dest)); - return 0; - case SUBREG: - { - rtx y = SUBREG_REG (x); - - if (GET_CODE (y) != REG) - break; - if (REGNO (y) < 16) - return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) - << (REGNO (y) + SUBREG_WORD (x) + is_dest)); - return 0; - } - case SET: - return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16); - case RETURN: - /* If there was a return value, it must have been indicated with USE. */ - return 0x00ffff00; - case CLOBBER: - is_dest = 1; - break; - case MEM: - is_dest = 0; - break; - case CALL: - used |= 0x00ff00f0; - break; - } - - fmt = GET_RTX_FORMAT (code); - - for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) - { - if (fmt[i] == 'E') - { - register int j; - for (j = XVECLEN (x, i) - 1; j >= 0; j--) - used |= regs_used (XVECEXP (x, i, j), is_dest); - } - else if (fmt[i] == 'e') - used |= regs_used (XEXP (x, i), is_dest); - } - return used; -} - -/* Create an instruction that prevents redirection of a conditional branch - to the destination of the JUMP with address ADDR. - If the branch needs to be implemented as an indirect jump, try to find - a scratch register for it. - If NEED_BLOCK is 0, don't do anything unless we need a scratch register. - If any preceding insn that doesn't fit into a delay slot is good enough, - pass 1. Pass 2 if a definite blocking insn is needed. - -1 is used internally to avoid deep recursion. - If a blocking instruction is made or recognized, return it. */ - -static rtx -gen_block_redirect (jump, addr, need_block) - rtx jump; - int addr, need_block; -{ - int dead = 0; - rtx prev = prev_nonnote_insn (jump); - rtx dest; - - /* First, check if we already have an instruction that satisfies our need. */ - if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev)) - { - if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) - return prev; - if (GET_CODE (PATTERN (prev)) == USE - || GET_CODE (PATTERN (prev)) == CLOBBER - || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) - prev = jump; - else if ((need_block &= ~1) < 0) - return prev; - else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect) - need_block = 0; - } - /* We can't use JUMP_LABEL here because it might be undefined - when not optimizing. */ - dest = XEXP (SET_SRC (PATTERN (jump)), 0); - /* If the branch is out of range, try to find a scratch register for it. */ - if (optimize - && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)) - { - rtx scan; - /* Don't look for the stack pointer as a scratch register, - it would cause trouble if an interrupt occurred. */ - unsigned try = 0x7fff, used; - int jump_left = flag_expensive_optimizations + 1; - - /* It is likely that the most recent eligible instruction is wanted for - the delay slot. Therefore, find out which registers it uses, and - try to avoid using them. */ - - for (scan = jump; scan = PREV_INSN (scan); ) - { - enum rtx_code code; - - if (INSN_DELETED_P (scan)) - continue; - code = GET_CODE (scan); - if (code == CODE_LABEL || code == JUMP_INSN) - break; - if (code == INSN - && GET_CODE (PATTERN (scan)) != USE - && GET_CODE (PATTERN (scan)) != CLOBBER - && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES) - { - try &= ~regs_used (PATTERN (scan), 0); - break; - } - } - for (used = dead = 0, scan = JUMP_LABEL (jump); scan = NEXT_INSN (scan); ) - { - enum rtx_code code; - - if (INSN_DELETED_P (scan)) - continue; - code = GET_CODE (scan); - if (GET_RTX_CLASS (code) == 'i') - { - used |= regs_used (PATTERN (scan), 0); - if (code == CALL_INSN) - used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0); - dead |= (used >> 16) & ~used; - if (dead & try) - { - dead &= try; - break; - } - if (code == JUMP_INSN) - if (jump_left-- && simplejump_p (scan)) - scan = JUMP_LABEL (scan); - else - break; - } - } - /* Mask out the stack pointer again, in case it was - the only 'free' register we have found. */ - dead &= 0x7fff; - } - /* If the immediate destination is still in range, check for possible - threading with a jump beyond the delay slot insn. - Don't check if we are called recursively; the jump has been or will be - checked in a different invocation then. */ - - else if (optimize && need_block >= 0) - { - rtx next = next_active_insn (next_active_insn (dest)); - if (next && GET_CODE (next) == JUMP_INSN - && GET_CODE (PATTERN (next)) == SET - && recog_memoized (next) == CODE_FOR_jump) - { - dest = JUMP_LABEL (next); - if (dest - && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098) - gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1); - } - } - - if (dead) - { - rtx reg = gen_rtx (REG, SImode, exact_log2 (dead & -dead)); - - /* It would be nice if we could convert the jump into an indirect - jump / far branch right now, and thus exposing all constituent - instructions to further optimization. However, reorg uses - simplejump_p to determine if there is an unconditional jump where - it should try to schedule instructions from the target of the - branch; simplejump_p fails for indirect jumps even if they have - a JUMP_LABEL. */ - rtx insn = emit_insn_before (gen_indirect_jump_scratch - (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump)))) - , jump); - INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch; - return insn; - } - else if (need_block) - /* We can't use JUMP_LABEL here because it might be undefined - when not optimizing. */ - return emit_insn_before (gen_block_branch_redirect - (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0)))) - , jump); - return prev; -} - -#define CONDJUMP_MIN -252 -#define CONDJUMP_MAX 262 -struct far_branch -{ - /* A label (to be placed) in front of the jump - that jumps to our ultimate destination. */ - rtx near_label; - /* Where we are going to insert it if we cannot move the jump any farther, - or the jump itself if we have picked up an existing jump. */ - rtx insert_place; - /* The ultimate destination. */ - rtx far_label; - struct far_branch *prev; - /* If the branch has already been created, its address; - else the address of its first prospective user. */ - int address; -}; - -enum mdep_reorg_phase_e mdep_reorg_phase; -void -gen_far_branch (bp) - struct far_branch *bp; -{ - rtx insn = bp->insert_place; - rtx jump; - rtx label = gen_label_rtx (); - - emit_label_after (label, insn); - if (bp->far_label) - { - jump = emit_jump_insn_after (gen_jump (bp->far_label), insn); - LABEL_NUSES (bp->far_label)++; - } - else - jump = emit_jump_insn_after (gen_return (), insn); - /* Emit a barrier so that reorg knows that any following instructions - are not reachable via a fall-through path. - But don't do this when not optimizing, since we wouldn't supress the - alignment for the barrier then, and could end up with out-of-range - pc-relative loads. */ - if (optimize) - emit_barrier_after (jump); - emit_label_after (bp->near_label, insn); - JUMP_LABEL (jump) = bp->far_label; - if (! invert_jump (insn, label)) - abort (); - /* Prevent reorg from undoing our splits. */ - gen_block_redirect (jump, bp->address += 2, 2); -} - -/* Fix up ADDR_DIFF_VECs. */ -void -fixup_addr_diff_vecs (first) - rtx first; -{ - rtx insn; - - for (insn = first; insn; insn = NEXT_INSN (insn)) - { - rtx vec_lab, pat, prev, prevpat, x, braf_label; - - if (GET_CODE (insn) != JUMP_INSN - || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) - continue; - pat = PATTERN (insn); - vec_lab = XEXP (XEXP (pat, 0), 0); - - /* Search the matching casesi_jump_2. */ - for (prev = vec_lab; ; prev = PREV_INSN (prev)) - { - if (GET_CODE (prev) != JUMP_INSN) - continue; - prevpat = PATTERN (prev); - if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2) - continue; - x = XVECEXP (prevpat, 0, 1); - if (GET_CODE (x) != USE) - continue; - x = XEXP (x, 0); - if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab) - break; - } - - /* Emit the reference label of the braf where it belongs, right after - the casesi_jump_2 (i.e. braf). */ - braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0); - emit_label_after (braf_label, prev); - - /* Fix up the ADDR_DIF_VEC to be relative - to the reference address of the braf. */ - XEXP (XEXP (pat, 0), 0) = braf_label; - } -} - -/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following - a barrier. Return the base 2 logarithm of the desired alignment. */ -int -barrier_align (barrier_or_label) - rtx barrier_or_label; -{ - rtx next = next_real_insn (barrier_or_label), pat, prev; - int slot, credit; - - if (! next) - return 0; - - pat = PATTERN (next); - - if (GET_CODE (pat) == ADDR_DIFF_VEC) - return 2; - - if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1) - /* This is a barrier in front of a constant table. */ - return 0; - - prev = prev_real_insn (barrier_or_label); - if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC) - { - pat = PATTERN (prev); - /* If this is a very small table, we want to keep the alignment after - the table to the minimum for proper code alignment. */ - return ((TARGET_SMALLCODE - || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat)) - <= 1 << (CACHE_LOG - 2))) - ? 1 : CACHE_LOG); - } - - if (TARGET_SMALLCODE) - return 0; - - if (! TARGET_SH3 || ! optimize) - return CACHE_LOG; - - /* When fixing up pcloads, a constant table might be inserted just before - the basic block that ends with the barrier. Thus, we can't trust the - instruction lengths before that. */ - if (mdep_reorg_phase > SH_FIXUP_PCLOAD) - { - /* Check if there is an immediately preceding branch to the insn beyond - the barrier. We must weight the cost of discarding useful information - from the current cache line when executing this branch and there is - an alignment, against that of fetching unneeded insn in front of the - branch target when there is no alignment. */ - - /* PREV is presumed to be the JUMP_INSN for the barrier under - investigation. Skip to the insn before it. */ - prev = prev_real_insn (prev); - - for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2; - credit >= 0 && prev && GET_CODE (prev) == INSN; - prev = prev_real_insn (prev)) - { - if (GET_CODE (PATTERN (prev)) == USE - || GET_CODE (PATTERN (prev)) == CLOBBER) - continue; - if (GET_CODE (PATTERN (prev)) == SEQUENCE) - prev = XVECEXP (PATTERN (prev), 0, 1); - if (slot && - get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) - slot = 0; - credit -= get_attr_length (prev); - } - if (prev - && GET_CODE (prev) == JUMP_INSN - && JUMP_LABEL (prev) - && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label) - && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0))) - return 0; - } - - return CACHE_LOG; -} - -/* If we are inside a phony loop, lmost any kind of label can turn up as the - first one in the loop. Aligning a braf label causes incorrect switch - destination addresses; we can detect braf labels because they are - followed by a BARRIER. - Applying loop alignment to small constant or switch tables is a waste - of space, so we suppress this too. */ -int -sh_loop_align (label) - rtx label; -{ - rtx next = label; - - do - next = next_nonnote_insn (next); - while (next && GET_CODE (next) == CODE_LABEL); - - if (! next - || GET_RTX_CLASS (GET_CODE (next)) != 'i' - || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC - || recog_memoized (next) == CODE_FOR_consttable_2) - return 0; - return 2; -} - -/* Exported to toplev.c. - - Do a final pass over the function, just before delayed branch - scheduling. */ - -void -machine_dependent_reorg (first) - rtx first; -{ - rtx insn, mova; - int num_mova; - rtx r0_rtx = gen_rtx (REG, Pmode, 0); - rtx r0_inc_rtx = gen_rtx (POST_INC, Pmode, r0_rtx); - - /* If relaxing, generate pseudo-ops to associate function calls with - the symbols they call. It does no harm to not generate these - pseudo-ops. However, when we can generate them, it enables to - linker to potentially relax the jsr to a bsr, and eliminate the - register load and, possibly, the constant pool entry. */ - - mdep_reorg_phase = SH_INSERT_USES_LABELS; - if (TARGET_RELAX) - { - /* Remove all REG_LABEL notes. We want to use them for our own - purposes. This works because none of the remaining passes - need to look at them. - - ??? But it may break in the future. We should use a machine - dependent REG_NOTE, or some other approach entirely. */ - for (insn = first; insn; insn = NEXT_INSN (insn)) - { - if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') - { - rtx note; - - while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0) - remove_note (insn, note); - } - } - - for (insn = first; insn; insn = NEXT_INSN (insn)) - { - rtx pattern, reg, link, set, scan, dies, label; - int rescan = 0, foundinsn = 0; - - if (GET_CODE (insn) == CALL_INSN) - { - pattern = PATTERN (insn); - - if (GET_CODE (pattern) == PARALLEL) - pattern = XVECEXP (pattern, 0, 0); - if (GET_CODE (pattern) == SET) - pattern = SET_SRC (pattern); - - if (GET_CODE (pattern) != CALL - || GET_CODE (XEXP (pattern, 0)) != MEM) - continue; - - reg = XEXP (XEXP (pattern, 0), 0); - } - else - { - reg = sfunc_uses_reg (insn); - if (! reg) - continue; - } - - if (GET_CODE (reg) != REG) - continue; - - /* This is a function call via REG. If the only uses of REG - between the time that it is set and the time that it dies - are in function calls, then we can associate all the - function calls with the setting of REG. */ - - for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) - { - if (REG_NOTE_KIND (link) != 0) - continue; - set = single_set (XEXP (link, 0)); - if (set && rtx_equal_p (reg, SET_DEST (set))) - { - link = XEXP (link, 0); - break; - } - } - - if (! link) - { - /* ??? Sometimes global register allocation will have - deleted the insn pointed to by LOG_LINKS. Try - scanning backward to find where the register is set. */ - for (scan = PREV_INSN (insn); - scan && GET_CODE (scan) != CODE_LABEL; - scan = PREV_INSN (scan)) - { - if (GET_RTX_CLASS (GET_CODE (scan)) != 'i') - continue; - - if (! reg_mentioned_p (reg, scan)) - continue; - - if (noncall_uses_reg (reg, scan, &set)) - break; - - if (set) - { - link = scan; - break; - } - } - } - - if (! link) - continue; - - /* The register is set at LINK. */ - - /* We can only optimize the function call if the register is - being set to a symbol. In theory, we could sometimes - optimize calls to a constant location, but the assembler - and linker do not support that at present. */ - if (GET_CODE (SET_SRC (set)) != SYMBOL_REF - && GET_CODE (SET_SRC (set)) != LABEL_REF) - continue; - - /* Scan forward from LINK to the place where REG dies, and - make sure that the only insns which use REG are - themselves function calls. */ - - /* ??? This doesn't work for call targets that were allocated - by reload, since there may not be a REG_DEAD note for the - register. */ - - dies = NULL_RTX; - for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan)) - { - rtx scanset; - - /* Don't try to trace forward past a CODE_LABEL if we haven't - seen INSN yet. Ordinarily, we will only find the setting insn - in LOG_LINKS if it is in the same basic block. However, - cross-jumping can insert code labels in between the load and - the call, and can result in situations where a single call - insn may have two targets depending on where we came from. */ - - if (GET_CODE (scan) == CODE_LABEL && ! foundinsn) - break; - - if (GET_RTX_CLASS (GET_CODE (scan)) != 'i') - continue; - - /* Don't try to trace forward past a JUMP. To optimize - safely, we would have to check that all the - instructions at the jump destination did not use REG. */ - - if (GET_CODE (scan) == JUMP_INSN) - break; - - if (! reg_mentioned_p (reg, scan)) - continue; - - if (noncall_uses_reg (reg, scan, &scanset)) - break; - - if (scan == insn) - foundinsn = 1; - - if (scan != insn - && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan))) - { - /* There is a function call to this register other - than the one we are checking. If we optimize - this call, we need to rescan again below. */ - rescan = 1; - } - - /* ??? We shouldn't have to worry about SCANSET here. - We should just be able to check for a REG_DEAD note - on a function call. However, the REG_DEAD notes are - apparently not dependable around libcalls; c-torture - execute/920501-2 is a test case. If SCANSET is set, - then this insn sets the register, so it must have - died earlier. Unfortunately, this will only handle - the cases in which the register is, in fact, set in a - later insn. */ - - /* ??? We shouldn't have to use FOUNDINSN here. - However, the LOG_LINKS fields are apparently not - entirely reliable around libcalls; - newlib/libm/math/e_pow.c is a test case. Sometimes - an insn will appear in LOG_LINKS even though it is - not the most recent insn which sets the register. */ - - if (foundinsn - && (scanset - || find_reg_note (scan, REG_DEAD, reg))) - { - dies = scan; - break; - } - } - - if (! dies) - { - /* Either there was a branch, or some insn used REG - other than as a function call address. */ - continue; - } - - /* Create a code label, and put it in a REG_LABEL note on - the insn which sets the register, and on each call insn - which uses the register. In final_prescan_insn we look - for the REG_LABEL notes, and output the appropriate label - or pseudo-op. */ - - label = gen_label_rtx (); - REG_NOTES (link) = gen_rtx (EXPR_LIST, REG_LABEL, label, - REG_NOTES (link)); - REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, label, - REG_NOTES (insn)); - if (rescan) - { - scan = link; - do - { - rtx reg2; - - scan = NEXT_INSN (scan); - if (scan != insn - && ((GET_CODE (scan) == CALL_INSN - && reg_mentioned_p (reg, scan)) - || ((reg2 = sfunc_uses_reg (scan)) - && REGNO (reg2) == REGNO (reg)))) - REG_NOTES (scan) = gen_rtx (EXPR_LIST, REG_LABEL, - label, REG_NOTES (scan)); - } - while (scan != dies); - } - } - } - - if (TARGET_SH2) - fixup_addr_diff_vecs (first); - - if (optimize) - { - mdep_reorg_phase = SH_SHORTEN_BRANCHES0; - shorten_branches (first); - } - /* Scan the function looking for move instructions which have to be - changed to pc-relative loads and insert the literal tables. */ - - mdep_reorg_phase = SH_FIXUP_PCLOAD; - for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn)) - { - if (mova_p (insn)) - { - if (! num_mova++) - mova = insn; - } - else if (GET_CODE (insn) == JUMP_INSN - && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC - && num_mova) - { - rtx scan; - int total; - - num_mova--; - - /* Some code might have been inserted between the mova and - its ADDR_DIFF_VEC. Check if the mova is still in range. */ - for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan)) - total += get_attr_length (scan); - - /* range of mova is 1020, add 4 because pc counts from address of - second instruction after this one, subtract 2 in case pc is 2 - byte aligned. Possible alignment needed for the ADDR_DIFF_VEC - cancels out with alignment effects of the mova itself. */ - if (total > 1022) - { - /* Change the mova into a load, and restart scanning - there. broken_move will then return true for mova. */ - SET_SRC (PATTERN (mova)) - = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); - INSN_CODE (mova) = -1; - insn = mova; - } - } - if (broken_move (insn)) - { - rtx scan; - /* Scan ahead looking for a barrier to stick the constant table - behind. */ - rtx barrier = find_barrier (num_mova, mova, insn); - rtx last_float_move, last_float = 0, *last_float_addr; - - if (num_mova && ! mova_p (mova)) - { - /* find_barrier had to change the first mova into a - pcload; thus, we have to start with this new pcload. */ - insn = mova; - num_mova = 0; - } - /* Now find all the moves between the points and modify them. */ - for (scan = insn; scan != barrier; scan = NEXT_INSN (scan)) - { - if (GET_CODE (scan) == CODE_LABEL) - last_float = 0; - if (broken_move (scan)) - { - rtx *patp = &PATTERN (scan), pat = *patp; - rtx src, dst; - rtx lab; - rtx newinsn; - rtx newsrc; - enum machine_mode mode; - - if (GET_CODE (pat) == PARALLEL) - patp = &XVECEXP (pat, 0, 0), pat = *patp; - src = SET_SRC (pat); - dst = SET_DEST (pat); - mode = GET_MODE (dst); - - if (mode == SImode && hi_const (src) - && REGNO (dst) != FPUL_REG) - { - int offset = 0; - - mode = HImode; - while (GET_CODE (dst) == SUBREG) - { - offset += SUBREG_WORD (dst); - dst = SUBREG_REG (dst); - } - dst = gen_rtx (REG, HImode, REGNO (dst) + offset); - } - - if (GET_CODE (dst) == REG - && ((REGNO (dst) >= FIRST_FP_REG - && REGNO (dst) <= LAST_XD_REG) - || REGNO (dst) == FPUL_REG)) - { - if (last_float - && reg_set_between_p (r0_rtx, last_float_move, scan)) - last_float = 0; - lab = add_constant (src, mode, last_float); - if (lab) - emit_insn_before (gen_mova (lab), scan); - else - *last_float_addr = r0_inc_rtx; - last_float_move = scan; - last_float = src; - newsrc = gen_rtx (MEM, mode, - ((TARGET_SH4 && ! TARGET_FMOVD - || REGNO (dst) == FPUL_REG) - ? r0_inc_rtx - : r0_rtx)); - last_float_addr = &XEXP (newsrc, 0); - } - else - { - lab = add_constant (src, mode, 0); - newsrc = gen_rtx (MEM, mode, - gen_rtx (LABEL_REF, VOIDmode, lab)); - } - RTX_UNCHANGING_P (newsrc) = 1; - *patp = gen_rtx (SET, VOIDmode, dst, newsrc); - INSN_CODE (scan) = -1; - } - } - dump_table (barrier); - insn = barrier; - } - } - - mdep_reorg_phase = SH_SHORTEN_BRANCHES1; - insn_addresses = 0; - split_branches (first); - - /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it - also has an effect on the register that holds the addres of the sfunc. - Insert an extra dummy insn in front of each sfunc that pretends to - use this register. */ - if (flag_delayed_branch) - { - for (insn = first; insn; insn = NEXT_INSN (insn)) - { - rtx reg = sfunc_uses_reg (insn); - - if (! reg) - continue; - emit_insn_before (gen_use_sfunc_addr (reg), insn); - } - } -#if 0 - /* fpscr is not actually a user variable, but we pretend it is for the - sake of the previous optimization passes, since we want it handled like - one. However, we don't have eny debugging information for it, so turn - it into a non-user variable now. */ - if (TARGET_SH4) - REG_USERVAR_P (get_fpscr_rtx ()) = 0; -#endif - if (optimize) - sh_flag_remove_dead_before_cse = 1; - mdep_reorg_phase = SH_AFTER_MDEP_REORG; -} - -int -get_dest_uid (label, max_uid) - rtx label; - int max_uid; -{ - rtx dest = next_real_insn (label); - int dest_uid; - if (! dest) - /* This can happen for an undefined label. */ - return 0; - dest_uid = INSN_UID (dest); - /* If this is a newly created branch redirection blocking instruction, - we cannot index the branch_uid or insn_addresses arrays with its - uid. But then, we won't need to, because the actual destination is - the following branch. */ - while (dest_uid >= max_uid) - { - dest = NEXT_INSN (dest); - dest_uid = INSN_UID (dest); - } - if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN) - return 0; - return dest_uid; -} - -/* Split condbranches that are out of range. Also add clobbers for - scratch registers that are needed in far jumps. - We do this before delay slot scheduling, so that it can take our - newly created instructions into account. It also allows us to - find branches with common targets more easily. */ - -static void -split_branches (first) - rtx first; -{ - rtx insn; - struct far_branch **uid_branch, *far_branch_list = 0; - int max_uid = get_max_uid (); - - /* Find out which branches are out of range. */ - shorten_branches (first); - - uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch); - bzero ((char *) uid_branch, max_uid * sizeof *uid_branch); - - for (insn = first; insn; insn = NEXT_INSN (insn)) - if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') - continue; - else if (INSN_DELETED_P (insn)) - { - /* Shorten_branches would split this instruction again, - so transform it into a note. */ - PUT_CODE (insn, NOTE); - NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (insn) = 0; - } - else if (GET_CODE (insn) == JUMP_INSN - /* Don't mess with ADDR_DIFF_VEC */ - && (GET_CODE (PATTERN (insn)) == SET - || GET_CODE (PATTERN (insn)) == RETURN)) - { - enum attr_type type = get_attr_type (insn); - if (type == TYPE_CBRANCH) - { - rtx next, beyond; - - if (get_attr_length (insn) > 4) - { - rtx src = SET_SRC (PATTERN (insn)); - rtx cond = XEXP (src, 0); - rtx olabel = XEXP (XEXP (src, 1), 0); - rtx jump; - int addr = insn_addresses[INSN_UID (insn)]; - rtx label = 0; - int dest_uid = get_dest_uid (olabel, max_uid); - struct far_branch *bp = uid_branch[dest_uid]; - - /* redirect_jump needs a valid JUMP_LABEL, and it might delete - the label if the LABEL_NUSES count drops to zero. There is - always a jump_optimize pass that sets these values, but it - proceeds to delete unreferenced code, and then if not - optimizing, to un-delete the deleted instructions, thus - leaving labels with too low uses counts. */ - if (! optimize) - { - JUMP_LABEL (insn) = olabel; - LABEL_NUSES (olabel)++; - } - if (! bp) - { - bp = (struct far_branch *) alloca (sizeof *bp); - uid_branch[dest_uid] = bp; - bp->prev = far_branch_list; - far_branch_list = bp; - bp->far_label - = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0); - LABEL_NUSES (bp->far_label)++; - } - else - { - label = bp->near_label; - if (! label && bp->address - addr >= CONDJUMP_MIN) - { - rtx block = bp->insert_place; - - if (GET_CODE (PATTERN (block)) == RETURN) - block = PREV_INSN (block); - else - block = gen_block_redirect (block, - bp->address, 2); - label = emit_label_after (gen_label_rtx (), - PREV_INSN (block)); - bp->near_label = label; - } - else if (label && ! NEXT_INSN (label)) - if (addr + 2 - bp->address <= CONDJUMP_MAX) - bp->insert_place = insn; - else - gen_far_branch (bp); - } - if (! label - || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN) - { - bp->near_label = label = gen_label_rtx (); - bp->insert_place = insn; - bp->address = addr; - } - if (! redirect_jump (insn, label)) - abort (); - } - else - { - /* get_attr_length (insn) == 2 */ - /* Check if we have a pattern where reorg wants to redirect - the branch to a label from an unconditional branch that - is too far away. */ - /* We can't use JUMP_LABEL here because it might be undefined - when not optimizing. */ - /* A syntax error might cause beyond to be NULL_RTX. */ - beyond - = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), - 0)); - - if (beyond - && (GET_CODE (beyond) == JUMP_INSN - || (GET_CODE (beyond = next_active_insn (beyond)) - == JUMP_INSN)) - && GET_CODE (PATTERN (beyond)) == SET - && recog_memoized (beyond) == CODE_FOR_jump - && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))] - - insn_addresses[INSN_UID (insn)] + 252U) - > 252 + 258 + 2)) - gen_block_redirect (beyond, - insn_addresses[INSN_UID (beyond)], 1); - } - - next = next_active_insn (insn); - - if ((GET_CODE (next) == JUMP_INSN - || GET_CODE (next = next_active_insn (next)) == JUMP_INSN) - && GET_CODE (PATTERN (next)) == SET - && recog_memoized (next) == CODE_FOR_jump - && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))] - - insn_addresses[INSN_UID (insn)] + 252U) - > 252 + 258 + 2)) - gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1); - } - else if (type == TYPE_JUMP || type == TYPE_RETURN) - { - int addr = insn_addresses[INSN_UID (insn)]; - rtx far_label = 0; - int dest_uid = 0; - struct far_branch *bp; - - if (type == TYPE_JUMP) - { - far_label = XEXP (SET_SRC (PATTERN (insn)), 0); - dest_uid = get_dest_uid (far_label, max_uid); - if (! dest_uid) - { - /* Parse errors can lead to labels outside - the insn stream. */ - if (! NEXT_INSN (far_label)) - continue; - - if (! optimize) - { - JUMP_LABEL (insn) = far_label; - LABEL_NUSES (far_label)++; - } - redirect_jump (insn, NULL_RTX); - far_label = 0; - } - } - bp = uid_branch[dest_uid]; - if (! bp) - { - bp = (struct far_branch *) alloca (sizeof *bp); - uid_branch[dest_uid] = bp; - bp->prev = far_branch_list; - far_branch_list = bp; - bp->near_label = 0; - bp->far_label = far_label; - if (far_label) - LABEL_NUSES (far_label)++; - } - else if (bp->near_label && ! NEXT_INSN (bp->near_label)) - if (addr - bp->address <= CONDJUMP_MAX) - emit_label_after (bp->near_label, PREV_INSN (insn)); - else - { - gen_far_branch (bp); - bp->near_label = 0; - } - else - bp->near_label = 0; - bp->address = addr; - bp->insert_place = insn; - if (! far_label) - emit_insn_before (gen_block_branch_redirect (const0_rtx), insn); - else - gen_block_redirect (insn, addr, bp->near_label ? 2 : 0); - } - } - /* Generate all pending far branches, - and free our references to the far labels. */ - while (far_branch_list) - { - if (far_branch_list->near_label - && ! NEXT_INSN (far_branch_list->near_label)) - gen_far_branch (far_branch_list); - if (optimize - && far_branch_list->far_label - && ! --LABEL_NUSES (far_branch_list->far_label)) - delete_insn (far_branch_list->far_label); - far_branch_list = far_branch_list->prev; - } - - /* Instruction length information is no longer valid due to the new - instructions that have been generated. */ - init_insn_lengths (); -} - -/* Dump out instruction addresses, which is useful for debugging the - constant pool table stuff. - - If relaxing, output the label and pseudo-ops used to link together - calls and the instruction which set the registers. */ - -/* ??? This is unnecessary, and probably should be deleted. This makes - the insn_addresses declaration above unnecessary. */ - -/* ??? The addresses printed by this routine for insns are nonsense for - insns which are inside of a sequence where none of the inner insns have - variable length. This is because the second pass of shorten_branches - does not bother to update them. */ - -void -final_prescan_insn (insn, opvec, noperands) - rtx insn; - rtx *opvec; - int noperands; -{ - if (TARGET_DUMPISIZE) - fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]); - - if (TARGET_RELAX) - { - rtx note; - - note = find_reg_note (insn, REG_LABEL, NULL_RTX); - if (note) - { - rtx pattern; - - pattern = PATTERN (insn); - if (GET_CODE (pattern) == PARALLEL) - pattern = XVECEXP (pattern, 0, 0); - if (GET_CODE (pattern) == CALL - || (GET_CODE (pattern) == SET - && (GET_CODE (SET_SRC (pattern)) == CALL - || get_attr_type (insn) == TYPE_SFUNC))) - asm_fprintf (asm_out_file, "\t.uses %LL%d\n", - CODE_LABEL_NUMBER (XEXP (note, 0))); - else if (GET_CODE (pattern) == SET) - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (XEXP (note, 0))); - else - abort (); - } - } -} - -/* Dump out any constants accumulated in the final pass. These will - only be labels. */ - -char * -output_jump_label_table () -{ - int i; - - if (pool_size) - { - fprintf (asm_out_file, "\t.align 2\n"); - for (i = 0; i < pool_size; i++) - { - pool_node *p = &pool_vector[i]; - - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (p->label)); - output_asm_insn (".long %O0", &p->value); - } - pool_size = 0; - } - - return ""; -} - -/* A full frame looks like: - - arg-5 - arg-4 - [ if current_function_anonymous_args - arg-3 - arg-2 - arg-1 - arg-0 ] - saved-fp - saved-r10 - saved-r11 - saved-r12 - saved-pr - local-n - .. - local-1 - local-0 <- fp points here. */ - -/* Number of bytes pushed for anonymous args, used to pass information - between expand_prologue and expand_epilogue. */ - -static int extra_push; - -/* Adjust the stack by SIZE bytes. REG holds the rtl of the register - to be adjusted, and TEMP, if nonnegative, holds the register number - of a general register that we may clobber. */ - -static void -output_stack_adjust (size, reg, temp) - int size; - rtx reg; - int temp; -{ - if (size) - { - if (CONST_OK_FOR_I (size)) - emit_insn (gen_addsi3 (reg, reg, GEN_INT (size))); - /* Try to do it with two partial adjustments; however, we must make - sure that the stack is properly aligned at all times, in case - an interrupt occurs between the two partial adjustments. */ - else if (CONST_OK_FOR_I (size / 2 & -4) - && CONST_OK_FOR_I (size - (size / 2 & -4))) - { - emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4))); - emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4)))); - } - else - { - rtx const_reg; - - /* If TEMP is invalid, we could temporarily save a general - register to MACL. However, there is currently no need - to handle this case, so just abort when we see it. */ - if (temp < 0) - abort (); - const_reg = gen_rtx (REG, SImode, temp); - - /* If SIZE is negative, subtract the positive value. - This sometimes allows a constant pool entry to be shared - between prologue and epilogue code. */ - if (size < 0) - { - emit_insn (gen_movsi (const_reg, GEN_INT (-size))); - emit_insn (gen_subsi3 (reg, reg, const_reg)); - } - else - { - emit_insn (gen_movsi (const_reg, GEN_INT (size))); - emit_insn (gen_addsi3 (reg, reg, const_reg)); - } - } - } -} - -/* Output RTL to push register RN onto the stack. */ - -static void -push (rn) - int rn; -{ - rtx x; - if (rn == FPUL_REG) - x = gen_push_fpul (); - else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE - && rn >= FIRST_FP_REG && rn <= LAST_XD_REG) - { - if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG) - return; - x = gen_push_4 (gen_rtx (REG, DFmode, rn)); - } - else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG) - x = gen_push_e (gen_rtx (REG, SFmode, rn)); - else - x = gen_push (gen_rtx (REG, SImode, rn)); - - x = emit_insn (x); - REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC, - gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0); -} - -/* Output RTL to pop register RN from the stack. */ - -static void -pop (rn) - int rn; -{ - rtx x; - if (rn == FPUL_REG) - x = gen_pop_fpul (); - else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE - && rn >= FIRST_FP_REG && rn <= LAST_XD_REG) - { - if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG) - return; - x = gen_pop_4 (gen_rtx (REG, DFmode, rn)); - } - else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG) - x = gen_pop_e (gen_rtx (REG, SFmode, rn)); - else - x = gen_pop (gen_rtx (REG, SImode, rn)); - - x = emit_insn (x); - REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC, - gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0); -} - -/* Generate code to push the regs specified in the mask. */ - -static void -push_regs (mask, mask2) - int mask, mask2; -{ - int i; - - /* Push PR last; this gives better latencies after the prologue, and - candidates for the return delay slot when there are no general - registers pushed. */ - for (i = 0; i < 32; i++) - if (mask & (1 << i) && i != PR_REG) - push (i); - for (i = 32; i < FIRST_PSEUDO_REGISTER; i++) - if (mask2 & (1 << (i - 32))) - push (i); - if (mask & (1 << PR_REG)) - push (PR_REG); -} - -/* Work out the registers which need to be saved, both as a mask and a - count of saved words. - - If doing a pragma interrupt function, then push all regs used by the - function, and if we call another function (we can tell by looking at PR), - make sure that all the regs it clobbers are safe too. */ - -static int -calc_live_regs (count_ptr, live_regs_mask2) - int *count_ptr; - int *live_regs_mask2; -{ - int reg; - int live_regs_mask = 0; - int count; - int interrupt_handler; - - if ((lookup_attribute - ("interrupt_handler", - DECL_MACHINE_ATTRIBUTES (current_function_decl))) - != NULL_TREE) - interrupt_handler = 1; - else - interrupt_handler = 0; - - *live_regs_mask2 = 0; - /* If we can save a lot of saves by switching to double mode, do that. */ - if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE) - for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2) - if (regs_ever_live[reg] && regs_ever_live[reg+1] - && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa)) - && ++count > 2) - { - target_flags &= ~FPU_SINGLE_BIT; - break; - } - for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--) - { - if ((interrupt_handler && ! pragma_trapa) - ? (/* Need to save all the regs ever live. */ - (regs_ever_live[reg] - || (call_used_regs[reg] - && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG) - && regs_ever_live[PR_REG])) - && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM - && reg != RETURN_ADDRESS_POINTER_REGNUM - && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG) - : (/* Only push those regs which are used and need to be saved. */ - regs_ever_live[reg] && ! call_used_regs[reg])) - { - if (reg >= 32) - *live_regs_mask2 |= 1 << (reg - 32); - else - live_regs_mask |= 1 << reg; - count++; - if (TARGET_SH4 && TARGET_FMOVD && reg >= FIRST_FP_REG) - if (reg <= LAST_FP_REG) - { - if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1]) - { - if (reg >= 32) - *live_regs_mask2 |= 1 << ((reg ^ 1) - 32); - else - live_regs_mask |= 1 << (reg ^ 1); - count++; - } - } - else if (reg <= LAST_XD_REG) - { - /* Must switch to double mode to access these registers. */ - target_flags &= ~FPU_SINGLE_BIT; - count++; - } - } - } - - *count_ptr = count; - return live_regs_mask; -} - -/* Code to generate prologue and epilogue sequences */ - -void -sh_expand_prologue () -{ - int live_regs_mask; - int d, i; - int live_regs_mask2; - int save_flags = target_flags; - int double_align = 0; - - /* We have pretend args if we had an object sent partially in registers - and partially on the stack, e.g. a large structure. */ - output_stack_adjust (-current_function_pretend_args_size, - stack_pointer_rtx, 3); - - extra_push = 0; - - /* This is set by SETUP_VARARGS to indicate that this is a varargs - routine. Clear it here so that the next function isn't affected. */ - if (current_function_anonymous_args) - { - current_function_anonymous_args = 0; - - /* This is not used by the SH3E calling convention */ - if (!TARGET_SH3E) - { - /* Push arg regs as if they'd been provided by caller in stack. */ - for (i = 0; i < NPARM_REGS(SImode); i++) - { - int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1; - if (i >= (NPARM_REGS(SImode) - - current_function_args_info.arg_count[(int) SH_ARG_INT] - )) - break; - push (rn); - extra_push += 4; - } - } - } - - /* If we're supposed to switch stacks at function entry, do so now. */ - if (sp_switch) - emit_insn (gen_sp_switch_1 ()); - - live_regs_mask = calc_live_regs (&d, &live_regs_mask2); - /* ??? Maybe we could save some switching if we can move a mode switch - that already happens to be at the function start into the prologue. */ - if (target_flags != save_flags) - emit_insn (gen_toggle_sz ()); - push_regs (live_regs_mask, live_regs_mask2); - if (target_flags != save_flags) - emit_insn (gen_toggle_sz ()); - - if (TARGET_ALIGN_DOUBLE && d & 1) - double_align = 4; - - target_flags = save_flags; - - output_stack_adjust (-get_frame_size () - double_align, - stack_pointer_rtx, 3); - - if (frame_pointer_needed) - emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx)); -} - -void -sh_expand_epilogue () -{ - int live_regs_mask; - int d, i; - - int live_regs_mask2; - int save_flags = target_flags; - int frame_size = get_frame_size (); - - live_regs_mask = calc_live_regs (&d, &live_regs_mask2); - - if (TARGET_ALIGN_DOUBLE && d & 1) - frame_size += 4; - - if (frame_pointer_needed) - { - output_stack_adjust (frame_size, frame_pointer_rtx, 7); - - /* We must avoid moving the stack pointer adjustment past code - which reads from the local frame, else an interrupt could - occur after the SP adjustment and clobber data in the local - frame. */ - emit_insn (gen_blockage ()); - emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx)); - } - else if (frame_size) - { - /* We must avoid moving the stack pointer adjustment past code - which reads from the local frame, else an interrupt could - occur after the SP adjustment and clobber data in the local - frame. */ - emit_insn (gen_blockage ()); - output_stack_adjust (frame_size, stack_pointer_rtx, 7); - } - - /* Pop all the registers. */ - - if (target_flags != save_flags) - emit_insn (gen_toggle_sz ()); - if (live_regs_mask & (1 << PR_REG)) - pop (PR_REG); - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - { - int j = (FIRST_PSEUDO_REGISTER - 1) - i; - if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG) - pop (j); - else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32)))) - pop (j); - } - if (target_flags != save_flags) - emit_insn (gen_toggle_sz ()); - target_flags = save_flags; - - output_stack_adjust (extra_push + current_function_pretend_args_size, - stack_pointer_rtx, 7); - - /* Switch back to the normal stack if necessary. */ - if (sp_switch) - emit_insn (gen_sp_switch_2 ()); -} - -/* Clear variables at function end. */ - -void -function_epilogue (stream, size) - FILE *stream; - int size; -{ - trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0; - sp_switch = NULL_RTX; -} - -rtx -sh_builtin_saveregs (arglist) - tree arglist; -{ - tree fntype = TREE_TYPE (current_function_decl); - /* First unnamed integer register. */ - int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT]; - /* Number of integer registers we need to save. */ - int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg); - /* First unnamed SFmode float reg */ - int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT]; - /* Number of SFmode float regs to save. */ - int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg); - int ptrsize = GET_MODE_SIZE (Pmode); - rtx valist, regbuf, fpregs; - int bufsize, regno; - - /* Allocate block of memory for the regs. */ - /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte? - Or can assign_stack_local accept a 0 SIZE argument? */ - bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD); - - regbuf = assign_stack_local (BLKmode, bufsize, 0); - MEM_SET_IN_STRUCT_P (regbuf, 1); - - /* Save int args. - This is optimized to only save the regs that are necessary. Explicitly - named args need not be saved. */ - if (n_intregs > 0) - move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg, - gen_rtx (MEM, BLKmode, - plus_constant (XEXP (regbuf, 0), - n_floatregs * UNITS_PER_WORD)), - n_intregs, n_intregs * UNITS_PER_WORD); - - /* Save float args. - This is optimized to only save the regs that are necessary. Explicitly - named args need not be saved. - We explicitly build a pointer to the buffer because it halves the insn - count when not optimizing (otherwise the pointer is built for each reg - saved). - We emit the moves in reverse order so that we can use predecrement. */ - - fpregs = gen_reg_rtx (Pmode); - emit_move_insn (fpregs, XEXP (regbuf, 0)); - emit_insn (gen_addsi3 (fpregs, fpregs, - GEN_INT (n_floatregs * UNITS_PER_WORD))); - if (TARGET_SH4) - { - for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2) - { - emit_insn (gen_addsi3 (fpregs, fpregs, - GEN_INT (-2 * UNITS_PER_WORD))); - emit_move_insn (gen_rtx (MEM, DFmode, fpregs), - gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno)); - } - regno = first_floatreg; - if (regno & 1) - { - emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD))); - emit_move_insn (gen_rtx (MEM, SFmode, fpregs), - gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno - - (TARGET_LITTLE_ENDIAN != 0))); - } - } - else - for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--) - { - emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD))); - emit_move_insn (gen_rtx (MEM, SFmode, fpregs), - gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno)); - } - - /* Return the address of the regbuf. */ - return XEXP (regbuf, 0); -} - -/* Define the offset between two registers, one to be eliminated, and - the other its replacement, at the start of a routine. */ - -int -initial_elimination_offset (from, to) - int from; - int to; -{ - int regs_saved; - int total_saved_regs_space; - int total_auto_space = get_frame_size (); - int save_flags = target_flags; - - int live_regs_mask, live_regs_mask2; - live_regs_mask = calc_live_regs (®s_saved, &live_regs_mask2); - if (TARGET_ALIGN_DOUBLE && regs_saved & 1) - total_auto_space += 4; - target_flags = save_flags; - - total_saved_regs_space = (regs_saved) * 4; - - if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) - return total_saved_regs_space + total_auto_space; - - if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) - return total_saved_regs_space + total_auto_space; - - /* Initial gap between fp and sp is 0. */ - if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) - return 0; - - if (from == RETURN_ADDRESS_POINTER_REGNUM - && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM)) - { - int i, n = total_saved_regs_space; - for (i = PR_REG-1; i >= 0; i--) - if (live_regs_mask & (1 << i)) - n -= 4; - return n + total_auto_space; - } - - abort (); -} - -/* Handle machine specific pragmas to be semi-compatible with Hitachi - compiler. */ - -int -sh_handle_pragma (p_getc, p_ungetc, pname) - int (* p_getc) PROTO((void)); - void (* p_ungetc) PROTO((int)); - char * pname; -{ - int retval = 0; - - if (strcmp (pname, "interrupt") == 0) - pragma_interrupt = retval = 1; - else if (strcmp (pname, "trapa") == 0) - pragma_interrupt = pragma_trapa = retval = 1; - else if (strcmp (pname, "nosave_low_regs") == 0) - pragma_nosave_low_regs = retval = 1; - - return retval; -} - -/* Generate 'handle_interrupt' attribute for decls */ - -void -sh_pragma_insert_attributes (node, attributes, prefix) - tree node; - tree * attributes; - tree * prefix; -{ - tree a; - - if (! pragma_interrupt - || TREE_CODE (node) != FUNCTION_DECL) - return; - - /* We are only interested in fields. */ - if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd') - return; - - /* Add a 'handle_interrupt' attribute. */ - * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes); - - return; -} - -/* Return nonzero if ATTR is a valid attribute for DECL. - ATTRIBUTES are any existing attributes and ARGS are the arguments - supplied with ATTR. - - Supported attributes: - - interrupt_handler -- specifies this function is an interrupt handler. - - sp_switch -- specifies an alternate stack for an interrupt handler - to run on. - - trap_exit -- use a trapa to exit an interrupt function instead of - an rte instruction. */ - -int -sh_valid_machine_decl_attribute (decl, attributes, attr, args) - tree decl; - tree attributes; - tree attr; - tree args; -{ - int retval = 0; - - if (TREE_CODE (decl) != FUNCTION_DECL) - return 0; - - if (is_attribute_p ("interrupt_handler", attr)) - { - return 1; - } - - if (is_attribute_p ("sp_switch", attr)) - { - /* The sp_switch attribute only has meaning for interrupt functions. */ - if (!pragma_interrupt) - return 0; - - /* sp_switch must have an argument. */ - if (!args || TREE_CODE (args) != TREE_LIST) - return 0; - - /* The argument must be a constant string. */ - if (TREE_CODE (TREE_VALUE (args)) != STRING_CST) - return 0; - - sp_switch = gen_rtx (SYMBOL_REF, VOIDmode, - TREE_STRING_POINTER (TREE_VALUE (args))); - return 1; - } - - if (is_attribute_p ("trap_exit", attr)) - { - /* The trap_exit attribute only has meaning for interrupt functions. */ - if (!pragma_interrupt) - return 0; - - /* trap_exit must have an argument. */ - if (!args || TREE_CODE (args) != TREE_LIST) - return 0; - - /* The argument must be a constant integer. */ - if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) - return 0; - - trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args)); - return 1; - } -} - - -/* Predicates used by the templates. */ - -/* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx. - Used only in general_movsrc_operand. */ - -int -system_reg_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - switch (REGNO (op)) - { - case PR_REG: - case MACL_REG: - case MACH_REG: - return 1; - } - return 0; -} - -/* Returns 1 if OP can be source of a simple move operation. - Same as general_operand, but a LABEL_REF is valid, PRE_DEC is - invalid as are subregs of system registers. */ - -int -general_movsrc_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - if (GET_CODE (op) == MEM) - { - rtx inside = XEXP (op, 0); - if (GET_CODE (inside) == CONST) - inside = XEXP (inside, 0); - - if (GET_CODE (inside) == LABEL_REF) - return 1; - - if (GET_CODE (inside) == PLUS - && GET_CODE (XEXP (inside, 0)) == LABEL_REF - && GET_CODE (XEXP (inside, 1)) == CONST_INT) - return 1; - - /* Only post inc allowed. */ - if (GET_CODE (inside) == PRE_DEC) - return 0; - } - - if ((mode == QImode || mode == HImode) - && (GET_CODE (op) == SUBREG - && GET_CODE (XEXP (op, 0)) == REG - && system_reg_operand (XEXP (op, 0), mode))) - return 0; - - return general_operand (op, mode); -} - -/* Returns 1 if OP can be a destination of a move. - Same as general_operand, but no preinc allowed. */ - -int -general_movdst_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - /* Only pre dec allowed. */ - if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC) - return 0; - - return general_operand (op, mode); -} - -/* Returns 1 if OP is a normal arithmetic register. */ - -int -arith_reg_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - if (register_operand (op, mode)) - { - int regno; - - if (GET_CODE (op) == REG) - regno = REGNO (op); - else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) - regno = REGNO (SUBREG_REG (op)); - else - return 1; - - return (regno != T_REG && regno != PR_REG - && (regno != FPUL_REG || TARGET_SH4) - && regno != MACH_REG && regno != MACL_REG); - } - return 0; -} - -int -fp_arith_reg_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - if (register_operand (op, mode)) - { - int regno; - - if (GET_CODE (op) == REG) - regno = REGNO (op); - else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) - regno = REGNO (SUBREG_REG (op)); - else - return 1; - - return (regno != T_REG && regno != PR_REG && regno > 15 - && regno != MACH_REG && regno != MACL_REG); - } - return 0; -} - -int -fp_extended_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - if (GET_CODE (op) == FLOAT_EXTEND && GET_MODE (op) == mode) - { - op = XEXP (op, 0); - mode = GET_MODE (op); - } - return fp_arith_reg_operand (op, mode); -} - -/* Returns 1 if OP is a valid source operand for an arithmetic insn. */ - -int -arith_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - if (arith_reg_operand (op, mode)) - return 1; - - if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op))) - return 1; - - return 0; -} - -/* Returns 1 if OP is a valid source operand for a compare insn. */ - -int -arith_reg_or_0_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - if (arith_reg_operand (op, mode)) - return 1; - - if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op))) - return 1; - - return 0; -} - -/* Returns 1 if OP is a valid source operand for a logical operation. */ - -int -logical_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - if (arith_reg_operand (op, mode)) - return 1; - - if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op))) - return 1; - - return 0; -} - -/* Nonzero if OP is a floating point value with value 0.0. */ - -int -fp_zero_operand (op) - rtx op; -{ - REAL_VALUE_TYPE r; - - if (GET_MODE (op) != SFmode) - return 0; - - REAL_VALUE_FROM_CONST_DOUBLE (r, op); - return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r); -} - -/* Nonzero if OP is a floating point value with value 1.0. */ - -int -fp_one_operand (op) - rtx op; -{ - REAL_VALUE_TYPE r; - - if (GET_MODE (op) != SFmode) - return 0; - - REAL_VALUE_FROM_CONST_DOUBLE (r, op); - return REAL_VALUES_EQUAL (r, dconst1); -} - -int -tertiary_reload_operand (op, mode) - rtx op; - enum machine_mode mode; -{ - enum rtx_code code = GET_CODE (op); - return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE); -} - -int -fpscr_operand (op) - rtx op; -{ - return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG - && GET_MODE (op) == PSImode); -} - -int -commutative_float_operator (op, mode) - rtx op; - enum machine_mode mode; -{ - if (GET_MODE (op) != mode) - return 0; - switch (GET_CODE (op)) - { - case PLUS: - case MULT: - return 1; - } - return 0; -} - -int -noncommutative_float_operator (op, mode) - rtx op; - enum machine_mode mode; -{ - if (GET_MODE (op) != mode) - return 0; - switch (GET_CODE (op)) - { - case MINUS: - case DIV: - return 1; - } - return 0; -} - -int -binary_float_operator (op, mode) - rtx op; - enum machine_mode mode; -{ - if (GET_MODE (op) != mode) - return 0; - switch (GET_CODE (op)) - { - case PLUS: - case MINUS: - case MULT: - case DIV: - return 1; - } - return 0; -} - -/* Return the destination address of a branch. */ - -int -branch_dest (branch) - rtx branch; -{ - rtx dest = SET_SRC (PATTERN (branch)); - int dest_uid; - - if (GET_CODE (dest) == IF_THEN_ELSE) - dest = XEXP (dest, 1); - dest = XEXP (dest, 0); - dest_uid = INSN_UID (dest); - return insn_addresses[dest_uid]; -} - -/* Return non-zero if REG is not used after INSN. - We assume REG is a reload reg, and therefore does - not live past labels. It may live past calls or jumps though. */ -int -reg_unused_after (reg, insn) - rtx reg; - rtx insn; -{ - enum rtx_code code; - rtx set; - - /* If the reg is set by this instruction, then it is safe for our - case. Disregard the case where this is a store to memory, since - we are checking a register used in the store address. */ - set = single_set (insn); - if (set && GET_CODE (SET_DEST (set)) != MEM - && reg_overlap_mentioned_p (reg, SET_DEST (set))) - return 1; - - while (insn = NEXT_INSN (insn)) - { - code = GET_CODE (insn); - -#if 0 - /* If this is a label that existed before reload, then the register - if dead here. However, if this is a label added by reorg, then - the register may still be live here. We can't tell the difference, - so we just ignore labels completely. */ - if (code == CODE_LABEL) - return 1; - /* else */ -#endif - - if (code == JUMP_INSN) - return 0; - - /* If this is a sequence, we must handle them all at once. - We could have for instance a call that sets the target register, - and a insn in a delay slot that uses the register. In this case, - we must return 0. */ - else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) - { - int i; - int retval = 0; - - for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) - { - rtx this_insn = XVECEXP (PATTERN (insn), 0, i); - rtx set = single_set (this_insn); - - if (GET_CODE (this_insn) == CALL_INSN) - code = CALL_INSN; - else if (GET_CODE (this_insn) == JUMP_INSN) - { - if (INSN_ANNULLED_BRANCH_P (this_insn)) - return 0; - code = JUMP_INSN; - } - - if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) - return 0; - if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) - { - if (GET_CODE (SET_DEST (set)) != MEM) - retval = 1; - else - return 0; - } - if (set == 0 - && reg_overlap_mentioned_p (reg, PATTERN (this_insn))) - return 0; - } - if (retval == 1) - return 1; - else if (code == JUMP_INSN) - return 0; - } - else if (GET_RTX_CLASS (code) == 'i') - { - rtx set = single_set (insn); - - if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) - return 0; - if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) - return GET_CODE (SET_DEST (set)) != MEM; - if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn))) - return 0; - } - - if (code == CALL_INSN && call_used_regs[REGNO (reg)]) - return 1; - } - return 1; -} - -extern struct obstack permanent_obstack; - -rtx -get_fpscr_rtx () -{ - static rtx fpscr_rtx; - - if (! fpscr_rtx) - { - push_obstacks (&permanent_obstack, &permanent_obstack); - fpscr_rtx = gen_rtx (REG, PSImode, 48); - REG_USERVAR_P (fpscr_rtx) = 1; - pop_obstacks (); - mark_user_reg (fpscr_rtx); - } - if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG) - mark_user_reg (fpscr_rtx); - return fpscr_rtx; -} - -void -emit_sf_insn (pat) - rtx pat; -{ - rtx addr; - /* When generating reload insns, we must not create new registers. FPSCR - should already have the correct value, so do nothing to change it. */ - if (! TARGET_FPU_SINGLE && ! reload_in_progress) - { - addr = gen_reg_rtx (SImode); - emit_insn (gen_fpu_switch0 (addr)); - } - emit_insn (pat); - if (! TARGET_FPU_SINGLE && ! reload_in_progress) - { - addr = gen_reg_rtx (SImode); - emit_insn (gen_fpu_switch1 (addr)); - } -} - -void -emit_df_insn (pat) - rtx pat; -{ - rtx addr; - if (TARGET_FPU_SINGLE && ! reload_in_progress) - { - addr = gen_reg_rtx (SImode); - emit_insn (gen_fpu_switch0 (addr)); - } - emit_insn (pat); - if (TARGET_FPU_SINGLE && ! reload_in_progress) - { - addr = gen_reg_rtx (SImode); - emit_insn (gen_fpu_switch1 (addr)); - } -} - -void -expand_sf_unop (fun, operands) - rtx (*fun)(); - rtx *operands; -{ - emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ())); -} - -void -expand_sf_binop (fun, operands) - rtx (*fun)(); - rtx *operands; -{ - emit_sf_insn ((*fun) (operands[0], operands[1], operands[2], - get_fpscr_rtx ())); -} - -void -expand_df_unop (fun, operands) - rtx (*fun)(); - rtx *operands; -{ - emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ())); -} - -void -expand_df_binop (fun, operands) - rtx (*fun)(); - rtx *operands; -{ - emit_df_insn ((*fun) (operands[0], operands[1], operands[2], - get_fpscr_rtx ())); -} - -void -expand_fp_branch (compare, branch) - rtx (*compare) (), (*branch) (); -{ - (GET_MODE (sh_compare_op0) == SFmode ? emit_sf_insn : emit_df_insn) - ((*compare) ()); - emit_jump_insn ((*branch) ()); -} - -/* We don't want to make fpscr call-saved, because that would prevent - channging it, and it would also cost an exstra instruction to save it. - We don't want it to be known as a global register either, because - that disables all flow analysis. But it has to be live at the function - return. Thus, we need to insert a USE at the end of the function. */ -/* This should best be called at about the time FINALIZE_PIC is called, - but not dependent on flag_pic. Alas, there is no suitable hook there, - so this gets called from HAVE_RETURN. */ -int -emit_fpscr_use () -{ - static int fpscr_uses = 0; - - if (rtx_equal_function_value_matters) - { - emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ())); - fpscr_uses++; - } - else - { - if (fpscr_uses > 1) - { - /* Due to he crude way we emit the USEs, we might end up with - some extra ones. Delete all but the last one. */ - rtx insn; - - for (insn = get_last_insn(); insn; insn = PREV_INSN (insn)) - if (GET_CODE (insn) == INSN - && GET_CODE (PATTERN (insn)) == USE - && GET_CODE (XEXP (PATTERN (insn), 0)) == REG - && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG) - { - insn = PREV_INSN (insn); - break; - } - for (; insn; insn = PREV_INSN (insn)) - if (GET_CODE (insn) == INSN - && GET_CODE (PATTERN (insn)) == USE - && GET_CODE (XEXP (PATTERN (insn), 0)) == REG - && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG) - { - PUT_CODE (insn, NOTE); - NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (insn) = 0; - } - } - fpscr_uses = 0; - } -} - -/* ??? gcc does flow analysis strictly after common subexpression - elimination. As a result, common subespression elimination fails - when there are some intervening statements setting the same register. - If we did nothing about this, this would hurt the precision switching - for SH4 badly. There is some cse after reload, but it is unable to - undo the extra register pressure from the unused instructions, and - it cannot remove auto-increment loads. - - A C code example that shows this flow/cse weakness for (at least) SH - and sparc (as of gcc ss-970706) is this: - -double -f(double a) -{ - double d; - d = 0.1; - a += d; - d = 1.1; - d = 0.1; - a *= d; - return a; -} - - So we add another pass before common subexpression elimination, to - remove assignments that are dead due to a following assignment in the - same basic block. */ - -int sh_flag_remove_dead_before_cse; - -static void -mark_use (x, reg_set_block) - rtx x, *reg_set_block; -{ - enum rtx_code code; - - if (! x) - return; - code = GET_CODE (x); - switch (code) - { - case REG: - { - int regno = REGNO (x); - int nregs = (regno < FIRST_PSEUDO_REGISTER - ? HARD_REGNO_NREGS (regno, GET_MODE (x)) - : 1); - do - { - reg_set_block[regno + nregs - 1] = 0; - } - while (--nregs); - break; - } - case SET: - { - rtx dest = SET_DEST (x); - - if (GET_CODE (dest) == SUBREG) - dest = SUBREG_REG (dest); - if (GET_CODE (dest) != REG) - mark_use (dest, reg_set_block); - mark_use (SET_SRC (x), reg_set_block); - break; - } - case CLOBBER: - break; - default: - { - char *fmt = GET_RTX_FORMAT (code); - int i, j; - for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) - { - if (fmt[i] == 'e') - mark_use (XEXP (x, i), reg_set_block); - else if (fmt[i] == 'E') - for (j = XVECLEN (x, i) - 1; j >= 0; j--) - mark_use (XVECEXP (x, i, j), reg_set_block); - } - break; - } - } -} - -int -remove_dead_before_cse () -{ - rtx *reg_set_block, last, last_call, insn, set; - int in_libcall = 0; - - /* This pass should run just once, after rtl generation. */ - - if (! sh_flag_remove_dead_before_cse - || rtx_equal_function_value_matters - || reload_completed) - return; - - sh_flag_remove_dead_before_cse = 0; - - reg_set_block = (rtx *)alloca (max_reg_num () * sizeof (rtx)); - bzero ((char *)reg_set_block, max_reg_num () * sizeof (rtx)); - last_call = last = get_last_insn (); - for (insn = last; insn; insn = PREV_INSN (insn)) - { - if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') - continue; - if (GET_CODE (insn) == JUMP_INSN) - { - last_call = last = insn; - continue; - } - set = single_set (insn); - - /* Don't delete parts of libcalls, since that would confuse cse, loop - and flow. */ - if (find_reg_note (insn, REG_RETVAL, NULL_RTX)) - in_libcall = 1; - else if (in_libcall) - { - if (find_reg_note (insn, REG_LIBCALL, NULL_RTX)) - in_libcall = 0; - } - else if (set && GET_CODE (SET_DEST (set)) == REG) - { - int regno = REGNO (SET_DEST (set)); - rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno] - ? last_call - : last); - if (reg_set_block[regno] == ref_insn - && (regno >= FIRST_PSEUDO_REGISTER - || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1) - && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn))) - { - PUT_CODE (insn, NOTE); - NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (insn) = 0; - continue; - } - else - reg_set_block[REGNO (SET_DEST (set))] = ref_insn; - } - if (GET_CODE (insn) == CALL_INSN) - { - last_call = insn; - mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block); - } - mark_use (PATTERN (insn), reg_set_block); - } - return 0; -} |