Diffstat (limited to 'gcc/config/sh')
-rwxr-xr-x | gcc/config/sh/elf.h          |  123
-rwxr-xr-x | gcc/config/sh/lib1funcs.asm  | 1206
-rwxr-xr-x | gcc/config/sh/rtems.h        |   35
-rwxr-xr-x | gcc/config/sh/rtemself.h     |   33
-rwxr-xr-x | gcc/config/sh/sh.c           | 4786
-rwxr-xr-x | gcc/config/sh/sh.h           | 2232
-rwxr-xr-x | gcc/config/sh/sh.md          | 4654
-rwxr-xr-x | gcc/config/sh/t-sh           |   29
-rwxr-xr-x | gcc/config/sh/xm-sh.h        |   42
9 files changed, 13140 insertions, 0 deletions
diff --git a/gcc/config/sh/elf.h b/gcc/config/sh/elf.h new file mode 100755 index 0000000..68cc691 --- /dev/null +++ b/gcc/config/sh/elf.h @@ -0,0 +1,123 @@ +/* Definitions of target machine for gcc for Hitachi Super-H using ELF. + Copyright (C) 1996, 1997 Free Software Foundation, Inc. + Contributed by Ian Lance Taylor <ian@cygnus.com>. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Mostly like the regular SH configuration. */ +#include "sh/sh.h" + +/* No SDB debugging info. */ +#undef SDB_DEBUGGING_INFO + +/* Undefine some macros defined in both sh.h and svr4.h. */ +#undef IDENT_ASM_OP +#undef ASM_FILE_END +#undef ASM_OUTPUT_SOURCE_LINE +#undef DBX_OUTPUT_MAIN_SOURCE_FILE_END +#undef CTORS_SECTION_ASM_OP +#undef DTORS_SECTION_ASM_OP +#undef ASM_OUTPUT_SECTION_NAME +#undef ASM_OUTPUT_CONSTRUCTOR +#undef ASM_OUTPUT_DESTRUCTOR +#undef ASM_DECLARE_FUNCTION_NAME +#undef PREFERRED_DEBUGGING_TYPE +#undef MAX_OFILE_ALIGNMENT + +/* Be ELF-like. */ +#include "svr4.h" + +/* The prefix to add to user-visible assembler symbols. + Note that svr4.h redefined it from the original value (that we want) + in sh.h */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +#undef ASM_FILE_START +#define ASM_FILE_START(FILE) do { \ + output_file_directive ((FILE), main_input_filename); \ + if (TARGET_LITTLE_ENDIAN) \ + fprintf ((FILE), "\t.little\n"); \ +} while (0) + + + +/* Let code know that this is ELF. */ +#define CPP_PREDEFINES "-D__sh__ -D__ELF__ -Acpu(sh) -Amachine(sh)" + +/* Pass -ml and -mrelax to the assembler and linker. */ +#undef ASM_SPEC +#define ASM_SPEC "%{ml:-little} %{mrelax:-relax}" + +#undef LINK_SPEC +#define LINK_SPEC "%{ml:-m shlelf} %{mrelax:-relax}" + +/* svr4.h undefined DBX_REGISTER_NUMBER, so we need to define it + again. */ +#define DBX_REGISTER_NUMBER(REGNO) \ + (((REGNO) >= 22 && (REGNO) <= 39) ? ((REGNO) + 1) : (REGNO)) + +/* SH ELF, unlike most ELF implementations, uses underscores before + symbol names. 
*/ +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM,NAME) \ + asm_fprintf (STREAM, "%U%s", NAME) + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \ + sprintf ((STRING), "*%s%s%d", LOCAL_LABEL_PREFIX, (PREFIX), (NUM)) + +#undef ASM_OUTPUT_INTERNAL_LABEL +#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ + asm_fprintf ((FILE), "%L%s%d:\n", (PREFIX), (NUM)) + +#undef ASM_OUTPUT_SOURCE_LINE +#define ASM_OUTPUT_SOURCE_LINE(file, line) \ +do \ + { \ + static int sym_lineno = 1; \ + asm_fprintf ((file), ".stabn 68,0,%d,%LLM%d-", \ + (line), sym_lineno); \ + assemble_name ((file), \ + XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));\ + asm_fprintf ((file), "\n%LLM%d:\n", sym_lineno); \ + sym_lineno += 1; \ + } \ +while (0) + +#undef DBX_OUTPUT_MAIN_SOURCE_FILE_END +#define DBX_OUTPUT_MAIN_SOURCE_FILE_END(FILE, FILENAME) \ +do { \ + text_section (); \ + fprintf ((FILE), "\t.stabs \"\",%d,0,0,Letext\nLetext:\n", N_SO); \ +} while (0) + +/* Arrange to call __main, rather than using crtbegin.o and crtend.o + and relying on .init and .fini being executed at appropriate times. */ +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP +#undef STARTFILE_SPEC +#undef ENDFILE_SPEC + +/* HANDLE_SYSV_PRAGMA (defined by svr4.h) takes precedence over HANDLE_PRAGMA. + We want to use the HANDLE_PRAGMA from sh.h. */ +#undef HANDLE_SYSV_PRAGMA diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm new file mode 100755 index 0000000..bf9ea9a --- /dev/null +++ b/gcc/config/sh/lib1funcs.asm @@ -0,0 +1,1206 @@ +/* Copyright (C) 1994, 1995, 1997, 1998 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file with other programs, and to distribute +those programs without any restriction coming from the use of this +file. (The General Public License restrictions do apply in other +respects; for example, they cover modification of the file, and +distribution when not linked into another program.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. */ + + +!! libgcc1 routines for the Hitachi SH cpu. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! 
tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +#ifdef __ELF__ +#define LOCAL(X) .L_##X +#else +#define LOCAL(X) L_##X +#endif + +#ifdef L_ashiftrt + .global ___ashiftrt_r4_0 + .global ___ashiftrt_r4_1 + .global ___ashiftrt_r4_2 + .global ___ashiftrt_r4_3 + .global ___ashiftrt_r4_4 + .global ___ashiftrt_r4_5 + .global ___ashiftrt_r4_6 + .global ___ashiftrt_r4_7 + .global ___ashiftrt_r4_8 + .global ___ashiftrt_r4_9 + .global ___ashiftrt_r4_10 + .global ___ashiftrt_r4_11 + .global ___ashiftrt_r4_12 + .global ___ashiftrt_r4_13 + .global ___ashiftrt_r4_14 + .global ___ashiftrt_r4_15 + .global ___ashiftrt_r4_16 + .global ___ashiftrt_r4_17 + .global ___ashiftrt_r4_18 + .global ___ashiftrt_r4_19 + .global ___ashiftrt_r4_20 + .global ___ashiftrt_r4_21 + .global ___ashiftrt_r4_22 + .global ___ashiftrt_r4_23 + .global ___ashiftrt_r4_24 + .global ___ashiftrt_r4_25 + .global ___ashiftrt_r4_26 + .global ___ashiftrt_r4_27 + .global ___ashiftrt_r4_28 + .global ___ashiftrt_r4_29 + .global ___ashiftrt_r4_30 + .global ___ashiftrt_r4_31 + .global ___ashiftrt_r4_32 + + .align 1 +___ashiftrt_r4_32: +___ashiftrt_r4_31: + rotcl r4 + rts + subc r4,r4 + +___ashiftrt_r4_30: + shar r4 +___ashiftrt_r4_29: + shar r4 +___ashiftrt_r4_28: + shar r4 +___ashiftrt_r4_27: + shar r4 +___ashiftrt_r4_26: + shar r4 +___ashiftrt_r4_25: + shar r4 +___ashiftrt_r4_24: + shlr16 r4 + shlr8 r4 + rts + exts.b r4,r4 + +___ashiftrt_r4_23: + shar r4 +___ashiftrt_r4_22: + shar r4 +___ashiftrt_r4_21: + shar r4 +___ashiftrt_r4_20: + shar r4 +___ashiftrt_r4_19: + shar r4 +___ashiftrt_r4_18: + shar r4 +___ashiftrt_r4_17: + shar r4 +___ashiftrt_r4_16: + shlr16 r4 + rts + exts.w r4,r4 + +___ashiftrt_r4_15: + shar r4 +___ashiftrt_r4_14: + shar r4 +___ashiftrt_r4_13: + shar r4 +___ashiftrt_r4_12: + shar r4 +___ashiftrt_r4_11: + shar r4 +___ashiftrt_r4_10: + shar r4 +___ashiftrt_r4_9: + shar r4 +___ashiftrt_r4_8: + shar r4 +___ashiftrt_r4_7: + shar r4 +___ashiftrt_r4_6: + shar r4 +___ashiftrt_r4_5: + shar r4 +___ashiftrt_r4_4: + shar r4 +___ashiftrt_r4_3: + shar r4 +___ashiftrt_r4_2: + shar r4 +___ashiftrt_r4_1: + rts + shar r4 + +___ashiftrt_r4_0: + rts + nop +#endif + +#ifdef L_ashiftrt_n + +! +! ___ashrsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ + .global ___ashrsi3 + .align 2 +___ashrsi3: + mov #31,r0 + and r0,r5 + mova LOCAL(ashrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashrsi3_table): + .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) + +LOCAL(ashrsi3_31): + rotcl r0 + rts + subc r0,r0 + +LOCAL(ashrsi3_30): + shar r0 +LOCAL(ashrsi3_29): + shar r0 +LOCAL(ashrsi3_28): + shar r0 +LOCAL(ashrsi3_27): + shar r0 +LOCAL(ashrsi3_26): + shar r0 +LOCAL(ashrsi3_25): + shar r0 +LOCAL(ashrsi3_24): + shlr16 r0 + shlr8 r0 + rts + exts.b r0,r0 + +LOCAL(ashrsi3_23): + shar r0 +LOCAL(ashrsi3_22): + shar r0 +LOCAL(ashrsi3_21): + shar r0 +LOCAL(ashrsi3_20): + shar r0 +LOCAL(ashrsi3_19): + shar r0 +LOCAL(ashrsi3_18): + shar r0 +LOCAL(ashrsi3_17): + shar r0 +LOCAL(ashrsi3_16): + shlr16 r0 + rts + exts.w r0,r0 + +LOCAL(ashrsi3_15): + shar r0 +LOCAL(ashrsi3_14): + shar r0 +LOCAL(ashrsi3_13): + shar r0 +LOCAL(ashrsi3_12): + shar r0 +LOCAL(ashrsi3_11): + shar r0 +LOCAL(ashrsi3_10): + shar r0 +LOCAL(ashrsi3_9): + shar r0 +LOCAL(ashrsi3_8): + shar r0 +LOCAL(ashrsi3_7): + shar r0 +LOCAL(ashrsi3_6): + shar r0 +LOCAL(ashrsi3_5): + shar r0 +LOCAL(ashrsi3_4): + shar r0 +LOCAL(ashrsi3_3): + shar r0 +LOCAL(ashrsi3_2): + shar r0 +LOCAL(ashrsi3_1): + rts + shar r0 + +LOCAL(ashrsi3_0): + rts + nop + +#endif + +#ifdef L_ashiftlt + +! +! ___ashlsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ .global ___ashlsi3 + .align 2 +___ashlsi3: + mov #31,r0 + and r0,r5 + mova LOCAL(ashlsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashlsi3_table): + .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) + +LOCAL(ashlsi3_6): + shll2 r0 +LOCAL(ashlsi3_4): + shll2 r0 +LOCAL(ashlsi3_2): + rts + shll2 r0 + +LOCAL(ashlsi3_7): + shll2 r0 +LOCAL(ashlsi3_5): + shll2 r0 +LOCAL(ashlsi3_3): + shll2 r0 +LOCAL(ashlsi3_1): + rts + shll r0 + +LOCAL(ashlsi3_14): + shll2 r0 +LOCAL(ashlsi3_12): + shll2 r0 +LOCAL(ashlsi3_10): + shll2 r0 +LOCAL(ashlsi3_8): + rts + shll8 r0 + +LOCAL(ashlsi3_15): + shll2 r0 +LOCAL(ashlsi3_13): + shll2 r0 +LOCAL(ashlsi3_11): + shll2 r0 +LOCAL(ashlsi3_9): + shll8 r0 + rts + shll r0 + +LOCAL(ashlsi3_22): + shll2 r0 +LOCAL(ashlsi3_20): + shll2 r0 +LOCAL(ashlsi3_18): + shll2 r0 +LOCAL(ashlsi3_16): + rts + shll16 r0 + +LOCAL(ashlsi3_23): + shll2 r0 +LOCAL(ashlsi3_21): + shll2 r0 +LOCAL(ashlsi3_19): + shll2 r0 +LOCAL(ashlsi3_17): + shll16 r0 + rts + shll r0 + +LOCAL(ashlsi3_30): + shll2 r0 +LOCAL(ashlsi3_28): + shll2 r0 +LOCAL(ashlsi3_26): + shll2 r0 +LOCAL(ashlsi3_24): + shll16 r0 + rts + shll8 r0 + +LOCAL(ashlsi3_31): + shll2 r0 +LOCAL(ashlsi3_29): + shll2 r0 +LOCAL(ashlsi3_27): + shll2 r0 +LOCAL(ashlsi3_25): + shll16 r0 + shll8 r0 + rts + shll r0 + +LOCAL(ashlsi3_0): + rts + nop + +#endif + +#ifdef L_lshiftrt + +! +! ___lshrsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ .global ___lshrsi3 + .align 2 +___lshrsi3: + mov #31,r0 + and r0,r5 + mova LOCAL(lshrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(lshrsi3_table): + .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) + +LOCAL(lshrsi3_6): + shlr2 r0 +LOCAL(lshrsi3_4): + shlr2 r0 +LOCAL(lshrsi3_2): + rts + shlr2 r0 + +LOCAL(lshrsi3_7): + shlr2 r0 +LOCAL(lshrsi3_5): + shlr2 r0 +LOCAL(lshrsi3_3): + shlr2 r0 +LOCAL(lshrsi3_1): + rts + shlr r0 + +LOCAL(lshrsi3_14): + shlr2 r0 +LOCAL(lshrsi3_12): + shlr2 r0 +LOCAL(lshrsi3_10): + shlr2 r0 +LOCAL(lshrsi3_8): + rts + shlr8 r0 + +LOCAL(lshrsi3_15): + shlr2 r0 +LOCAL(lshrsi3_13): + shlr2 r0 +LOCAL(lshrsi3_11): + shlr2 r0 +LOCAL(lshrsi3_9): + shlr8 r0 + rts + shlr r0 + +LOCAL(lshrsi3_22): + shlr2 r0 +LOCAL(lshrsi3_20): + shlr2 r0 +LOCAL(lshrsi3_18): + shlr2 r0 +LOCAL(lshrsi3_16): + rts + shlr16 r0 + +LOCAL(lshrsi3_23): + shlr2 r0 +LOCAL(lshrsi3_21): + shlr2 r0 +LOCAL(lshrsi3_19): + shlr2 r0 +LOCAL(lshrsi3_17): + shlr16 r0 + rts + shlr r0 + +LOCAL(lshrsi3_30): + shlr2 r0 +LOCAL(lshrsi3_28): + shlr2 r0 +LOCAL(lshrsi3_26): + shlr2 r0 +LOCAL(lshrsi3_24): + shlr16 r0 + rts + shlr8 r0 + +LOCAL(lshrsi3_31): + shlr2 r0 +LOCAL(lshrsi3_29): + shlr2 r0 +LOCAL(lshrsi3_27): + shlr2 r0 +LOCAL(lshrsi3_25): + shlr16 r0 + shlr8 r0 + rts + shlr r0 + +LOCAL(lshrsi3_0): + rts + nop + +#endif + +#ifdef L_movstr + .text +! done all the large groups, do the remainder + +! 
jump to movstr+ +done: + add #64,r5 + mova ___movstrSI0,r0 + shll2 r6 + add r6,r0 + jmp @r0 + add #64,r4 + .align 4 + .global ___movstrSI64 +___movstrSI64: + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + .global ___movstrSI60 +___movstrSI60: + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + .global ___movstrSI56 +___movstrSI56: + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + .global ___movstrSI52 +___movstrSI52: + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + .global ___movstrSI48 +___movstrSI48: + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + .global ___movstrSI44 +___movstrSI44: + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + .global ___movstrSI40 +___movstrSI40: + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + .global ___movstrSI36 +___movstrSI36: + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + .global ___movstrSI32 +___movstrSI32: + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + .global ___movstrSI28 +___movstrSI28: + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + .global ___movstrSI24 +___movstrSI24: + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + .global ___movstrSI20 +___movstrSI20: + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + .global ___movstrSI16 +___movstrSI16: + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + .global ___movstrSI12 +___movstrSI12: + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + .global ___movstrSI8 +___movstrSI8: + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + .global ___movstrSI4 +___movstrSI4: + mov.l @(0,r5),r0 + mov.l r0,@(0,r4) +___movstrSI0: + rts + or r0,r0,r0 + + .align 4 + + .global ___movstr +___movstr: + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + + mov.l @(0,r5),r0 + mov.l r0,@(0,r4) + + add #-16,r6 + cmp/pl r6 + bf done + + add #64,r5 + bra ___movstr + add #64,r4 +#endif + +#ifdef L_movstr_i4 +#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) + .text + .global ___movstr_i4_even + .global ___movstr_i4_odd + .global ___movstrSI12_i4 + + .p2align 5 +L_movstr_2mod4_end: + mov.l r0,@(16,r4) + rts + mov.l r1,@(20,r4) + + .p2align 2 + +___movstr_i4_odd: + mov.l @r5+,r1 + add #-4,r4 + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r1,@(4,r4) + mov.l r2,@(8,r4) + +L_movstr_loop: + mov.l r3,@(12,r4) + dt r6 + mov.l @r5+,r0 + bt/s L_movstr_2mod4_end + mov.l @r5+,r1 + add #16,r4 +L_movstr_start_even: + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r0,@r4 + dt r6 + mov.l r1,@(4,r4) + bf/s L_movstr_loop + mov.l r2,@(8,r4) + rts + mov.l r3,@(12,r4) + +___movstr_i4_even: + mov.l @r5+,r0 + bra L_movstr_start_even + mov.l @r5+,r1 + + .p2align 4 +___movstrSI12_i4: + mov.l @r5,r0 + mov.l @(4,r5),r1 + mov.l @(8,r5),r2 + mov.l r0,@r4 + mov.l r1,@(4,r4) + rts + mov.l r2,@(8,r4) +#endif /* ! __SH4__ */ +#endif + +#ifdef L_mulsi3 + + + .global ___mulsi3 + +! r4 = aabb +! r5 = ccdd +! r0 = aabb*ccdd via partial products +! +! if aa == 0 and cc = 0 +! r0 = bb*dd +! +! else +! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536) +! + +___mulsi3: + mulu r4,r5 ! multiply the lsws macl=bb*dd + mov r5,r3 ! r3 = ccdd + swap.w r4,r2 ! r2 = bbaa + xtrct r2,r3 ! r3 = aacc + tst r3,r3 ! msws zero ? 
+ bf hiset + rts ! yes - then we have the answer + sts macl,r0 + +hiset: sts macl,r0 ! r0 = bb*dd + mulu r2,r5 | brewing macl = aa*dd + sts macl,r1 + mulu r3,r4 | brewing macl = cc*bb + sts macl,r2 + add r1,r2 + shll16 r2 + rts + add r2,r0 + + +#endif +#ifdef L_sdivsi3_i4 + .title "SH DIVIDE" +!! 4 byte integer Divide code for the Hitachi SH +#ifdef __SH4__ +!! args in r4 and r5, result in fpul, clobber dr0, dr2 + + .global ___sdivsi3_i4 +___sdivsi3_i4: + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + +#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) +!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 + + .global ___sdivsi3_i4 +___sdivsi3_i4: + sts.l fpscr,@-r15 + mov #8,r2 + swap.w r2,r2 + lds r2,fpscr + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + +#endif /* ! __SH4__ */ +#endif + +#ifdef L_sdivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh3e code. */ +#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__) +!! +!! Steve Chamberlain +!! sac@cygnus.com +!! +!! + +!! args in r4 and r5, result in r0 clobber r1,r2,r3 + + .global ___sdivsi3 +___sdivsi3: + mov r4,r1 + mov r5,r0 + + tst r0,r0 + bt div0 + mov #0,r2 + div0s r2,r1 + subc r3,r3 + subc r2,r1 + div0s r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + addc r2,r1 + rts + mov r1,r0 + + +div0: rts + mov #0,r0 + +#endif /* ! __SH4__ */ +#endif +#ifdef L_udivsi3_i4 + + .title "SH DIVIDE" +!! 4 byte integer Divide code for the Hitachi SH +#ifdef __SH4__ +!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 + + .global ___udivsi3_i4 +___udivsi3_i4: + mov #1,r1 + cmp/hi r1,r5 + bf trivial + rotr r1 + xor r1,r4 + lds r4,fpul + mova L1,r0 +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else +#ifdef __LITTLE_ENDIAN__ + fmov.s @r0+,fr5 + fmov.s @r0,fr4 +#else + fmov.s @r0+,fr4 + fmov.s @r0,fr5 +#endif +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + +trivial: + rts + lds r4,fpul + + .align 2 +L1: + .double 2147483648 + +#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) +!! 
args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 + + .global ___udivsi3_i4 +___udivsi3_i4: + mov #1,r1 + cmp/hi r1,r5 + bf trivial + sts.l fpscr,@-r15 + mova L1,r0 + lds.l @r0+,fpscr + rotr r1 + xor r1,r4 + lds r4,fpul +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else +#ifdef __LITTLE_ENDIAN__ + fmov.s @r0+,fr5 + fmov.s @r0,fr4 +#else + fmov.s @r0+,fr4 + fmov.s @r0,fr5 +#endif +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + +trivial: + rts + lds r4,fpul + + .align 2 +L1: +#if defined (__LITTLE_ENDIAN__) || ! defined (FMOVD_WORKS) + .long 0x80000 +#else + .long 0x180000 +#endif + .double 2147483648 + +#endif /* ! __SH4__ */ +#endif + +#ifdef L_udivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh3e code. */ +#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__) +!! +!! Steve Chamberlain +!! sac@cygnus.com +!! +!! + +!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit + .global ___udivsi3 + +___udivsi3: +longway: + mov #0,r0 + div0u + ! get one bit from the msb of the numerator into the T + ! bit and divide it by whats in r5. Put the answer bit + ! into the T bit so it can come out again at the bottom + + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 +shortway: + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + +vshortway: + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 +ret: rts + mov r4,r0 + +#endif /* __SH4__ */ +#endif +#ifdef L_set_fpscr +#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) + .global ___set_fpscr +___set_fpscr: + lds r4,fpscr + mov.l ___set_fpscr_L1,r1 + swap.w r4,r0 + or #24,r0 +#ifndef FMOVD_WORKS + xor #16,r0 +#endif +#if defined(__SH4__) + swap.w r0,r3 + mov.l r3,@(4,r1) +#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r2 + mov.l r2,@r1 +#endif +#ifndef FMOVD_WORKS + xor #8,r0 +#else + xor #24,r0 +#endif +#if defined(__SH4__) + swap.w r0,r2 + rts + mov.l r2,@r1 +#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r3 + rts + mov.l r3,@(4,r1) +#endif + .align 2 +___set_fpscr_L1: + .long ___fpscr_values +#ifdef __ELF__ + .comm ___fpscr_values,8,4 +#else + .comm ___fpscr_values,8 +#endif /* ELF */ +#endif /* SH3E / SH4 */ +#endif /* L_set_fpscr */ diff --git a/gcc/config/sh/rtems.h b/gcc/config/sh/rtems.h new file mode 100755 index 0000000..3e3fc7b --- /dev/null +++ b/gcc/config/sh/rtems.h @@ -0,0 +1,35 @@ +/* Definitions for rtems targeting a SH using COFF. + Copyright (C) 1997 Free Software Foundation, Inc. + Contributed by Joel Sherrill (joel@OARcorp.com). + +This file is part of GNU CC. 
+ +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include "sh/sh.h" + +/* Specify predefined symbols in preprocessor. */ + +#undef CPP_PREDEFINES +#define CPP_PREDEFINES "-D__sh__ -Drtems -D__rtems__ \ + -Asystem(rtems) -Acpu(sh) -Amachine(sh)" + +/* Generate calls to memcpy, memcmp and memset. */ +#ifndef TARGET_MEM_FUNCTIONS +#define TARGET_MEM_FUNCTIONS +#endif + +/* end of sh/rtems.h */ diff --git a/gcc/config/sh/rtemself.h b/gcc/config/sh/rtemself.h new file mode 100755 index 0000000..8000a3a --- /dev/null +++ b/gcc/config/sh/rtemself.h @@ -0,0 +1,33 @@ +/* Definitions for rtems targeting a SH using elf. + Copyright (C) 1997 Free Software Foundation, Inc. + Contributed by Joel Sherrill (joel@OARcorp.com). + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include "sh/elf.h" + +/* Specify predefined symbols in preprocessor. */ + +#undef CPP_PREDEFINES +#define CPP_PREDEFINES "-D__sh__ -D__ELF__ -Drtems -D__rtems__ \ + -Asystem(rtems) -Acpu(sh) -Amachine(sh)" + +/* Generate calls to memcpy, memcmp and memset. */ +#ifndef TARGET_MEM_FUNCTIONS +#define TARGET_MEM_FUNCTIONS +#endif diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c new file mode 100755 index 0000000..4d4b5cd --- /dev/null +++ b/gcc/config/sh/sh.c @@ -0,0 +1,4786 @@ +/* Output routines for GCC for Hitachi Super-H. + Copyright (C) 1993-1998 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Contributed by Steve Chamberlain (sac@cygnus.com). + Improved by Jim Wilson (wilson@cygnus.com). 
*/ + +#include "config.h" + +#include <stdio.h> + +#include "rtl.h" +#include "tree.h" +#include "flags.h" +#include "insn-flags.h" +#include "expr.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "output.h" +#include "insn-attr.h" + +int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch; + +#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0) +#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1) + +/* ??? The pragma interrupt support will not work for SH3. */ +/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to + output code for the next function appropriate for an interrupt handler. */ +int pragma_interrupt; + +/* This is set by the trap_exit attribute for functions. It specifies + a trap number to be used in a trapa instruction at function exit + (instead of an rte instruction). */ +int trap_exit; + +/* This is used by the sp_switch attribute for functions. It specifies + a variable holding the address of the stack the interrupt function + should switch to/from at entry/exit. */ +rtx sp_switch; + +/* This is set by #pragma trapa, and is similar to the above, except that + the compiler doesn't emit code to preserve all registers. */ +static int pragma_trapa; + +/* This is set by #pragma nosave_low_regs. This is useful on the SH3, + which has a separate set of low regs for User and Supervisor modes. + This should only be used for the lowest level of interrupts. Higher levels + of interrupts must save the registers in case they themselves are + interrupted. */ +int pragma_nosave_low_regs; + +/* This is used for communication between SETUP_INCOMING_VARARGS and + sh_expand_prologue. */ +int current_function_anonymous_args; + +/* Global variables from toplev.c and final.c that are used within, but + not declared in any header file. */ +extern char *version_string; +extern int *insn_addresses; + +/* Global variables for machine-dependent things. */ + +/* Which cpu are we scheduling for. */ +enum processor_type sh_cpu; + +/* Saved operands from the last compare to use when we generate an scc + or bcc insn. */ + +rtx sh_compare_op0; +rtx sh_compare_op1; + +enum machine_mode sh_addr_diff_vec_mode; + +/* Provides the class number of the smallest class containing + reg number. */ + +int regno_reg_class[FIRST_PSEUDO_REGISTER] = +{ + R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, PR_REGS, T_REGS, NO_REGS, + MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS, + FP0_REGS,FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + DF_REGS, DF_REGS, DF_REGS, DF_REGS, + DF_REGS, DF_REGS, DF_REGS, DF_REGS, + FPSCR_REGS, +}; + +char fp_reg_names[][5] = +{ + "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", + "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", + "fpul", + "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14", +}; + +/* Provide reg_class from a letter such as appears in the machine + description. 
*/ + +enum reg_class reg_class_from_letter[] = +{ + /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS, + /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS, + /* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS, + /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS, + /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS, + /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS, + /* y */ FPUL_REGS, /* z */ R0_REGS +}; + +int assembler_dialect; + +rtx get_fpscr_rtx (); +void emit_sf_insn (); +void emit_df_insn (); + +static void split_branches PROTO ((rtx)); + +/* Print the operand address in x to the stream. */ + +void +print_operand_address (stream, x) + FILE *stream; + rtx x; +{ + switch (GET_CODE (x)) + { + case REG: + case SUBREG: + fprintf (stream, "@%s", reg_names[true_regnum (x)]); + break; + + case PLUS: + { + rtx base = XEXP (x, 0); + rtx index = XEXP (x, 1); + + switch (GET_CODE (index)) + { + case CONST_INT: + fprintf (stream, "@(%d,%s)", INTVAL (index), + reg_names[true_regnum (base)]); + break; + + case REG: + case SUBREG: + { + int base_num = true_regnum (base); + int index_num = true_regnum (index); + + fprintf (stream, "@(r0,%s)", + reg_names[MAX (base_num, index_num)]); + break; + } + + default: + debug_rtx (x); + abort (); + } + } + break; + + case PRE_DEC: + fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]); + break; + + case POST_INC: + fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]); + break; + + default: + output_addr_const (stream, x); + break; + } +} + +/* Print operand x (an rtx) in assembler syntax to file stream + according to modifier code. + + '.' print a .s if insn needs delay slot + ',' print LOCAL_LABEL_PREFIX + '@' print trap, rte or rts depending upon pragma interruptness + '#' output a nop if there is nothing to put in the delay slot + 'O' print a constant without the # + 'R' print the LSW of a dp value - changes if in little endian + 'S' print the MSW of a dp value - changes if in little endian + 'T' print the next word of a dp value - same as 'R' in big endian mode. + 'o' output an operator. */ + +void +print_operand (stream, x, code) + FILE *stream; + rtx x; + int code; +{ + switch (code) + { + case '.': + if (final_sequence + && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) + fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s"); + break; + case ',': + fprintf (stream, "%s", LOCAL_LABEL_PREFIX); + break; + case '@': + { + int interrupt_handler; + + if ((lookup_attribute + ("interrupt_handler", + DECL_MACHINE_ATTRIBUTES (current_function_decl))) + != NULL_TREE) + interrupt_handler = 1; + else + interrupt_handler = 0; + + if (trap_exit) + fprintf (stream, "trapa #%d", trap_exit); + else if (interrupt_handler) + fprintf (stream, "rte"); + else + fprintf (stream, "rts"); + break; + } + case '#': + /* Output a nop if there's nothing in the delay slot. */ + if (dbr_sequence_length () == 0) + fprintf (stream, "\n\tnop"); + break; + case 'O': + output_addr_const (stream, x); + break; + case 'R': + fputs (reg_names[REGNO (x) + LSW], (stream)); + break; + case 'S': + fputs (reg_names[REGNO (x) + MSW], (stream)); + break; + case 'T': + /* Next word of a double. 
*/ + switch (GET_CODE (x)) + { + case REG: + fputs (reg_names[REGNO (x) + 1], (stream)); + break; + case MEM: + if (GET_CODE (XEXP (x, 0)) != PRE_DEC + && GET_CODE (XEXP (x, 0)) != POST_INC) + x = adj_offsettable_operand (x, 4); + print_operand_address (stream, XEXP (x, 0)); + break; + } + break; + case 'o': + switch (GET_CODE (x)) + { + case PLUS: fputs ("add", stream); break; + case MINUS: fputs ("sub", stream); break; + case MULT: fputs ("mul", stream); break; + case DIV: fputs ("div", stream); break; + } + break; + default: + switch (GET_CODE (x)) + { + case REG: + if (REGNO (x) >= FIRST_FP_REG && REGNO (x) <= LAST_FP_REG + && GET_MODE_SIZE (GET_MODE (x)) > 4) + fprintf ((stream), "d%s", reg_names[REGNO (x)]+1); + else + fputs (reg_names[REGNO (x)], (stream)); + break; + case MEM: + output_address (XEXP (x, 0)); + break; + default: + fputc ('#', stream); + output_addr_const (stream, x); + break; + } + break; + } +} + +/* Emit code to perform a block move. Choose the best method. + + OPERANDS[0] is the destination. + OPERANDS[1] is the source. + OPERANDS[2] is the size. + OPERANDS[3] is the alignment safe to use. */ + +int +expand_block_move (operands) + rtx *operands; +{ + int align = INTVAL (operands[3]); + int constp = (GET_CODE (operands[2]) == CONST_INT); + int bytes = (constp ? INTVAL (operands[2]) : 0); + + /* If it isn't a constant number of bytes, or if it doesn't have 4 byte + alignment, or if it isn't a multiple of 4 bytes, then fail. */ + if (! constp || align < 4 || (bytes % 4 != 0)) + return 0; + + if (TARGET_HARD_SH4) + { + if (bytes < 12) + return 0; + else if (bytes == 12) + { + tree entry_name; + rtx func_addr_rtx; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + + entry_name = get_identifier ("__movstrSI12_i4"); + + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + emit_insn (gen_block_move_real_i4 (func_addr_rtx)); + return 1; + } + else if (! TARGET_SMALLCODE) + { + tree entry_name; + rtx func_addr_rtx; + int dwords; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + rtx r6 = gen_rtx (REG, SImode, 6); + + entry_name = get_identifier (bytes & 4 + ? "__movstr_i4_odd" + : "__movstr_i4_even"); + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + + dwords = bytes >> 3; + emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1))); + emit_insn (gen_block_lump_real_i4 (func_addr_rtx)); + return 1; + } + else + return 0; + } + if (bytes < 64) + { + char entry[30]; + tree entry_name; + rtx func_addr_rtx; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + + sprintf (entry, "__movstrSI%d", bytes); + entry_name = get_identifier (entry); + + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx (SYMBOL_REF, Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + emit_insn (gen_block_move_real (func_addr_rtx)); + return 1; + } + + /* This is the same number of bytes as a memcpy call, but to a different + less common function name, so this will occasionally use more space. */ + if (! 
TARGET_SMALLCODE) + { + tree entry_name; + rtx func_addr_rtx; + int final_switch, while_loop; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + rtx r6 = gen_rtx (REG, SImode, 6); + + entry_name = get_identifier ("__movstr"); + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx (SYMBOL_REF, Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + + /* r6 controls the size of the move. 16 is decremented from it + for each 64 bytes moved. Then the negative bit left over is used + as an index into a list of move instructions. e.g., a 72 byte move + would be set up with size(r6) = 14, for one iteration through the + big while loop, and a switch of -2 for the last part. */ + + final_switch = 16 - ((bytes / 4) % 16); + while_loop = ((bytes / 4) / 16 - 1) * 16; + emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch))); + emit_insn (gen_block_lump_real (func_addr_rtx)); + return 1; + } + + return 0; +} + +/* Prepare operands for a move define_expand; specifically, one of the + operands must be in a register. */ + +int +prepare_move_operands (operands, mode) + rtx operands[]; + enum machine_mode mode; +{ + if (! reload_in_progress && ! reload_completed) + { + /* Copy the source to a register if both operands aren't registers. */ + if (! register_operand (operands[0], mode) + && ! register_operand (operands[1], mode)) + operands[1] = copy_to_mode_reg (mode, operands[1]); + + /* This case can happen while generating code to move the result + of a library call to the target. Reject `st r0,@(rX,rY)' because + reload will fail to find a spill register for rX, since r0 is already + being used for the source. */ + else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0 + && GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG) + operands[1] = copy_to_mode_reg (mode, operands[1]); + } + + return 0; +} + +/* Prepare the operands for an scc instruction; make sure that the + compare has been done. */ +rtx +prepare_scc_operands (code) + enum rtx_code code; +{ + rtx t_reg = gen_rtx (REG, SImode, T_REG); + enum rtx_code oldcode = code; + enum machine_mode mode; + + /* First need a compare insn. */ + switch (code) + { + case NE: + /* It isn't possible to handle this case. */ + abort (); + case LT: + code = GT; + break; + case LE: + code = GE; + break; + case LTU: + code = GTU; + break; + case LEU: + code = GEU; + break; + } + if (code != oldcode) + { + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + } + + mode = GET_MODE (sh_compare_op0); + if (mode == VOIDmode) + mode = GET_MODE (sh_compare_op1); + + sh_compare_op0 = force_reg (mode, sh_compare_op0); + if ((code != EQ && code != NE + && (sh_compare_op1 != const0_rtx + || code == GTU || code == GEU || code == LTU || code == LEU)) + || TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT) + sh_compare_op1 = force_reg (mode, sh_compare_op1); + + if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) + (mode == SFmode ? 
emit_sf_insn : emit_df_insn) + (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, + gen_rtx (SET, VOIDmode, t_reg, + gen_rtx (code, SImode, + sh_compare_op0, sh_compare_op1)), + gen_rtx (USE, VOIDmode, get_fpscr_rtx ())))); + else + emit_insn (gen_rtx (SET, VOIDmode, t_reg, + gen_rtx (code, SImode, sh_compare_op0, + sh_compare_op1))); + + return t_reg; +} + +/* Called from the md file, set up the operands of a compare instruction. */ + +void +from_compare (operands, code) + rtx *operands; + int code; +{ + enum machine_mode mode = GET_MODE (sh_compare_op0); + rtx insn; + if (mode == VOIDmode) + mode = GET_MODE (sh_compare_op1); + if (code != EQ + || mode == DImode + || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)) + { + /* Force args into regs, since we can't use constants here. */ + sh_compare_op0 = force_reg (mode, sh_compare_op0); + if (sh_compare_op1 != const0_rtx + || code == GTU || code == GEU + || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)) + sh_compare_op1 = force_reg (mode, sh_compare_op1); + } + if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE) + { + from_compare (operands, GT); + insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1); + } + else + insn = gen_rtx (SET, VOIDmode, + gen_rtx (REG, SImode, 18), + gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1)); + if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + insn = gen_rtx (PARALLEL, VOIDmode, + gen_rtvec (2, insn, + gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))); + (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn); + } + else + emit_insn (insn); +} + +/* Functions to output assembly code. */ + +/* Return a sequence of instructions to perform DI or DF move. + + Since the SH cannot move a DI or DF in one instruction, we have + to take care when we see overlapping source and dest registers. */ + +char * +output_movedouble (insn, operands, mode) + rtx insn; + rtx operands[]; + enum machine_mode mode; +{ + rtx dst = operands[0]; + rtx src = operands[1]; + + if (GET_CODE (dst) == MEM + && GET_CODE (XEXP (dst, 0)) == PRE_DEC) + return "mov.l %T1,%0\n\tmov.l %1,%0"; + + if (register_operand (dst, mode) + && register_operand (src, mode)) + { + if (REGNO (src) == MACH_REG) + return "sts mach,%S0\n\tsts macl,%R0"; + + /* When mov.d r1,r2 do r2->r3 then r1->r2; + when mov.d r1,r0 do r1->r0 then r2->r1. */ + + if (REGNO (src) + 1 == REGNO (dst)) + return "mov %T1,%T0\n\tmov %1,%0"; + else + return "mov %1,%0\n\tmov %T1,%T0"; + } + else if (GET_CODE (src) == CONST_INT) + { + if (INTVAL (src) < 0) + output_asm_insn ("mov #-1,%S0", operands); + else + output_asm_insn ("mov #0,%S0", operands); + + return "mov %1,%R0"; + } + else if (GET_CODE (src) == MEM) + { + int ptrreg = -1; + int dreg = REGNO (dst); + rtx inside = XEXP (src, 0); + + if (GET_CODE (inside) == REG) + ptrreg = REGNO (inside); + else if (GET_CODE (inside) == SUBREG) + ptrreg = REGNO (SUBREG_REG (inside)) + SUBREG_WORD (inside); + else if (GET_CODE (inside) == PLUS) + { + ptrreg = REGNO (XEXP (inside, 0)); + /* ??? A r0+REG address shouldn't be possible here, because it isn't + an offsettable address. Unfortunately, offsettable addresses use + QImode to check the offset, and a QImode offsettable address + requires r0 for the other operand, which is not currently + supported, so we can't use the 'o' constraint. + Thus we must check for and handle r0+REG addresses here. + We punt for now, since this is likely very rare. 
*/ + if (GET_CODE (XEXP (inside, 1)) == REG) + abort (); + } + else if (GET_CODE (inside) == LABEL_REF) + return "mov.l %1,%0\n\tmov.l %1+4,%T0"; + else if (GET_CODE (inside) == POST_INC) + return "mov.l %1,%0\n\tmov.l %1,%T0"; + else + abort (); + + /* Work out the safe way to copy. Copy into the second half first. */ + if (dreg == ptrreg) + return "mov.l %T1,%T0\n\tmov.l %1,%0"; + } + + return "mov.l %1,%0\n\tmov.l %T1,%T0"; +} + +/* Print an instruction which would have gone into a delay slot after + another instruction, but couldn't because the other instruction expanded + into a sequence where putting the slot insn at the end wouldn't work. */ + +static void +print_slot (insn) + rtx insn; +{ + final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1); + + INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1; +} + +char * +output_far_jump (insn, op) + rtx insn; + rtx op; +{ + struct { rtx lab, reg, op; } this; + char *jump; + int far; + int offset = branch_dest (insn) - insn_addresses[INSN_UID (insn)]; + + this.lab = gen_label_rtx (); + + if (TARGET_SH2 + && offset >= -32764 + && offset - get_attr_length (insn) <= 32766) + { + far = 0; + jump = "mov.w %O0,%1;braf %1"; + } + else + { + far = 1; + jump = "mov.l %O0,%1;jmp @%1"; + } + /* If we have a scratch register available, use it. */ + if (GET_CODE (PREV_INSN (insn)) == INSN + && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch) + { + this.reg = SET_DEST (PATTERN (PREV_INSN (insn))); + output_asm_insn (jump, &this.lab); + if (dbr_sequence_length ()) + print_slot (final_sequence); + else + output_asm_insn ("nop", 0); + } + else + { + /* Output the delay slot insn first if any. */ + if (dbr_sequence_length ()) + print_slot (final_sequence); + + this.reg = gen_rtx (REG, SImode, 13); + output_asm_insn ("mov.l r13,@-r15", 0); + output_asm_insn (jump, &this.lab); + output_asm_insn ("mov.l @r15+,r13", 0); + } + if (far) + output_asm_insn (".align 2", 0); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab)); + this.op = op; + output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab); + return ""; +} + +/* Local label counter, used for constants in the pool and inside + pattern branches. */ + +static int lf = 100; + +/* Output code for ordinary branches. */ + +char * +output_branch (logic, insn, operands) + int logic; + rtx insn; + rtx *operands; +{ + switch (get_attr_length (insn)) + { + case 6: + /* This can happen if filling the delay slot has caused a forward + branch to exceed its range (we could reverse it, but only + when we know we won't overextend other branches; this should + best be handled by relaxation). + It can also happen when other condbranches hoist delay slot insn + from their destination, thus leading to code size increase. + But the branch will still be in the range -4092..+4098 bytes. */ + + if (! TARGET_RELAX) + { + int label = lf++; + /* The call to print_slot will clobber the operands. */ + rtx op0 = operands[0]; + + /* If the instruction in the delay slot is annulled (true), then + there is no delay slot where we can put it now. The only safe + place for it is after the label. final will do that by default. */ + + if (final_sequence + && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) + { + asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", + ASSEMBLER_DIALECT ? "/" : ".", label); + print_slot (final_sequence); + } + else + asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? 
"f" : "t", label); + + output_asm_insn ("bra\t%l0", &op0); + fprintf (asm_out_file, "\tnop\n"); + ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label); + + return ""; + } + /* When relaxing, handle this like a short branch. The linker + will fix it up if it still doesn't fit after relaxation. */ + case 2: + return logic ? "bt%.\t%l0" : "bf%.\t%l0"; + default: + abort (); + } +} + +char * +output_branchy_insn (code, template, insn, operands) + char *template; + enum rtx_code code; + rtx insn; + rtx *operands; +{ + rtx next_insn = NEXT_INSN (insn); + int label_nr; + + if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn)) + { + rtx src = SET_SRC (PATTERN (next_insn)); + if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) + { + /* Following branch not taken */ + operands[9] = gen_label_rtx (); + emit_label_after (operands[9], next_insn); + return template; + } + else + { + int offset = (branch_dest (next_insn) + - insn_addresses[INSN_UID (next_insn)] + 4); + if (offset >= -252 && offset <= 258) + { + if (GET_CODE (src) == IF_THEN_ELSE) + /* branch_true */ + src = XEXP (src, 1); + operands[9] = src; + return template; + } + } + } + operands[9] = gen_label_rtx (); + emit_label_after (operands[9], insn); + return template; +} + +char * +output_ieee_ccmpeq (insn, operands) + rtx insn, operands; +{ + output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands); +} + +/* Output to FILE the start of the assembler file. */ + +void +output_file_start (file) + FILE *file; +{ + register int pos; + + output_file_directive (file, main_input_filename); + + /* Switch to the data section so that the coffsem symbol and the + gcc2_compiled. symbol aren't in the text section. */ + data_section (); + + if (TARGET_LITTLE_ENDIAN) + fprintf (file, "\t.little\n"); +} + +/* Actual number of instructions used to make a shift by N. */ +static char ashiftrt_insns[] = + { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2}; + +/* Left shift and logical right shift are the same. */ +static char shift_insns[] = + { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; + +/* Individual shift amounts needed to get the above length sequences. + One bit right shifts clobber the T bit, so when possible, put one bit + shifts in the middle of the sequence, so the ends are eligible for + branch delay slots. */ +static short shift_amounts[32][5] = { + {0}, {1}, {2}, {2, 1}, + {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2}, + {8}, {8, 1}, {8, 2}, {8, 1, 2}, + {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8}, + {16}, {16, 1}, {16, 2}, {16, 1, 2}, + {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, + {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, + {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; + +/* Likewise, but for shift amounts < 16, up to three highmost bits + might be clobbered. This is typically used when combined with some + kind of sign or zero extension. 
*/ + +static char ext_shift_insns[] = + { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; + +static short ext_shift_amounts[32][4] = { + {0}, {1}, {2}, {2, 1}, + {2, 2}, {2, 1, 2}, {8, -2}, {8, -1}, + {8}, {8, 1}, {8, 2}, {8, 1, 2}, + {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1}, + {16}, {16, 1}, {16, 2}, {16, 1, 2}, + {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, + {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, + {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; + +/* Assuming we have a value that has been sign-extended by at least one bit, + can we use the ext_shift_amounts with the last shift turned to an arithmetic shift + to shift it by N without data loss, and quicker than by other means? */ +#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15) + +/* This is used in length attributes in sh.md to help compute the length + of arbitrary constant shift instructions. */ + +int +shift_insns_rtx (insn) + rtx insn; +{ + rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + int shift_count = INTVAL (XEXP (set_src, 1)); + enum rtx_code shift_code = GET_CODE (set_src); + + switch (shift_code) + { + case ASHIFTRT: + return ashiftrt_insns[shift_count]; + case LSHIFTRT: + case ASHIFT: + return shift_insns[shift_count]; + default: + abort(); + } +} + +/* Return the cost of a shift. */ + +int +shiftcosts (x) + rtx x; +{ + int value = INTVAL (XEXP (x, 1)); + + /* If shift by a non constant, then this will be expensive. */ + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return SH_DYNAMIC_SHIFT_COST; + + /* Otherwise, return the true cost in instructions. */ + if (GET_CODE (x) == ASHIFTRT) + { + int cost = ashiftrt_insns[value]; + /* If SH3, then we put the constant in a reg and use shad. */ + if (cost > 1 + SH_DYNAMIC_SHIFT_COST) + cost = 1 + SH_DYNAMIC_SHIFT_COST; + return cost; + } + else + return shift_insns[value]; +} + +/* Return the cost of an AND operation. */ + +int +andcosts (x) + rtx x; +{ + int i; + + /* Anding with a register is a single cycle and instruction. */ + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return 1; + + i = INTVAL (XEXP (x, 1)); + /* These constants are single cycle extu.[bw] instructions. */ + if (i == 0xff || i == 0xffff) + return 1; + /* Constants that can be used in an and immediate instruction is a single + cycle, but this requires r0, so make it a little more expensive. */ + if (CONST_OK_FOR_L (i)) + return 2; + /* Constants that can be loaded with a mov immediate and an and. + This case is probably unnecessary. */ + if (CONST_OK_FOR_I (i)) + return 2; + /* Any other constants requires a 2 cycle pc-relative load plus an and. + This case is probably unnecessary. */ + return 3; +} + +/* Return the cost of a multiply. */ +int +multcosts (x) + rtx x; +{ + if (TARGET_SH2) + { + /* We have a mul insn, so we can never take more than the mul and the + read of the mac reg, but count more because of the latency and extra + reg usage. */ + if (TARGET_SMALLCODE) + return 2; + return 3; + } + + /* If we're aiming at small code, then just count the number of + insns in a multiply call sequence. */ + if (TARGET_SMALLCODE) + return 5; + + /* Otherwise count all the insns in the routine we'd be calling too. */ + return 20; +} + +/* Code to expand a shift. */ + +void +gen_ashift (type, n, reg) + int type; + int n; + rtx reg; +{ + /* Negative values here come from the shift_amounts array. 
*/ + if (n < 0) + { + if (type == ASHIFT) + type = LSHIFTRT; + else + type = ASHIFT; + n = -n; + } + + switch (type) + { + case ASHIFTRT: + emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n))); + break; + case LSHIFTRT: + if (n == 1) + emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n))); + else + emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n))); + break; + case ASHIFT: + emit_insn (gen_ashlsi3_k (reg, reg, GEN_INT (n))); + break; + } +} + +/* Same for HImode */ + +void +gen_ashift_hi (type, n, reg) + int type; + int n; + rtx reg; +{ + /* Negative values here come from the shift_amounts array. */ + if (n < 0) + { + if (type == ASHIFT) + type = LSHIFTRT; + else + type = ASHIFT; + n = -n; + } + + switch (type) + { + case ASHIFTRT: + case LSHIFTRT: + /* We don't have HImode right shift operations because using the + ordinary 32 bit shift instructions for that doesn't generate proper + zero/sign extension. + gen_ashift_hi is only called in contexts where we know that the + sign extension works out correctly. */ + { + int word = 0; + if (GET_CODE (reg) == SUBREG) + { + word = SUBREG_WORD (reg); + reg = SUBREG_REG (reg); + } + gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word)); + break; + } + case ASHIFT: + emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n))); + break; + } +} + +/* Output RTL to split a constant shift into its component SH constant + shift instructions. */ + +int +gen_shifty_op (code, operands) + int code; + rtx *operands; +{ + int value = INTVAL (operands[2]); + int max, i; + + /* Truncate the shift count in case it is out of bounds. */ + value = value & 0x1f; + + if (value == 31) + { + if (code == LSHIFTRT) + { + emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); + emit_insn (gen_movt (operands[0])); + return; + } + else if (code == ASHIFT) + { + /* There is a two instruction sequence for 31 bit left shifts, + but it requires r0. */ + if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0) + { + emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); + emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); + return; + } + } + } + else if (value == 0) + { + /* This can happen when not optimizing. We must output something here + to prevent the compiler from aborting in final.c after the try_split + call. */ + emit_insn (gen_nop ()); + return; + } + + max = shift_insns[value]; + for (i = 0; i < max; i++) + gen_ashift (code, shift_amounts[value][i], operands[0]); +} + +/* Same as above, but optimized for values where the topmost bits don't + matter. */ + +int +gen_shifty_hi_op (code, operands) + int code; + rtx *operands; +{ + int value = INTVAL (operands[2]); + int max, i; + void (*gen_fun)(); + + /* This operation is used by and_shl for SImode values with a few + high bits known to be cleared. */ + value &= 31; + if (value == 0) + { + emit_insn (gen_nop ()); + return; + } + + gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift; + if (code == ASHIFT) + { + max = ext_shift_insns[value]; + for (i = 0; i < max; i++) + gen_fun (code, ext_shift_amounts[value][i], operands[0]); + } + else + /* When shifting right, emit the shifts in reverse order, so that + solitary negative values come first. */ + for (i = ext_shift_insns[value] - 1; i >= 0; i--) + gen_fun (code, ext_shift_amounts[value][i], operands[0]); +} + +/* Output RTL for an arithmetic right shift. */ + +/* ??? Rewrite to use super-optimizer sequences. 
*/ + +int +expand_ashiftrt (operands) + rtx *operands; +{ + rtx wrk; + char func[18]; + tree func_name; + int value; + + if (TARGET_SH3) + { + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx count = copy_to_mode_reg (SImode, operands[2]); + emit_insn (gen_negsi2 (count, count)); + emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); + return 1; + } + else if (ashiftrt_insns[INTVAL (operands[2]) & 31] + > 1 + SH_DYNAMIC_SHIFT_COST) + { + rtx count + = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31))); + emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); + return 1; + } + } + if (GET_CODE (operands[2]) != CONST_INT) + return 0; + + value = INTVAL (operands[2]) & 31; + + if (value == 31) + { + emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); + return 1; + } + else if (value >= 16 && value <= 19) + { + wrk = gen_reg_rtx (SImode); + emit_insn (gen_ashrsi2_16 (wrk, operands[1])); + value -= 16; + while (value--) + gen_ashift (ASHIFTRT, 1, wrk); + emit_move_insn (operands[0], wrk); + return 1; + } + /* Expand a short sequence inline, longer call a magic routine. */ + else if (value <= 5) + { + wrk = gen_reg_rtx (SImode); + emit_move_insn (wrk, operands[1]); + while (value--) + gen_ashift (ASHIFTRT, 1, wrk); + emit_move_insn (operands[0], wrk); + return 1; + } + + wrk = gen_reg_rtx (Pmode); + + /* Load the value into an arg reg and call a helper. */ + emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]); + sprintf (func, "__ashiftrt_r4_%d", value); + func_name = get_identifier (func); + emit_move_insn (wrk, gen_rtx (SYMBOL_REF, Pmode, + IDENTIFIER_POINTER (func_name))); + emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk)); + emit_move_insn (operands[0], gen_rtx (REG, SImode, 4)); + return 1; +} + +int sh_dynamicalize_shift_p (count) + rtx count; +{ + return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST; +} + +/* Try to find a good way to implement the combiner pattern + [(set (match_operand:SI 0 "register_operand" "r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) . + LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3. + return 0 for simple right / left or left/right shift combination. + return 1 for a combination of shifts with zero_extend. + return 2 for a combination of shifts with an AND that needs r0. + return 3 for a combination of shifts with an AND that needs an extra + scratch register, when the three highmost bits of the AND mask are clear. + return 4 for a combination of shifts with an AND that needs an extra + scratch register, when any of the three highmost bits of the AND mask + is set. + If ATTRP is set, store an initial right shift width in ATTRP[0], + and the instruction length in ATTRP[1] . These values are not valid + when returning 0. + When ATTRP is set and returning 1, ATTRP[2] gets set to the index into + shift_amounts for the last shift value that is to be used before the + sign extend. 
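+   As a concrete example, with LEFT_RTX == 2 and MASK_RTX == 0x3fc the
+   mask reduces to 0xff once the left shift is taken into account, so the
+   cheapest implementation is a zero extension of the low byte followed by
+   a two bit left shift (extu.b plus shll2), and this function returns 1.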
*/ +int +shl_and_kind (left_rtx, mask_rtx, attrp) + rtx left_rtx, mask_rtx; + int *attrp; +{ + unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2; + int left = INTVAL (left_rtx), right; + int best = 0; + int cost, best_cost = 10000; + int best_right = 0, best_len = 0; + int i; + int can_ext; + + if (left < 0 || left > 31) + return 0; + if (GET_CODE (mask_rtx) == CONST_INT) + mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left; + else + mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left; + /* Can this be expressed as a right shift / left shift pair ? */ + lsb = ((mask ^ (mask - 1)) >> 1) + 1; + right = exact_log2 (lsb); + mask2 = ~(mask + lsb - 1); + lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1; + /* mask has no zeroes but trailing zeroes <==> ! mask2 */ + if (! mask2) + best_cost = shift_insns[right] + shift_insns[right + left]; + /* mask has no trailing zeroes <==> ! right */ + else if (! right && mask2 == ~(lsb2 - 1)) + { + int late_right = exact_log2 (lsb2); + best_cost = shift_insns[left + late_right] + shift_insns[late_right]; + } + /* Try to use zero extend */ + if (mask2 == ~(lsb2 - 1)) + { + int width, first; + + for (width = 8; width <= 16; width += 8) + { + /* Can we zero-extend right away? */ + if (lsb2 == (HOST_WIDE_INT)1 << width) + { + cost + = 1 + ext_shift_insns[right] + ext_shift_insns[left + right]; + if (cost < best_cost) + { + best = 1; + best_cost = cost; + best_right = right; + best_len = cost; + if (attrp) + attrp[2] = -1; + } + continue; + } + /* ??? Could try to put zero extend into initial right shift, + or even shift a bit left before the right shift. */ + /* Determine value of first part of left shift, to get to the + zero extend cut-off point. */ + first = width - exact_log2 (lsb2) + right; + if (first >= 0 && right + left - first >= 0) + { + cost = ext_shift_insns[right] + ext_shift_insns[first] + 1 + + ext_shift_insns[right + left - first]; + if (cost < best_cost) + { + best = 1; + best_cost = cost; + best_right = right; + best_len = cost; + if (attrp) + attrp[2] = first; + } + } + } + } + /* Try to use r0 AND pattern */ + for (i = 0; i <= 2; i++) + { + if (i > right) + break; + if (! CONST_OK_FOR_L (mask >> i)) + continue; + cost = (i != 0) + 2 + ext_shift_insns[left + i]; + if (cost < best_cost) + { + best = 2; + best_cost = cost; + best_right = i; + best_len = cost - 1; + } + } + /* Try to use a scratch register to hold the AND operand. */ + can_ext = ((mask << left) & 0xe0000000) == 0; + for (i = 0; i <= 2; i++) + { + if (i > right) + break; + cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3) + + (can_ext ? ext_shift_insns : shift_insns)[left + i]; + if (cost < best_cost) + { + best = 4 - can_ext; + best_cost = cost; + best_right = i; + best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i); + } + } + + if (attrp) + { + attrp[0] = best_right; + attrp[1] = best_len; + } + return best; +} + +/* This is used in length attributes of the unnamed instructions + corresponding to shl_and_kind return values of 1 and 2. */ +int +shl_and_length (insn) + rtx insn; +{ + rtx set_src, left_rtx, mask_rtx; + int attributes[3]; + + set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + left_rtx = XEXP (XEXP (set_src, 0), 1); + mask_rtx = XEXP (set_src, 1); + shl_and_kind (left_rtx, mask_rtx, attributes); + return attributes[1]; +} + +/* This is used in length attribute of the and_shl_scratch instruction. 
*/ + +int +shl_and_scr_length (insn) + rtx insn; +{ + rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + int len = shift_insns[INTVAL (XEXP (set_src, 1))]; + rtx op = XEXP (set_src, 0); + len += shift_insns[INTVAL (XEXP (op, 1))] + 1; + op = XEXP (XEXP (op, 0), 0); + return len + shift_insns[INTVAL (XEXP (op, 1))]; +} + +/* Generating rtl? */ +extern int rtx_equal_function_value_matters; + +/* Generate rtl for instructions for which shl_and_kind advised a particular + method of generating them, i.e. returned zero. */ + +int +gen_shl_and (dest, left_rtx, mask_rtx, source) + rtx dest, left_rtx, mask_rtx, source; +{ + int attributes[3]; + unsigned HOST_WIDE_INT mask; + int kind = shl_and_kind (left_rtx, mask_rtx, attributes); + int right, total_shift; + int (*shift_gen_fun) PROTO((int, rtx*)) = gen_shifty_hi_op; + + right = attributes[0]; + total_shift = INTVAL (left_rtx) + right; + mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift; + switch (kind) + { + default: + return -1; + case 1: + { + int first = attributes[2]; + rtx operands[3]; + + if (first < 0) + { + emit_insn ((mask << right) <= 0xff + ? gen_zero_extendqisi2(dest, + gen_lowpart (QImode, source)) + : gen_zero_extendhisi2(dest, + gen_lowpart (HImode, source))); + source = dest; + } + if (source != dest) + emit_insn (gen_movsi (dest, source)); + operands[0] = dest; + if (right) + { + operands[2] = GEN_INT (right); + gen_shifty_hi_op (LSHIFTRT, operands); + } + if (first > 0) + { + operands[2] = GEN_INT (first); + gen_shifty_hi_op (ASHIFT, operands); + total_shift -= first; + mask <<= first; + } + if (first >= 0) + emit_insn (mask <= 0xff + ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest)) + : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest))); + if (total_shift > 0) + { + operands[2] = GEN_INT (total_shift); + gen_shifty_hi_op (ASHIFT, operands); + } + break; + } + case 4: + shift_gen_fun = gen_shifty_op; + case 3: + /* If the topmost bit that matters is set, set the topmost bits + that don't matter. This way, we might be able to get a shorter + signed constant. */ + if (mask & ((HOST_WIDE_INT)1 << 31 - total_shift)) + mask |= (HOST_WIDE_INT)~0 << (31 - total_shift); + case 2: + /* Don't expand fine-grained when combining, because that will + make the pattern fail. 
*/ + if (rtx_equal_function_value_matters + || reload_in_progress || reload_completed) + { + rtx operands[3]; + + /* Cases 3 and 4 should be handled by this split + only while combining */ + if (kind > 2) + abort (); + if (right) + { + emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right))); + source = dest; + } + emit_insn (gen_andsi3 (dest, source, GEN_INT (mask))); + if (total_shift) + { + operands[0] = dest; + operands[1] = dest; + operands[2] = GEN_INT (total_shift); + shift_gen_fun (ASHIFT, operands); + } + break; + } + else + { + int neg = 0; + if (kind != 4 && total_shift < 16) + { + neg = -ext_shift_amounts[total_shift][1]; + if (neg > 0) + neg -= ext_shift_amounts[total_shift][2]; + else + neg = 0; + } + emit_insn (gen_and_shl_scratch (dest, source, + GEN_INT (right), + GEN_INT (mask), + GEN_INT (total_shift + neg), + GEN_INT (neg))); + emit_insn (gen_movsi (dest, dest)); + break; + } + } + return 0; +} + +/* Try to find a good way to implement the combiner pattern + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI 18))] + LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3. + return 0 for simple left / right shift combination. + return 1 for left shift / 8 bit sign extend / left shift. + return 2 for left shift / 16 bit sign extend / left shift. + return 3 for left shift / 8 bit sign extend / shift / sign extend. + return 4 for left shift / 16 bit sign extend / shift / sign extend. + return 5 for left shift / 16 bit sign extend / right shift + return 6 for < 8 bit sign extend / left shift. + return 7 for < 8 bit sign extend / left shift / single right shift. + If COSTP is nonzero, assign the calculated cost to *COSTP. */ + +int +shl_sext_kind (left_rtx, size_rtx, costp) + rtx left_rtx, size_rtx; + int *costp; +{ + int left, size, insize, ext; + int cost, best_cost; + int kind; + + left = INTVAL (left_rtx); + size = INTVAL (size_rtx); + insize = size - left; + if (insize <= 0) + abort (); + /* Default to left / right shift. */ + kind = 0; + best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size]; + if (size <= 16) + { + /* 16 bit shift / sign extend / 16 bit shift */ + cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size]; + /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden + below, by alternative 3 or something even better. */ + if (cost < best_cost) + { + kind = 5; + best_cost = cost; + } + } + /* Try a plain sign extend between two shifts. */ + for (ext = 16; ext >= insize; ext -= 8) + { + if (ext <= size) + { + cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext]; + if (cost < best_cost) + { + kind = ext / 8U; + best_cost = cost; + } + } + /* Check if we can do a sloppy shift with a final signed shift + restoring the sign. */ + if (EXT_SHIFT_SIGNED (size - ext)) + cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1; + /* If not, maybe it's still cheaper to do the second shift sloppy, + and do a final sign extend? */ + else if (size <= 16) + cost = ext_shift_insns[ext - insize] + 1 + + ext_shift_insns[size > ext ? 
size - ext : ext - size] + 1; + else + continue; + if (cost < best_cost) + { + kind = ext / 8U + 2; + best_cost = cost; + } + } + /* Check if we can sign extend in r0 */ + if (insize < 8) + { + cost = 3 + shift_insns[left]; + if (cost < best_cost) + { + kind = 6; + best_cost = cost; + } + /* Try the same with a final signed shift. */ + if (left < 31) + { + cost = 3 + ext_shift_insns[left + 1] + 1; + if (cost < best_cost) + { + kind = 7; + best_cost = cost; + } + } + } + if (TARGET_SH3) + { + /* Try to use a dynamic shift. */ + cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST; + if (cost < best_cost) + { + kind = 0; + best_cost = cost; + } + } + if (costp) + *costp = cost; + return kind; +} + +/* Function to be used in the length attribute of the instructions + implementing this pattern. */ + +int +shl_sext_length (insn) + rtx insn; +{ + rtx set_src, left_rtx, size_rtx; + int cost; + + set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + left_rtx = XEXP (XEXP (set_src, 0), 1); + size_rtx = XEXP (set_src, 1); + shl_sext_kind (left_rtx, size_rtx, &cost); + return cost; +} + +/* Generate rtl for this pattern */ + +int +gen_shl_sext (dest, left_rtx, size_rtx, source) + rtx dest, left_rtx, size_rtx, source; +{ + int kind; + int left, size, insize, cost; + rtx operands[3]; + + kind = shl_sext_kind (left_rtx, size_rtx, &cost); + left = INTVAL (left_rtx); + size = INTVAL (size_rtx); + insize = size - left; + switch (kind) + { + case 1: + case 2: + case 3: + case 4: + { + int ext = kind & 1 ? 8 : 16; + int shift2 = size - ext; + + /* Don't expand fine-grained when combining, because that will + make the pattern fail. */ + if (! rtx_equal_function_value_matters + && ! reload_in_progress && ! reload_completed) + { + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + emit_insn (gen_movsi (dest, source)); + break; + } + if (dest != source) + emit_insn (gen_movsi (dest, source)); + operands[0] = dest; + if (ext - insize) + { + operands[2] = GEN_INT (ext - insize); + gen_shifty_hi_op (ASHIFT, operands); + } + emit_insn (kind & 1 + ? gen_extendqisi2(dest, gen_lowpart (QImode, dest)) + : gen_extendhisi2(dest, gen_lowpart (HImode, dest))); + if (kind <= 2) + { + if (shift2) + { + operands[2] = GEN_INT (shift2); + gen_shifty_op (ASHIFT, operands); + } + } + else + { + if (shift2 > 0) + { + if (EXT_SHIFT_SIGNED (shift2)) + { + operands[2] = GEN_INT (shift2 + 1); + gen_shifty_op (ASHIFT, operands); + operands[2] = GEN_INT (1); + gen_shifty_op (ASHIFTRT, operands); + break; + } + operands[2] = GEN_INT (shift2); + gen_shifty_hi_op (ASHIFT, operands); + } + else if (shift2) + { + operands[2] = GEN_INT (-shift2); + gen_shifty_hi_op (LSHIFTRT, operands); + } + emit_insn (size <= 8 + ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) + : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); + } + break; + } + case 5: + { + int i = 16 - size; + if (! rtx_equal_function_value_matters + && ! reload_in_progress && ! reload_completed) + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + else + { + operands[0] = dest; + operands[2] = GEN_INT (16 - insize); + gen_shifty_hi_op (ASHIFT, operands); + emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); + } + /* Don't use gen_ashrsi3 because it generates new pseudos. */ + while (--i >= 0) + gen_ashift (ASHIFTRT, 1, dest); + break; + } + case 6: + case 7: + /* Don't expand fine-grained when combining, because that will + make the pattern fail. */ + if (! rtx_equal_function_value_matters + && ! reload_in_progress && ! 
reload_completed) + { + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + emit_insn (gen_movsi (dest, source)); + break; + } + emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1))); + emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1)))); + emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1)))); + operands[0] = dest; + operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx; + gen_shifty_op (ASHIFT, operands); + if (kind == 7) + emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1))); + break; + default: + return -1; + } + return 0; +} + +/* The SH cannot load a large constant into a register, constants have to + come from a pc relative load. The reference of a pc relative load + instruction must be less than 1k infront of the instruction. This + means that we often have to dump a constant inside a function, and + generate code to branch around it. + + It is important to minimize this, since the branches will slow things + down and make things bigger. + + Worst case code looks like: + + mov.l L1,rn + bra L2 + nop + align + L1: .long value + L2: + .. + + mov.l L3,rn + bra L4 + nop + align + L3: .long value + L4: + .. + + We fix this by performing a scan before scheduling, which notices which + instructions need to have their operands fetched from the constant table + and builds the table. + + The algorithm is: + + scan, find an instruction which needs a pcrel move. Look forward, find the + last barrier which is within MAX_COUNT bytes of the requirement. + If there isn't one, make one. Process all the instructions between + the find and the barrier. + + In the above example, we can tell that L3 is within 1k of L1, so + the first move can be shrunk from the 3 insn+constant sequence into + just 1 insn, and the constant moved to L3 to make: + + mov.l L1,rn + .. + mov.l L3,rn + bra L4 + nop + align + L3:.long value + L4:.long value + + Then the second move becomes the target for the shortening process. */ + +typedef struct +{ + rtx value; /* Value in table. */ + rtx label; /* Label of value. */ + enum machine_mode mode; /* Mode of value. */ +} pool_node; + +/* The maximum number of constants that can fit into one pool, since + the pc relative range is 0...1020 bytes and constants are at least 4 + bytes long. */ + +#define MAX_POOL_SIZE (1020/4) +static pool_node pool_vector[MAX_POOL_SIZE]; +static int pool_size; + +/* ??? If we need a constant in HImode which is the truncated value of a + constant we need in SImode, we could combine the two entries thus saving + two bytes. Is this common enough to be worth the effort of implementing + it? */ + +/* ??? This stuff should be done at the same time that we shorten branches. + As it is now, we must assume that all branches are the maximum size, and + this causes us to almost always output constant pools sooner than + necessary. */ + +/* Add a constant to the pool and return its label. */ + +static rtx +add_constant (x, mode, last_value) + rtx last_value; + rtx x; + enum machine_mode mode; +{ + int i; + rtx lab; + + /* First see if we've already got it. */ + for (i = 0; i < pool_size; i++) + { + if (x->code == pool_vector[i].value->code + && mode == pool_vector[i].mode) + { + if (x->code == CODE_LABEL) + { + if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) + continue; + } + if (rtx_equal_p (x, pool_vector[i].value)) + { + lab = 0; + if (! last_value + || ! i + || ! rtx_equal_p (last_value, pool_vector[i-1].value)) + { + lab = pool_vector[i].label; + if (! 
lab) + pool_vector[i].label = lab = gen_label_rtx (); + } + return lab; + } + } + } + + /* Need a new one. */ + pool_vector[pool_size].value = x; + if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value)) + lab = 0; + else + lab = gen_label_rtx (); + pool_vector[pool_size].mode = mode; + pool_vector[pool_size].label = lab; + pool_size++; + return lab; +} + +/* Output the literal table. */ + +static void +dump_table (scan) + rtx scan; +{ + int i; + int need_align = 1; + + /* Do two passes, first time dump out the HI sized constants. */ + + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + if (p->mode == HImode) + { + if (need_align) + { + scan = emit_insn_after (gen_align_2 (), scan); + need_align = 0; + } + scan = emit_label_after (p->label, scan); + scan = emit_insn_after (gen_consttable_2 (p->value), scan); + } + } + + need_align = 1; + + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + switch (p->mode) + { + case HImode: + break; + case SImode: + case SFmode: + if (need_align) + { + need_align = 0; + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (gen_align_4 (), scan); + } + if (p->label) + scan = emit_label_after (p->label, scan); + scan = emit_insn_after (gen_consttable_4 (p->value), scan); + break; + case DFmode: + case DImode: + if (need_align) + { + need_align = 0; + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (gen_align_4 (), scan); + } + if (p->label) + scan = emit_label_after (p->label, scan); + scan = emit_insn_after (gen_consttable_8 (p->value), scan); + break; + default: + abort (); + break; + } + } + + scan = emit_insn_after (gen_consttable_end (), scan); + scan = emit_barrier_after (scan); + pool_size = 0; +} + +/* Return non-zero if constant would be an ok source for a + mov.w instead of a mov.l. */ + +static int +hi_const (src) + rtx src; +{ + return (GET_CODE (src) == CONST_INT + && INTVAL (src) >= -32768 + && INTVAL (src) <= 32767); +} + +/* Non-zero if the insn is a move instruction which needs to be fixed. */ + +/* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the + CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't + need to fix it if the input value is CONST_OK_FOR_I. */ + +static int +broken_move (insn) + rtx insn; +{ + if (GET_CODE (insn) == INSN) + { + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET + /* We can load any 8 bit value if we don't care what the high + order bits end up as. */ + && GET_MODE (SET_DEST (pat)) != QImode + && CONSTANT_P (SET_SRC (pat)) + && ! (TARGET_SH3E + && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE + && (fp_zero_operand (SET_SRC (pat)) + || fp_one_operand (SET_SRC (pat))) + && GET_CODE (SET_DEST (pat)) == REG + && REGNO (SET_DEST (pat)) >= FIRST_FP_REG + && REGNO (SET_DEST (pat)) <= LAST_FP_REG) + && (GET_CODE (SET_SRC (pat)) != CONST_INT + || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat))))) + return 1; + } + + return 0; +} + +static int +mova_p (insn) + rtx insn; +{ + return (GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) == SET + && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC + && XINT (SET_SRC (PATTERN (insn)), 1) == 1); +} + +/* Find the last barrier from insn FROM which is close enough to hold the + constant pool. If we can't find one, then create one near the end of + the range. 
*/ + +static rtx +find_barrier (num_mova, mova, from) + int num_mova; + rtx mova, from; +{ + int count_si = 0; + int count_hi = 0; + int found_hi = 0; + int found_si = 0; + int hi_align = 2; + int si_align = 2; + int leading_mova = num_mova; + rtx barrier_before_mova, found_barrier = 0, good_barrier = 0; + int si_limit; + int hi_limit; + + /* For HImode: range is 510, add 4 because pc counts from address of + second instruction after this one, subtract 2 for the jump instruction + that we may need to emit before the table, subtract 2 for the instruction + that fills the jump delay slot (in very rare cases, reorg will take an + instruction from after the constant pool or will leave the delay slot + empty). This gives 510. + For SImode: range is 1020, add 4 because pc counts from address of + second instruction after this one, subtract 2 in case pc is 2 byte + aligned, subtract 2 for the jump instruction that we may need to emit + before the table, subtract 2 for the instruction that fills the jump + delay slot. This gives 1018. */ + + /* The branch will always be shortened now that the reference address for + forward branches is the successor address, thus we need no longer make + adjustments to the [sh]i_limit for -O0. */ + + si_limit = 1018; + hi_limit = 510; + + while (from && count_si < si_limit && count_hi < hi_limit) + { + int inc = get_attr_length (from); + int new_align = 1; + + if (GET_CODE (from) == CODE_LABEL) + { + if (optimize) + new_align = 1 << label_to_alignment (from); + else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER) + new_align = 1 << barrier_align (from); + else + new_align = 1; + inc = 0; + } + + if (GET_CODE (from) == BARRIER) + { + + found_barrier = from; + + /* If we are at the end of the function, or in front of an alignment + instruction, we need not insert an extra alignment. We prefer + this kind of barrier. */ + if (barrier_align (from) > 2) + good_barrier = from; + } + + if (broken_move (from)) + { + rtx pat, src, dst; + enum machine_mode mode; + + pat = PATTERN (from); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + src = SET_SRC (pat); + dst = SET_DEST (pat); + mode = GET_MODE (dst); + + /* We must explicitly check the mode, because sometimes the + front end will generate code to load unsigned constants into + HImode targets without properly sign extending them. */ + if (mode == HImode + || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG)) + { + found_hi += 2; + /* We put the short constants before the long constants, so + we must count the length of short constants in the range + for the long constants. */ + /* ??? This isn't optimal, but is easy to do. */ + si_limit -= 2; + } + else + { + while (si_align > 2 && found_si + si_align - 2 > count_si) + si_align >>= 1; + if (found_si > count_si) + count_si = found_si; + found_si += GET_MODE_SIZE (mode); + if (num_mova) + si_limit -= GET_MODE_SIZE (mode); + } + } + + if (mova_p (from)) + { + if (! num_mova++) + { + leading_mova = 0; + mova = from; + barrier_before_mova = good_barrier ? good_barrier : found_barrier; + } + if (found_si > count_si) + count_si = found_si; + } + else if (GET_CODE (from) == JUMP_INSN + && (GET_CODE (PATTERN (from)) == ADDR_VEC + || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)) + { + if (num_mova) + num_mova--; + if (barrier_align (next_real_insn (from)) == CACHE_LOG) + { + /* We have just passed the barrier in front of the + ADDR_DIFF_VEC, which is stored in found_barrier. 
Since + the ADDR_DIFF_VEC is accessed as data, just like our pool + constants, this is a good opportunity to accommodate what + we have gathered so far. + If we waited any longer, we could end up at a barrier in + front of code, which gives worse cache usage for separated + instruction / data caches. */ + good_barrier = found_barrier; + break; + } + else + { + rtx body = PATTERN (from); + inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body)); + } + } + + if (found_si) + { + if (new_align > si_align) + { + si_limit -= count_si - 1 & new_align - si_align; + si_align = new_align; + } + count_si = count_si + new_align - 1 & -new_align; + count_si += inc; + } + if (found_hi) + { + if (new_align > hi_align) + { + hi_limit -= count_hi - 1 & new_align - hi_align; + hi_align = new_align; + } + count_hi = count_hi + new_align - 1 & -new_align; + count_hi += inc; + } + from = NEXT_INSN (from); + } + + if (num_mova) + if (leading_mova) + { + /* Try as we might, the leading mova is out of range. Change + it into a load (which will become a pcload) and retry. */ + SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); + INSN_CODE (mova) = -1; + return find_barrier (0, 0, mova); + } + else + { + /* Insert the constant pool table before the mova instruction, + to prevent the mova label reference from going out of range. */ + from = mova; + good_barrier = found_barrier = barrier_before_mova; + } + + if (found_barrier) + { + if (good_barrier && next_real_insn (found_barrier)) + found_barrier = good_barrier; + } + else + { + /* We didn't find a barrier in time to dump our stuff, + so we'll make one. */ + rtx label = gen_label_rtx (); + + /* If we exceeded the range, then we must back up over the last + instruction we looked at. Otherwise, we just need to undo the + NEXT_INSN at the end of the loop. */ + if (count_hi > hi_limit || count_si > si_limit) + from = PREV_INSN (PREV_INSN (from)); + else + from = PREV_INSN (from); + + /* Walk back to be just before any jump or label. + Putting it before a label reduces the number of times the branch + around the constant pool table will be hit. Putting it before + a jump makes it more likely that the bra delay slot will be + filled. */ + while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE + || GET_CODE (from) == CODE_LABEL) + from = PREV_INSN (from); + + from = emit_jump_insn_after (gen_jump (label), from); + JUMP_LABEL (from) = label; + LABEL_NUSES (label) = 1; + found_barrier = emit_barrier_after (from); + emit_label_after (label, found_barrier); + } + + return found_barrier; +} + +/* If the instruction INSN is implemented by a special function, and we can + positively find the register that is used to call the sfunc, and this + register is not used anywhere else in this instruction - except as the + destination of a set, return this register; else, return 0. */ +rtx +sfunc_uses_reg (insn) + rtx insn; +{ + int i; + rtx pattern, part, reg_part, reg; + + if (GET_CODE (insn) != INSN) + return 0; + pattern = PATTERN (insn); + if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC) + return 0; + + for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--) + { + part = XVECEXP (pattern, 0, i); + if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode) + reg_part = part; + } + if (! 
reg_part) + return 0; + reg = XEXP (reg_part, 0); + for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--) + { + part = XVECEXP (pattern, 0, i); + if (part == reg_part || GET_CODE (part) == CLOBBER) + continue; + if (reg_mentioned_p (reg, ((GET_CODE (part) == SET + && GET_CODE (SET_DEST (part)) == REG) + ? SET_SRC (part) : part))) + return 0; + } + return reg; +} + +/* See if the only way in which INSN uses REG is by calling it, or by + setting it while calling it. Set *SET to a SET rtx if the register + is set by INSN. */ + +static int +noncall_uses_reg (reg, insn, set) + rtx reg; + rtx insn; + rtx *set; +{ + rtx pattern, reg2; + + *set = NULL_RTX; + + reg2 = sfunc_uses_reg (insn); + if (reg2 && REGNO (reg2) == REGNO (reg)) + { + pattern = single_set (insn); + if (pattern + && GET_CODE (SET_DEST (pattern)) == REG + && REGNO (reg) == REGNO (SET_DEST (pattern))) + *set = pattern; + return 0; + } + if (GET_CODE (insn) != CALL_INSN) + { + /* We don't use rtx_equal_p because we don't care if the mode is + different. */ + pattern = single_set (insn); + if (pattern + && GET_CODE (SET_DEST (pattern)) == REG + && REGNO (reg) == REGNO (SET_DEST (pattern))) + { + rtx par, part; + int i; + + *set = pattern; + par = PATTERN (insn); + if (GET_CODE (par) == PARALLEL) + for (i = XVECLEN (par, 0) - 1; i >= 0; i--) + { + part = XVECEXP (par, 0, i); + if (GET_CODE (part) != SET && reg_mentioned_p (reg, part)) + return 1; + } + return reg_mentioned_p (reg, SET_SRC (pattern)); + } + + return 1; + } + + pattern = PATTERN (insn); + + if (GET_CODE (pattern) == PARALLEL) + { + int i; + + for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--) + if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i))) + return 1; + pattern = XVECEXP (pattern, 0, 0); + } + + if (GET_CODE (pattern) == SET) + { + if (reg_mentioned_p (reg, SET_DEST (pattern))) + { + /* We don't use rtx_equal_p, because we don't care if the + mode is different. */ + if (GET_CODE (SET_DEST (pattern)) != REG + || REGNO (reg) != REGNO (SET_DEST (pattern))) + return 1; + + *set = pattern; + } + + pattern = SET_SRC (pattern); + } + + if (GET_CODE (pattern) != CALL + || GET_CODE (XEXP (pattern, 0)) != MEM + || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0))) + return 1; + + return 0; +} + +/* Given a X, a pattern of an insn or a part of it, return a mask of used + general registers. Bits 0..15 mean that the respective registers + are used as inputs in the instruction. Bits 16..31 mean that the + registers 0..15, respectively, are used as outputs, or are clobbered. + IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */ +int +regs_used (x, is_dest) + rtx x; int is_dest; +{ + enum rtx_code code; + char *fmt; + int i, used = 0; + + if (! x) + return used; + code = GET_CODE (x); + switch (code) + { + case REG: + if (REGNO (x) < 16) + return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) + << (REGNO (x) + is_dest)); + return 0; + case SUBREG: + { + rtx y = SUBREG_REG (x); + + if (GET_CODE (y) != REG) + break; + if (REGNO (y) < 16) + return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) + << (REGNO (y) + SUBREG_WORD (x) + is_dest)); + return 0; + } + case SET: + return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16); + case RETURN: + /* If there was a return value, it must have been indicated with USE. 
*/ + return 0x00ffff00; + case CLOBBER: + is_dest = 1; + break; + case MEM: + is_dest = 0; + break; + case CALL: + used |= 0x00ff00f0; + break; + } + + fmt = GET_RTX_FORMAT (code); + + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + used |= regs_used (XVECEXP (x, i, j), is_dest); + } + else if (fmt[i] == 'e') + used |= regs_used (XEXP (x, i), is_dest); + } + return used; +} + +/* Create an instruction that prevents redirection of a conditional branch + to the destination of the JUMP with address ADDR. + If the branch needs to be implemented as an indirect jump, try to find + a scratch register for it. + If NEED_BLOCK is 0, don't do anything unless we need a scratch register. + If any preceding insn that doesn't fit into a delay slot is good enough, + pass 1. Pass 2 if a definite blocking insn is needed. + -1 is used internally to avoid deep recursion. + If a blocking instruction is made or recognized, return it. */ + +static rtx +gen_block_redirect (jump, addr, need_block) + rtx jump; + int addr, need_block; +{ + int dead = 0; + rtx prev = prev_nonnote_insn (jump); + rtx dest; + + /* First, check if we already have an instruction that satisfies our need. */ + if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev)) + { + if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) + return prev; + if (GET_CODE (PATTERN (prev)) == USE + || GET_CODE (PATTERN (prev)) == CLOBBER + || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) + prev = jump; + else if ((need_block &= ~1) < 0) + return prev; + else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect) + need_block = 0; + } + /* We can't use JUMP_LABEL here because it might be undefined + when not optimizing. */ + dest = XEXP (SET_SRC (PATTERN (jump)), 0); + /* If the branch is out of range, try to find a scratch register for it. */ + if (optimize + && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)) + { + rtx scan; + /* Don't look for the stack pointer as a scratch register, + it would cause trouble if an interrupt occurred. */ + unsigned try = 0x7fff, used; + int jump_left = flag_expensive_optimizations + 1; + + /* It is likely that the most recent eligible instruction is wanted for + the delay slot. Therefore, find out which registers it uses, and + try to avoid using them. */ + + for (scan = jump; scan = PREV_INSN (scan); ) + { + enum rtx_code code; + + if (INSN_DELETED_P (scan)) + continue; + code = GET_CODE (scan); + if (code == CODE_LABEL || code == JUMP_INSN) + break; + if (code == INSN + && GET_CODE (PATTERN (scan)) != USE + && GET_CODE (PATTERN (scan)) != CLOBBER + && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES) + { + try &= ~regs_used (PATTERN (scan), 0); + break; + } + } + for (used = dead = 0, scan = JUMP_LABEL (jump); scan = NEXT_INSN (scan); ) + { + enum rtx_code code; + + if (INSN_DELETED_P (scan)) + continue; + code = GET_CODE (scan); + if (GET_RTX_CLASS (code) == 'i') + { + used |= regs_used (PATTERN (scan), 0); + if (code == CALL_INSN) + used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0); + dead |= (used >> 16) & ~used; + if (dead & try) + { + dead &= try; + break; + } + if (code == JUMP_INSN) + if (jump_left-- && simplejump_p (scan)) + scan = JUMP_LABEL (scan); + else + break; + } + } + /* Mask out the stack pointer again, in case it was + the only 'free' register we have found. 
*/ + dead &= 0x7fff; + } + /* If the immediate destination is still in range, check for possible + threading with a jump beyond the delay slot insn. + Don't check if we are called recursively; the jump has been or will be + checked in a different invocation then. */ + + else if (optimize && need_block >= 0) + { + rtx next = next_active_insn (next_active_insn (dest)); + if (next && GET_CODE (next) == JUMP_INSN + && GET_CODE (PATTERN (next)) == SET + && recog_memoized (next) == CODE_FOR_jump) + { + dest = JUMP_LABEL (next); + if (dest + && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098) + gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1); + } + } + + if (dead) + { + rtx reg = gen_rtx (REG, SImode, exact_log2 (dead & -dead)); + + /* It would be nice if we could convert the jump into an indirect + jump / far branch right now, and thus exposing all constituent + instructions to further optimization. However, reorg uses + simplejump_p to determine if there is an unconditional jump where + it should try to schedule instructions from the target of the + branch; simplejump_p fails for indirect jumps even if they have + a JUMP_LABEL. */ + rtx insn = emit_insn_before (gen_indirect_jump_scratch + (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump)))) + , jump); + INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch; + return insn; + } + else if (need_block) + /* We can't use JUMP_LABEL here because it might be undefined + when not optimizing. */ + return emit_insn_before (gen_block_branch_redirect + (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0)))) + , jump); + return prev; +} + +#define CONDJUMP_MIN -252 +#define CONDJUMP_MAX 262 +struct far_branch +{ + /* A label (to be placed) in front of the jump + that jumps to our ultimate destination. */ + rtx near_label; + /* Where we are going to insert it if we cannot move the jump any farther, + or the jump itself if we have picked up an existing jump. */ + rtx insert_place; + /* The ultimate destination. */ + rtx far_label; + struct far_branch *prev; + /* If the branch has already been created, its address; + else the address of its first prospective user. */ + int address; +}; + +enum mdep_reorg_phase_e mdep_reorg_phase; +void +gen_far_branch (bp) + struct far_branch *bp; +{ + rtx insn = bp->insert_place; + rtx jump; + rtx label = gen_label_rtx (); + + emit_label_after (label, insn); + if (bp->far_label) + { + jump = emit_jump_insn_after (gen_jump (bp->far_label), insn); + LABEL_NUSES (bp->far_label)++; + } + else + jump = emit_jump_insn_after (gen_return (), insn); + /* Emit a barrier so that reorg knows that any following instructions + are not reachable via a fall-through path. + But don't do this when not optimizing, since we wouldn't supress the + alignment for the barrier then, and could end up with out-of-range + pc-relative loads. */ + if (optimize) + emit_barrier_after (jump); + emit_label_after (bp->near_label, insn); + JUMP_LABEL (jump) = bp->far_label; + if (! invert_jump (insn, label)) + abort (); + /* Prevent reorg from undoing our splits. */ + gen_block_redirect (jump, bp->address += 2, 2); +} + +/* Fix up ADDR_DIFF_VECs. */ +void +fixup_addr_diff_vecs (first) + rtx first; +{ + rtx insn; + + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx vec_lab, pat, prev, prevpat, x, braf_label; + + if (GET_CODE (insn) != JUMP_INSN + || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) + continue; + pat = PATTERN (insn); + vec_lab = XEXP (XEXP (pat, 0), 0); + + /* Search the matching casesi_jump_2. 
*/ + for (prev = vec_lab; ; prev = PREV_INSN (prev)) + { + if (GET_CODE (prev) != JUMP_INSN) + continue; + prevpat = PATTERN (prev); + if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2) + continue; + x = XVECEXP (prevpat, 0, 1); + if (GET_CODE (x) != USE) + continue; + x = XEXP (x, 0); + if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab) + break; + } + + /* Emit the reference label of the braf where it belongs, right after + the casesi_jump_2 (i.e. braf). */ + braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0); + emit_label_after (braf_label, prev); + + /* Fix up the ADDR_DIF_VEC to be relative + to the reference address of the braf. */ + XEXP (XEXP (pat, 0), 0) = braf_label; + } +} + +/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following + a barrier. Return the base 2 logarithm of the desired alignment. */ +int +barrier_align (barrier_or_label) + rtx barrier_or_label; +{ + rtx next = next_real_insn (barrier_or_label), pat, prev; + int slot, credit; + + if (! next) + return 0; + + pat = PATTERN (next); + + if (GET_CODE (pat) == ADDR_DIFF_VEC) + return 2; + + if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1) + /* This is a barrier in front of a constant table. */ + return 0; + + prev = prev_real_insn (barrier_or_label); + if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC) + { + pat = PATTERN (prev); + /* If this is a very small table, we want to keep the alignment after + the table to the minimum for proper code alignment. */ + return ((TARGET_SMALLCODE + || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat)) + <= 1 << (CACHE_LOG - 2))) + ? 1 : CACHE_LOG); + } + + if (TARGET_SMALLCODE) + return 0; + + if (! TARGET_SH3 || ! optimize) + return CACHE_LOG; + + /* When fixing up pcloads, a constant table might be inserted just before + the basic block that ends with the barrier. Thus, we can't trust the + instruction lengths before that. */ + if (mdep_reorg_phase > SH_FIXUP_PCLOAD) + { + /* Check if there is an immediately preceding branch to the insn beyond + the barrier. We must weight the cost of discarding useful information + from the current cache line when executing this branch and there is + an alignment, against that of fetching unneeded insn in front of the + branch target when there is no alignment. */ + + /* PREV is presumed to be the JUMP_INSN for the barrier under + investigation. Skip to the insn before it. */ + prev = prev_real_insn (prev); + + for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2; + credit >= 0 && prev && GET_CODE (prev) == INSN; + prev = prev_real_insn (prev)) + { + if (GET_CODE (PATTERN (prev)) == USE + || GET_CODE (PATTERN (prev)) == CLOBBER) + continue; + if (GET_CODE (PATTERN (prev)) == SEQUENCE) + prev = XVECEXP (PATTERN (prev), 0, 1); + if (slot && + get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) + slot = 0; + credit -= get_attr_length (prev); + } + if (prev + && GET_CODE (prev) == JUMP_INSN + && JUMP_LABEL (prev) + && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label) + && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0))) + return 0; + } + + return CACHE_LOG; +} + +/* If we are inside a phony loop, lmost any kind of label can turn up as the + first one in the loop. Aligning a braf label causes incorrect switch + destination addresses; we can detect braf labels because they are + followed by a BARRIER. + Applying loop alignment to small constant or switch tables is a waste + of space, so we suppress this too. 
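+   In the remaining cases an ordinary loop head gets aligned to a four
+   byte boundary; like barrier_align, this function returns the base 2
+   logarithm of the desired alignment, i.e. 2.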
*/ +int +sh_loop_align (label) + rtx label; +{ + rtx next = label; + + do + next = next_nonnote_insn (next); + while (next && GET_CODE (next) == CODE_LABEL); + + if (! next + || GET_RTX_CLASS (GET_CODE (next)) != 'i' + || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC + || recog_memoized (next) == CODE_FOR_consttable_2) + return 0; + return 2; +} + +/* Exported to toplev.c. + + Do a final pass over the function, just before delayed branch + scheduling. */ + +void +machine_dependent_reorg (first) + rtx first; +{ + rtx insn, mova; + int num_mova; + rtx r0_rtx = gen_rtx (REG, Pmode, 0); + rtx r0_inc_rtx = gen_rtx (POST_INC, Pmode, r0_rtx); + + /* If relaxing, generate pseudo-ops to associate function calls with + the symbols they call. It does no harm to not generate these + pseudo-ops. However, when we can generate them, it enables to + linker to potentially relax the jsr to a bsr, and eliminate the + register load and, possibly, the constant pool entry. */ + + mdep_reorg_phase = SH_INSERT_USES_LABELS; + if (TARGET_RELAX) + { + /* Remove all REG_LABEL notes. We want to use them for our own + purposes. This works because none of the remaining passes + need to look at them. + + ??? But it may break in the future. We should use a machine + dependent REG_NOTE, or some other approach entirely. */ + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + rtx note; + + while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0) + remove_note (insn, note); + } + } + + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx pattern, reg, link, set, scan, dies, label; + int rescan = 0, foundinsn = 0; + + if (GET_CODE (insn) == CALL_INSN) + { + pattern = PATTERN (insn); + + if (GET_CODE (pattern) == PARALLEL) + pattern = XVECEXP (pattern, 0, 0); + if (GET_CODE (pattern) == SET) + pattern = SET_SRC (pattern); + + if (GET_CODE (pattern) != CALL + || GET_CODE (XEXP (pattern, 0)) != MEM) + continue; + + reg = XEXP (XEXP (pattern, 0), 0); + } + else + { + reg = sfunc_uses_reg (insn); + if (! reg) + continue; + } + + if (GET_CODE (reg) != REG) + continue; + + /* This is a function call via REG. If the only uses of REG + between the time that it is set and the time that it dies + are in function calls, then we can associate all the + function calls with the setting of REG. */ + + for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) + { + if (REG_NOTE_KIND (link) != 0) + continue; + set = single_set (XEXP (link, 0)); + if (set && rtx_equal_p (reg, SET_DEST (set))) + { + link = XEXP (link, 0); + break; + } + } + + if (! link) + { + /* ??? Sometimes global register allocation will have + deleted the insn pointed to by LOG_LINKS. Try + scanning backward to find where the register is set. */ + for (scan = PREV_INSN (insn); + scan && GET_CODE (scan) != CODE_LABEL; + scan = PREV_INSN (scan)) + { + if (GET_RTX_CLASS (GET_CODE (scan)) != 'i') + continue; + + if (! reg_mentioned_p (reg, scan)) + continue; + + if (noncall_uses_reg (reg, scan, &set)) + break; + + if (set) + { + link = scan; + break; + } + } + } + + if (! link) + continue; + + /* The register is set at LINK. */ + + /* We can only optimize the function call if the register is + being set to a symbol. In theory, we could sometimes + optimize calls to a constant location, but the assembler + and linker do not support that at present. 
*/ + if (GET_CODE (SET_SRC (set)) != SYMBOL_REF + && GET_CODE (SET_SRC (set)) != LABEL_REF) + continue; + + /* Scan forward from LINK to the place where REG dies, and + make sure that the only insns which use REG are + themselves function calls. */ + + /* ??? This doesn't work for call targets that were allocated + by reload, since there may not be a REG_DEAD note for the + register. */ + + dies = NULL_RTX; + for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan)) + { + rtx scanset; + + /* Don't try to trace forward past a CODE_LABEL if we haven't + seen INSN yet. Ordinarily, we will only find the setting insn + in LOG_LINKS if it is in the same basic block. However, + cross-jumping can insert code labels in between the load and + the call, and can result in situations where a single call + insn may have two targets depending on where we came from. */ + + if (GET_CODE (scan) == CODE_LABEL && ! foundinsn) + break; + + if (GET_RTX_CLASS (GET_CODE (scan)) != 'i') + continue; + + /* Don't try to trace forward past a JUMP. To optimize + safely, we would have to check that all the + instructions at the jump destination did not use REG. */ + + if (GET_CODE (scan) == JUMP_INSN) + break; + + if (! reg_mentioned_p (reg, scan)) + continue; + + if (noncall_uses_reg (reg, scan, &scanset)) + break; + + if (scan == insn) + foundinsn = 1; + + if (scan != insn + && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan))) + { + /* There is a function call to this register other + than the one we are checking. If we optimize + this call, we need to rescan again below. */ + rescan = 1; + } + + /* ??? We shouldn't have to worry about SCANSET here. + We should just be able to check for a REG_DEAD note + on a function call. However, the REG_DEAD notes are + apparently not dependable around libcalls; c-torture + execute/920501-2 is a test case. If SCANSET is set, + then this insn sets the register, so it must have + died earlier. Unfortunately, this will only handle + the cases in which the register is, in fact, set in a + later insn. */ + + /* ??? We shouldn't have to use FOUNDINSN here. + However, the LOG_LINKS fields are apparently not + entirely reliable around libcalls; + newlib/libm/math/e_pow.c is a test case. Sometimes + an insn will appear in LOG_LINKS even though it is + not the most recent insn which sets the register. */ + + if (foundinsn + && (scanset + || find_reg_note (scan, REG_DEAD, reg))) + { + dies = scan; + break; + } + } + + if (! dies) + { + /* Either there was a branch, or some insn used REG + other than as a function call address. */ + continue; + } + + /* Create a code label, and put it in a REG_LABEL note on + the insn which sets the register, and on each call insn + which uses the register. In final_prescan_insn we look + for the REG_LABEL notes, and output the appropriate label + or pseudo-op. 
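+	     Roughly speaking, the emitted label and pseudo-op let the linker
+	     turn a call sequence such as
+
+		mov.l	L1,r1	! address of _foo, from the constant pool
+		jsr	@r1
+		nop
+
+	     into a direct bsr _foo (plus delay slot) when the target turns
+	     out to be in range, dropping the register load and, possibly,
+	     the constant pool entry.  The register, label and function
+	     names above are only illustrative.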
*/ + + label = gen_label_rtx (); + REG_NOTES (link) = gen_rtx (EXPR_LIST, REG_LABEL, label, + REG_NOTES (link)); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, label, + REG_NOTES (insn)); + if (rescan) + { + scan = link; + do + { + rtx reg2; + + scan = NEXT_INSN (scan); + if (scan != insn + && ((GET_CODE (scan) == CALL_INSN + && reg_mentioned_p (reg, scan)) + || ((reg2 = sfunc_uses_reg (scan)) + && REGNO (reg2) == REGNO (reg)))) + REG_NOTES (scan) = gen_rtx (EXPR_LIST, REG_LABEL, + label, REG_NOTES (scan)); + } + while (scan != dies); + } + } + } + + if (TARGET_SH2) + fixup_addr_diff_vecs (first); + + if (optimize) + { + mdep_reorg_phase = SH_SHORTEN_BRANCHES0; + shorten_branches (first); + } + /* Scan the function looking for move instructions which have to be + changed to pc-relative loads and insert the literal tables. */ + + mdep_reorg_phase = SH_FIXUP_PCLOAD; + for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn)) + { + if (mova_p (insn)) + { + if (! num_mova++) + mova = insn; + } + else if (GET_CODE (insn) == JUMP_INSN + && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC + && num_mova) + { + rtx scan; + int total; + + num_mova--; + + /* Some code might have been inserted between the mova and + its ADDR_DIFF_VEC. Check if the mova is still in range. */ + for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan)) + total += get_attr_length (scan); + + /* range of mova is 1020, add 4 because pc counts from address of + second instruction after this one, subtract 2 in case pc is 2 + byte aligned. Possible alignment needed for the ADDR_DIFF_VEC + cancels out with alignment effects of the mova itself. */ + if (total > 1022) + { + /* Change the mova into a load, and restart scanning + there. broken_move will then return true for mova. */ + SET_SRC (PATTERN (mova)) + = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); + INSN_CODE (mova) = -1; + insn = mova; + } + } + if (broken_move (insn)) + { + rtx scan; + /* Scan ahead looking for a barrier to stick the constant table + behind. */ + rtx barrier = find_barrier (num_mova, mova, insn); + rtx last_float_move, last_float = 0, *last_float_addr; + + if (num_mova && ! mova_p (mova)) + { + /* find_barrier had to change the first mova into a + pcload; thus, we have to start with this new pcload. */ + insn = mova; + num_mova = 0; + } + /* Now find all the moves between the points and modify them. */ + for (scan = insn; scan != barrier; scan = NEXT_INSN (scan)) + { + if (GET_CODE (scan) == CODE_LABEL) + last_float = 0; + if (broken_move (scan)) + { + rtx *patp = &PATTERN (scan), pat = *patp; + rtx src, dst; + rtx lab; + rtx newinsn; + rtx newsrc; + enum machine_mode mode; + + if (GET_CODE (pat) == PARALLEL) + patp = &XVECEXP (pat, 0, 0), pat = *patp; + src = SET_SRC (pat); + dst = SET_DEST (pat); + mode = GET_MODE (dst); + + if (mode == SImode && hi_const (src) + && REGNO (dst) != FPUL_REG) + { + int offset = 0; + + mode = HImode; + while (GET_CODE (dst) == SUBREG) + { + offset += SUBREG_WORD (dst); + dst = SUBREG_REG (dst); + } + dst = gen_rtx (REG, HImode, REGNO (dst) + offset); + } + + if (GET_CODE (dst) == REG + && ((REGNO (dst) >= FIRST_FP_REG + && REGNO (dst) <= LAST_XD_REG) + || REGNO (dst) == FPUL_REG)) + { + if (last_float + && reg_set_between_p (r0_rtx, last_float_move, scan)) + last_float = 0; + lab = add_constant (src, mode, last_float); + if (lab) + emit_insn_before (gen_mova (lab), scan); + else + *last_float_addr = r0_inc_rtx; + last_float_move = scan; + last_float = src; + newsrc = gen_rtx (MEM, mode, + ((TARGET_SH4 && ! 
TARGET_FMOVD + || REGNO (dst) == FPUL_REG) + ? r0_inc_rtx + : r0_rtx)); + last_float_addr = &XEXP (newsrc, 0); + } + else + { + lab = add_constant (src, mode, 0); + newsrc = gen_rtx (MEM, mode, + gen_rtx (LABEL_REF, VOIDmode, lab)); + } + RTX_UNCHANGING_P (newsrc) = 1; + *patp = gen_rtx (SET, VOIDmode, dst, newsrc); + INSN_CODE (scan) = -1; + } + } + dump_table (barrier); + insn = barrier; + } + } + + mdep_reorg_phase = SH_SHORTEN_BRANCHES1; + insn_addresses = 0; + split_branches (first); + + /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it + also has an effect on the register that holds the addres of the sfunc. + Insert an extra dummy insn in front of each sfunc that pretends to + use this register. */ + if (flag_delayed_branch) + { + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx reg = sfunc_uses_reg (insn); + + if (! reg) + continue; + emit_insn_before (gen_use_sfunc_addr (reg), insn); + } + } +#if 0 + /* fpscr is not actually a user variable, but we pretend it is for the + sake of the previous optimization passes, since we want it handled like + one. However, we don't have eny debugging information for it, so turn + it into a non-user variable now. */ + if (TARGET_SH4) + REG_USERVAR_P (get_fpscr_rtx ()) = 0; +#endif + if (optimize) + sh_flag_remove_dead_before_cse = 1; + mdep_reorg_phase = SH_AFTER_MDEP_REORG; +} + +int +get_dest_uid (label, max_uid) + rtx label; + int max_uid; +{ + rtx dest = next_real_insn (label); + int dest_uid; + if (! dest) + /* This can happen for an undefined label. */ + return 0; + dest_uid = INSN_UID (dest); + /* If this is a newly created branch redirection blocking instruction, + we cannot index the branch_uid or insn_addresses arrays with its + uid. But then, we won't need to, because the actual destination is + the following branch. */ + while (dest_uid >= max_uid) + { + dest = NEXT_INSN (dest); + dest_uid = INSN_UID (dest); + } + if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN) + return 0; + return dest_uid; +} + +/* Split condbranches that are out of range. Also add clobbers for + scratch registers that are needed in far jumps. + We do this before delay slot scheduling, so that it can take our + newly created instructions into account. It also allows us to + find branches with common targets more easily. */ + +static void +split_branches (first) + rtx first; +{ + rtx insn; + struct far_branch **uid_branch, *far_branch_list = 0; + int max_uid = get_max_uid (); + + /* Find out which branches are out of range. */ + shorten_branches (first); + + uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch); + bzero ((char *) uid_branch, max_uid * sizeof *uid_branch); + + for (insn = first; insn; insn = NEXT_INSN (insn)) + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + continue; + else if (INSN_DELETED_P (insn)) + { + /* Shorten_branches would split this instruction again, + so transform it into a note. 
*/ + PUT_CODE (insn, NOTE); + NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (insn) = 0; + } + else if (GET_CODE (insn) == JUMP_INSN + /* Don't mess with ADDR_DIFF_VEC */ + && (GET_CODE (PATTERN (insn)) == SET + || GET_CODE (PATTERN (insn)) == RETURN)) + { + enum attr_type type = get_attr_type (insn); + if (type == TYPE_CBRANCH) + { + rtx next, beyond; + + if (get_attr_length (insn) > 4) + { + rtx src = SET_SRC (PATTERN (insn)); + rtx cond = XEXP (src, 0); + rtx olabel = XEXP (XEXP (src, 1), 0); + rtx jump; + int addr = insn_addresses[INSN_UID (insn)]; + rtx label = 0; + int dest_uid = get_dest_uid (olabel, max_uid); + struct far_branch *bp = uid_branch[dest_uid]; + + /* redirect_jump needs a valid JUMP_LABEL, and it might delete + the label if the LABEL_NUSES count drops to zero. There is + always a jump_optimize pass that sets these values, but it + proceeds to delete unreferenced code, and then if not + optimizing, to un-delete the deleted instructions, thus + leaving labels with too low uses counts. */ + if (! optimize) + { + JUMP_LABEL (insn) = olabel; + LABEL_NUSES (olabel)++; + } + if (! bp) + { + bp = (struct far_branch *) alloca (sizeof *bp); + uid_branch[dest_uid] = bp; + bp->prev = far_branch_list; + far_branch_list = bp; + bp->far_label + = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0); + LABEL_NUSES (bp->far_label)++; + } + else + { + label = bp->near_label; + if (! label && bp->address - addr >= CONDJUMP_MIN) + { + rtx block = bp->insert_place; + + if (GET_CODE (PATTERN (block)) == RETURN) + block = PREV_INSN (block); + else + block = gen_block_redirect (block, + bp->address, 2); + label = emit_label_after (gen_label_rtx (), + PREV_INSN (block)); + bp->near_label = label; + } + else if (label && ! NEXT_INSN (label)) + if (addr + 2 - bp->address <= CONDJUMP_MAX) + bp->insert_place = insn; + else + gen_far_branch (bp); + } + if (! label + || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN) + { + bp->near_label = label = gen_label_rtx (); + bp->insert_place = insn; + bp->address = addr; + } + if (! redirect_jump (insn, label)) + abort (); + } + else + { + /* get_attr_length (insn) == 2 */ + /* Check if we have a pattern where reorg wants to redirect + the branch to a label from an unconditional branch that + is too far away. */ + /* We can't use JUMP_LABEL here because it might be undefined + when not optimizing. */ + /* A syntax error might cause beyond to be NULL_RTX. 
*/ + beyond + = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), + 0)); + + if (beyond + && (GET_CODE (beyond) == JUMP_INSN + || (GET_CODE (beyond = next_active_insn (beyond)) + == JUMP_INSN)) + && GET_CODE (PATTERN (beyond)) == SET + && recog_memoized (beyond) == CODE_FOR_jump + && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))] + - insn_addresses[INSN_UID (insn)] + 252U) + > 252 + 258 + 2)) + gen_block_redirect (beyond, + insn_addresses[INSN_UID (beyond)], 1); + } + + next = next_active_insn (insn); + + if ((GET_CODE (next) == JUMP_INSN + || GET_CODE (next = next_active_insn (next)) == JUMP_INSN) + && GET_CODE (PATTERN (next)) == SET + && recog_memoized (next) == CODE_FOR_jump + && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))] + - insn_addresses[INSN_UID (insn)] + 252U) + > 252 + 258 + 2)) + gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1); + } + else if (type == TYPE_JUMP || type == TYPE_RETURN) + { + int addr = insn_addresses[INSN_UID (insn)]; + rtx far_label = 0; + int dest_uid = 0; + struct far_branch *bp; + + if (type == TYPE_JUMP) + { + far_label = XEXP (SET_SRC (PATTERN (insn)), 0); + dest_uid = get_dest_uid (far_label, max_uid); + if (! dest_uid) + { + /* Parse errors can lead to labels outside + the insn stream. */ + if (! NEXT_INSN (far_label)) + continue; + + if (! optimize) + { + JUMP_LABEL (insn) = far_label; + LABEL_NUSES (far_label)++; + } + redirect_jump (insn, NULL_RTX); + far_label = 0; + } + } + bp = uid_branch[dest_uid]; + if (! bp) + { + bp = (struct far_branch *) alloca (sizeof *bp); + uid_branch[dest_uid] = bp; + bp->prev = far_branch_list; + far_branch_list = bp; + bp->near_label = 0; + bp->far_label = far_label; + if (far_label) + LABEL_NUSES (far_label)++; + } + else if (bp->near_label && ! NEXT_INSN (bp->near_label)) + if (addr - bp->address <= CONDJUMP_MAX) + emit_label_after (bp->near_label, PREV_INSN (insn)); + else + { + gen_far_branch (bp); + bp->near_label = 0; + } + else + bp->near_label = 0; + bp->address = addr; + bp->insert_place = insn; + if (! far_label) + emit_insn_before (gen_block_branch_redirect (const0_rtx), insn); + else + gen_block_redirect (insn, addr, bp->near_label ? 2 : 0); + } + } + /* Generate all pending far branches, + and free our references to the far labels. */ + while (far_branch_list) + { + if (far_branch_list->near_label + && ! NEXT_INSN (far_branch_list->near_label)) + gen_far_branch (far_branch_list); + if (optimize + && far_branch_list->far_label + && ! --LABEL_NUSES (far_branch_list->far_label)) + delete_insn (far_branch_list->far_label); + far_branch_list = far_branch_list->prev; + } + + /* Instruction length information is no longer valid due to the new + instructions that have been generated. */ + init_insn_lengths (); +} + +/* Dump out instruction addresses, which is useful for debugging the + constant pool table stuff. + + If relaxing, output the label and pseudo-ops used to link together + calls and the instruction which set the registers. */ + +/* ??? This is unnecessary, and probably should be deleted. This makes + the insn_addresses declaration above unnecessary. */ + +/* ??? The addresses printed by this routine for insns are nonsense for + insns which are inside of a sequence where none of the inner insns have + variable length. This is because the second pass of shorten_branches + does not bother to update them. 
*/ + +void +final_prescan_insn (insn, opvec, noperands) + rtx insn; + rtx *opvec; + int noperands; +{ + if (TARGET_DUMPISIZE) + fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]); + + if (TARGET_RELAX) + { + rtx note; + + note = find_reg_note (insn, REG_LABEL, NULL_RTX); + if (note) + { + rtx pattern; + + pattern = PATTERN (insn); + if (GET_CODE (pattern) == PARALLEL) + pattern = XVECEXP (pattern, 0, 0); + if (GET_CODE (pattern) == CALL + || (GET_CODE (pattern) == SET + && (GET_CODE (SET_SRC (pattern)) == CALL + || get_attr_type (insn) == TYPE_SFUNC))) + asm_fprintf (asm_out_file, "\t.uses %LL%d\n", + CODE_LABEL_NUMBER (XEXP (note, 0))); + else if (GET_CODE (pattern) == SET) + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (XEXP (note, 0))); + else + abort (); + } + } +} + +/* Dump out any constants accumulated in the final pass. These will + only be labels. */ + +char * +output_jump_label_table () +{ + int i; + + if (pool_size) + { + fprintf (asm_out_file, "\t.align 2\n"); + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (p->label)); + output_asm_insn (".long %O0", &p->value); + } + pool_size = 0; + } + + return ""; +} + +/* A full frame looks like: + + arg-5 + arg-4 + [ if current_function_anonymous_args + arg-3 + arg-2 + arg-1 + arg-0 ] + saved-fp + saved-r10 + saved-r11 + saved-r12 + saved-pr + local-n + .. + local-1 + local-0 <- fp points here. */ + +/* Number of bytes pushed for anonymous args, used to pass information + between expand_prologue and expand_epilogue. */ + +static int extra_push; + +/* Adjust the stack by SIZE bytes. REG holds the rtl of the register + to be adjusted, and TEMP, if nonnegative, holds the register number + of a general register that we may clobber. */ + +static void +output_stack_adjust (size, reg, temp) + int size; + rtx reg; + int temp; +{ + if (size) + { + if (CONST_OK_FOR_I (size)) + emit_insn (gen_addsi3 (reg, reg, GEN_INT (size))); + /* Try to do it with two partial adjustments; however, we must make + sure that the stack is properly aligned at all times, in case + an interrupt occurs between the two partial adjustments. */ + else if (CONST_OK_FOR_I (size / 2 & -4) + && CONST_OK_FOR_I (size - (size / 2 & -4))) + { + emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4))); + emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4)))); + } + else + { + rtx const_reg; + + /* If TEMP is invalid, we could temporarily save a general + register to MACL. However, there is currently no need + to handle this case, so just abort when we see it. */ + if (temp < 0) + abort (); + const_reg = gen_rtx (REG, SImode, temp); + + /* If SIZE is negative, subtract the positive value. + This sometimes allows a constant pool entry to be shared + between prologue and epilogue code. */ + if (size < 0) + { + emit_insn (gen_movsi (const_reg, GEN_INT (-size))); + emit_insn (gen_subsi3 (reg, reg, const_reg)); + } + else + { + emit_insn (gen_movsi (const_reg, GEN_INT (size))); + emit_insn (gen_addsi3 (reg, reg, const_reg)); + } + } + } +} + +/* Output RTL to push register RN onto the stack. */ + +static void +push (rn) + int rn; +{ + rtx x; + if (rn == FPUL_REG) + x = gen_push_fpul (); + else if (TARGET_SH4 && TARGET_FMOVD && ! 
TARGET_FPU_SINGLE + && rn >= FIRST_FP_REG && rn <= LAST_XD_REG) + { + if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG) + return; + x = gen_push_4 (gen_rtx (REG, DFmode, rn)); + } + else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG) + x = gen_push_e (gen_rtx (REG, SFmode, rn)); + else + x = gen_push (gen_rtx (REG, SImode, rn)); + + x = emit_insn (x); + REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC, + gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0); +} + +/* Output RTL to pop register RN from the stack. */ + +static void +pop (rn) + int rn; +{ + rtx x; + if (rn == FPUL_REG) + x = gen_pop_fpul (); + else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE + && rn >= FIRST_FP_REG && rn <= LAST_XD_REG) + { + if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG) + return; + x = gen_pop_4 (gen_rtx (REG, DFmode, rn)); + } + else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG) + x = gen_pop_e (gen_rtx (REG, SFmode, rn)); + else + x = gen_pop (gen_rtx (REG, SImode, rn)); + + x = emit_insn (x); + REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC, + gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0); +} + +/* Generate code to push the regs specified in the mask. */ + +static void +push_regs (mask, mask2) + int mask, mask2; +{ + int i; + + /* Push PR last; this gives better latencies after the prologue, and + candidates for the return delay slot when there are no general + registers pushed. */ + for (i = 0; i < 32; i++) + if (mask & (1 << i) && i != PR_REG) + push (i); + for (i = 32; i < FIRST_PSEUDO_REGISTER; i++) + if (mask2 & (1 << (i - 32))) + push (i); + if (mask & (1 << PR_REG)) + push (PR_REG); +} + +/* Work out the registers which need to be saved, both as a mask and a + count of saved words. + + If doing a pragma interrupt function, then push all regs used by the + function, and if we call another function (we can tell by looking at PR), + make sure that all the regs it clobbers are safe too. */ + +static int +calc_live_regs (count_ptr, live_regs_mask2) + int *count_ptr; + int *live_regs_mask2; +{ + int reg; + int live_regs_mask = 0; + int count; + int interrupt_handler; + + if ((lookup_attribute + ("interrupt_handler", + DECL_MACHINE_ATTRIBUTES (current_function_decl))) + != NULL_TREE) + interrupt_handler = 1; + else + interrupt_handler = 0; + + *live_regs_mask2 = 0; + /* If we can save a lot of saves by switching to double mode, do that. */ + if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE) + for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2) + if (regs_ever_live[reg] && regs_ever_live[reg+1] + && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa)) + && ++count > 2) + { + target_flags &= ~FPU_SINGLE_BIT; + break; + } + for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--) + { + if ((interrupt_handler && ! pragma_trapa) + ? (/* Need to save all the regs ever live. */ + (regs_ever_live[reg] + || (call_used_regs[reg] + && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG) + && regs_ever_live[PR_REG])) + && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM + && reg != RETURN_ADDRESS_POINTER_REGNUM + && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG) + : (/* Only push those regs which are used and need to be saved. */ + regs_ever_live[reg] && ! call_used_regs[reg])) + { + if (reg >= 32) + *live_regs_mask2 |= 1 << (reg - 32); + else + live_regs_mask |= 1 << reg; + count++; + if (TARGET_SH4 && TARGET_FMOVD && reg >= FIRST_FP_REG) + if (reg <= LAST_FP_REG) + { + if (! TARGET_FPU_SINGLE && ! 
regs_ever_live[reg ^ 1]) + { + if (reg >= 32) + *live_regs_mask2 |= 1 << ((reg ^ 1) - 32); + else + live_regs_mask |= 1 << (reg ^ 1); + count++; + } + } + else if (reg <= LAST_XD_REG) + { + /* Must switch to double mode to access these registers. */ + target_flags &= ~FPU_SINGLE_BIT; + count++; + } + } + } + + *count_ptr = count; + return live_regs_mask; +} + +/* Code to generate prologue and epilogue sequences */ + +void +sh_expand_prologue () +{ + int live_regs_mask; + int d, i; + int live_regs_mask2; + int save_flags = target_flags; + int double_align = 0; + + /* We have pretend args if we had an object sent partially in registers + and partially on the stack, e.g. a large structure. */ + output_stack_adjust (-current_function_pretend_args_size, + stack_pointer_rtx, 3); + + extra_push = 0; + + /* This is set by SETUP_VARARGS to indicate that this is a varargs + routine. Clear it here so that the next function isn't affected. */ + if (current_function_anonymous_args) + { + current_function_anonymous_args = 0; + + /* This is not used by the SH3E calling convention */ + if (!TARGET_SH3E) + { + /* Push arg regs as if they'd been provided by caller in stack. */ + for (i = 0; i < NPARM_REGS(SImode); i++) + { + int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1; + if (i >= (NPARM_REGS(SImode) + - current_function_args_info.arg_count[(int) SH_ARG_INT] + )) + break; + push (rn); + extra_push += 4; + } + } + } + + /* If we're supposed to switch stacks at function entry, do so now. */ + if (sp_switch) + emit_insn (gen_sp_switch_1 ()); + + live_regs_mask = calc_live_regs (&d, &live_regs_mask2); + /* ??? Maybe we could save some switching if we can move a mode switch + that already happens to be at the function start into the prologue. */ + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + push_regs (live_regs_mask, live_regs_mask2); + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + + if (TARGET_ALIGN_DOUBLE && d & 1) + double_align = 4; + + target_flags = save_flags; + + output_stack_adjust (-get_frame_size () - double_align, + stack_pointer_rtx, 3); + + if (frame_pointer_needed) + emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx)); +} + +void +sh_expand_epilogue () +{ + int live_regs_mask; + int d, i; + + int live_regs_mask2; + int save_flags = target_flags; + int frame_size = get_frame_size (); + + live_regs_mask = calc_live_regs (&d, &live_regs_mask2); + + if (TARGET_ALIGN_DOUBLE && d & 1) + frame_size += 4; + + if (frame_pointer_needed) + { + output_stack_adjust (frame_size, frame_pointer_rtx, 7); + + /* We must avoid moving the stack pointer adjustment past code + which reads from the local frame, else an interrupt could + occur after the SP adjustment and clobber data in the local + frame. */ + emit_insn (gen_blockage ()); + emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx)); + } + else if (frame_size) + { + /* We must avoid moving the stack pointer adjustment past code + which reads from the local frame, else an interrupt could + occur after the SP adjustment and clobber data in the local + frame. */ + emit_insn (gen_blockage ()); + output_stack_adjust (frame_size, stack_pointer_rtx, 7); + } + + /* Pop all the registers. 
*/ + + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + if (live_regs_mask & (1 << PR_REG)) + pop (PR_REG); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + int j = (FIRST_PSEUDO_REGISTER - 1) - i; + if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG) + pop (j); + else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32)))) + pop (j); + } + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + target_flags = save_flags; + + output_stack_adjust (extra_push + current_function_pretend_args_size, + stack_pointer_rtx, 7); + + /* Switch back to the normal stack if necessary. */ + if (sp_switch) + emit_insn (gen_sp_switch_2 ()); +} + +/* Clear variables at function end. */ + +void +function_epilogue (stream, size) + FILE *stream; + int size; +{ + trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0; + sp_switch = NULL_RTX; +} + +rtx +sh_builtin_saveregs (arglist) + tree arglist; +{ + tree fntype = TREE_TYPE (current_function_decl); + /* First unnamed integer register. */ + int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT]; + /* Number of integer registers we need to save. */ + int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg); + /* First unnamed SFmode float reg */ + int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT]; + /* Number of SFmode float regs to save. */ + int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg); + int ptrsize = GET_MODE_SIZE (Pmode); + rtx valist, regbuf, fpregs; + int bufsize, regno; + + /* Allocate block of memory for the regs. */ + /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte? + Or can assign_stack_local accept a 0 SIZE argument? */ + bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD); + + regbuf = assign_stack_local (BLKmode, bufsize, 0); + MEM_SET_IN_STRUCT_P (regbuf, 1); + + /* Save int args. + This is optimized to only save the regs that are necessary. Explicitly + named args need not be saved. */ + if (n_intregs > 0) + move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg, + gen_rtx (MEM, BLKmode, + plus_constant (XEXP (regbuf, 0), + n_floatregs * UNITS_PER_WORD)), + n_intregs, n_intregs * UNITS_PER_WORD); + + /* Save float args. + This is optimized to only save the regs that are necessary. Explicitly + named args need not be saved. + We explicitly build a pointer to the buffer because it halves the insn + count when not optimizing (otherwise the pointer is built for each reg + saved). + We emit the moves in reverse order so that we can use predecrement. 
*/ + + fpregs = gen_reg_rtx (Pmode); + emit_move_insn (fpregs, XEXP (regbuf, 0)); + emit_insn (gen_addsi3 (fpregs, fpregs, + GEN_INT (n_floatregs * UNITS_PER_WORD))); + if (TARGET_SH4) + { + for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2) + { + emit_insn (gen_addsi3 (fpregs, fpregs, + GEN_INT (-2 * UNITS_PER_WORD))); + emit_move_insn (gen_rtx (MEM, DFmode, fpregs), + gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno)); + } + regno = first_floatreg; + if (regno & 1) + { + emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD))); + emit_move_insn (gen_rtx (MEM, SFmode, fpregs), + gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno + - (TARGET_LITTLE_ENDIAN != 0))); + } + } + else + for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--) + { + emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD))); + emit_move_insn (gen_rtx (MEM, SFmode, fpregs), + gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno)); + } + + /* Return the address of the regbuf. */ + return XEXP (regbuf, 0); +} + +/* Define the offset between two registers, one to be eliminated, and + the other its replacement, at the start of a routine. */ + +int +initial_elimination_offset (from, to) + int from; + int to; +{ + int regs_saved; + int total_saved_regs_space; + int total_auto_space = get_frame_size (); + int save_flags = target_flags; + + int live_regs_mask, live_regs_mask2; + live_regs_mask = calc_live_regs (®s_saved, &live_regs_mask2); + if (TARGET_ALIGN_DOUBLE && regs_saved & 1) + total_auto_space += 4; + target_flags = save_flags; + + total_saved_regs_space = (regs_saved) * 4; + + if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) + return total_saved_regs_space + total_auto_space; + + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return total_saved_regs_space + total_auto_space; + + /* Initial gap between fp and sp is 0. */ + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return 0; + + if (from == RETURN_ADDRESS_POINTER_REGNUM + && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM)) + { + int i, n = total_saved_regs_space; + for (i = PR_REG-1; i >= 0; i--) + if (live_regs_mask & (1 << i)) + n -= 4; + return n + total_auto_space; + } + + abort (); +} + +/* Handle machine specific pragmas to be semi-compatible with Hitachi + compiler. */ + +int +sh_handle_pragma (p_getc, p_ungetc, pname) + int (* p_getc) PROTO((void)); + void (* p_ungetc) PROTO((int)); + char * pname; +{ + int retval = 0; + + if (strcmp (pname, "interrupt") == 0) + pragma_interrupt = retval = 1; + else if (strcmp (pname, "trapa") == 0) + pragma_interrupt = pragma_trapa = retval = 1; + else if (strcmp (pname, "nosave_low_regs") == 0) + pragma_nosave_low_regs = retval = 1; + + return retval; +} + +/* Generate 'handle_interrupt' attribute for decls */ + +void +sh_pragma_insert_attributes (node, attributes, prefix) + tree node; + tree * attributes; + tree * prefix; +{ + tree a; + + if (! pragma_interrupt + || TREE_CODE (node) != FUNCTION_DECL) + return; + + /* We are only interested in fields. */ + if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd') + return; + + /* Add a 'handle_interrupt' attribute. */ + * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes); + + return; +} + +/* Return nonzero if ATTR is a valid attribute for DECL. + ATTRIBUTES are any existing attributes and ARGS are the arguments + supplied with ATTR. 
+ + Supported attributes: + + interrupt_handler -- specifies this function is an interrupt handler. + + sp_switch -- specifies an alternate stack for an interrupt handler + to run on. + + trap_exit -- use a trapa to exit an interrupt function instead of + an rte instruction. */ + +int +sh_valid_machine_decl_attribute (decl, attributes, attr, args) + tree decl; + tree attributes; + tree attr; + tree args; +{ + int retval = 0; + + if (TREE_CODE (decl) != FUNCTION_DECL) + return 0; + + if (is_attribute_p ("interrupt_handler", attr)) + { + return 1; + } + + if (is_attribute_p ("sp_switch", attr)) + { + /* The sp_switch attribute only has meaning for interrupt functions. */ + if (!pragma_interrupt) + return 0; + + /* sp_switch must have an argument. */ + if (!args || TREE_CODE (args) != TREE_LIST) + return 0; + + /* The argument must be a constant string. */ + if (TREE_CODE (TREE_VALUE (args)) != STRING_CST) + return 0; + + sp_switch = gen_rtx (SYMBOL_REF, VOIDmode, + TREE_STRING_POINTER (TREE_VALUE (args))); + return 1; + } + + if (is_attribute_p ("trap_exit", attr)) + { + /* The trap_exit attribute only has meaning for interrupt functions. */ + if (!pragma_interrupt) + return 0; + + /* trap_exit must have an argument. */ + if (!args || TREE_CODE (args) != TREE_LIST) + return 0; + + /* The argument must be a constant integer. */ + if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) + return 0; + + trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args)); + return 1; + } +} + + +/* Predicates used by the templates. */ + +/* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx. + Used only in general_movsrc_operand. */ + +int +system_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + switch (REGNO (op)) + { + case PR_REG: + case MACL_REG: + case MACH_REG: + return 1; + } + return 0; +} + +/* Returns 1 if OP can be source of a simple move operation. + Same as general_operand, but a LABEL_REF is valid, PRE_DEC is + invalid as are subregs of system registers. */ + +int +general_movsrc_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == MEM) + { + rtx inside = XEXP (op, 0); + if (GET_CODE (inside) == CONST) + inside = XEXP (inside, 0); + + if (GET_CODE (inside) == LABEL_REF) + return 1; + + if (GET_CODE (inside) == PLUS + && GET_CODE (XEXP (inside, 0)) == LABEL_REF + && GET_CODE (XEXP (inside, 1)) == CONST_INT) + return 1; + + /* Only post inc allowed. */ + if (GET_CODE (inside) == PRE_DEC) + return 0; + } + + if ((mode == QImode || mode == HImode) + && (GET_CODE (op) == SUBREG + && GET_CODE (XEXP (op, 0)) == REG + && system_reg_operand (XEXP (op, 0), mode))) + return 0; + + return general_operand (op, mode); +} + +/* Returns 1 if OP can be a destination of a move. + Same as general_operand, but no preinc allowed. */ + +int +general_movdst_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + /* Only pre dec allowed. */ + if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC) + return 0; + + return general_operand (op, mode); +} + +/* Returns 1 if OP is a normal arithmetic register. 
*/ + +int +arith_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (register_operand (op, mode)) + { + int regno; + + if (GET_CODE (op) == REG) + regno = REGNO (op); + else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) + regno = REGNO (SUBREG_REG (op)); + else + return 1; + + return (regno != T_REG && regno != PR_REG + && (regno != FPUL_REG || TARGET_SH4) + && regno != MACH_REG && regno != MACL_REG); + } + return 0; +} + +int +fp_arith_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (register_operand (op, mode)) + { + int regno; + + if (GET_CODE (op) == REG) + regno = REGNO (op); + else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) + regno = REGNO (SUBREG_REG (op)); + else + return 1; + + return (regno != T_REG && regno != PR_REG && regno > 15 + && regno != MACH_REG && regno != MACL_REG); + } + return 0; +} + +int +fp_extended_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == FLOAT_EXTEND && GET_MODE (op) == mode) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + return fp_arith_reg_operand (op, mode); +} + +/* Returns 1 if OP is a valid source operand for an arithmetic insn. */ + +int +arith_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op))) + return 1; + + return 0; +} + +/* Returns 1 if OP is a valid source operand for a compare insn. */ + +int +arith_reg_or_0_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op))) + return 1; + + return 0; +} + +/* Returns 1 if OP is a valid source operand for a logical operation. */ + +int +logical_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op))) + return 1; + + return 0; +} + +/* Nonzero if OP is a floating point value with value 0.0. */ + +int +fp_zero_operand (op) + rtx op; +{ + REAL_VALUE_TYPE r; + + if (GET_MODE (op) != SFmode) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r); +} + +/* Nonzero if OP is a floating point value with value 1.0. 
*/ + +int +fp_one_operand (op) + rtx op; +{ + REAL_VALUE_TYPE r; + + if (GET_MODE (op) != SFmode) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + return REAL_VALUES_EQUAL (r, dconst1); +} + +int +tertiary_reload_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE); +} + +int +fpscr_operand (op) + rtx op; +{ + return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG + && GET_MODE (op) == PSImode); +} + +int +commutative_float_operator (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_MODE (op) != mode) + return 0; + switch (GET_CODE (op)) + { + case PLUS: + case MULT: + return 1; + } + return 0; +} + +int +noncommutative_float_operator (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_MODE (op) != mode) + return 0; + switch (GET_CODE (op)) + { + case MINUS: + case DIV: + return 1; + } + return 0; +} + +int +binary_float_operator (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_MODE (op) != mode) + return 0; + switch (GET_CODE (op)) + { + case PLUS: + case MINUS: + case MULT: + case DIV: + return 1; + } + return 0; +} + +/* Return the destination address of a branch. */ + +int +branch_dest (branch) + rtx branch; +{ + rtx dest = SET_SRC (PATTERN (branch)); + int dest_uid; + + if (GET_CODE (dest) == IF_THEN_ELSE) + dest = XEXP (dest, 1); + dest = XEXP (dest, 0); + dest_uid = INSN_UID (dest); + return insn_addresses[dest_uid]; +} + +/* Return non-zero if REG is not used after INSN. + We assume REG is a reload reg, and therefore does + not live past labels. It may live past calls or jumps though. */ +int +reg_unused_after (reg, insn) + rtx reg; + rtx insn; +{ + enum rtx_code code; + rtx set; + + /* If the reg is set by this instruction, then it is safe for our + case. Disregard the case where this is a store to memory, since + we are checking a register used in the store address. */ + set = single_set (insn); + if (set && GET_CODE (SET_DEST (set)) != MEM + && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return 1; + + while (insn = NEXT_INSN (insn)) + { + code = GET_CODE (insn); + +#if 0 + /* If this is a label that existed before reload, then the register + if dead here. However, if this is a label added by reorg, then + the register may still be live here. We can't tell the difference, + so we just ignore labels completely. */ + if (code == CODE_LABEL) + return 1; + /* else */ +#endif + + if (code == JUMP_INSN) + return 0; + + /* If this is a sequence, we must handle them all at once. + We could have for instance a call that sets the target register, + and a insn in a delay slot that uses the register. In this case, + we must return 0. 
*/
+      else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+	{
+	  int i;
+	  int retval = 0;
+
+	  for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+	    {
+	      rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
+	      rtx set = single_set (this_insn);
+
+	      if (GET_CODE (this_insn) == CALL_INSN)
+		code = CALL_INSN;
+	      else if (GET_CODE (this_insn) == JUMP_INSN)
+		{
+		  if (INSN_ANNULLED_BRANCH_P (this_insn))
+		    return 0;
+		  code = JUMP_INSN;
+		}
+
+	      if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+		return 0;
+	      if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+		{
+		  if (GET_CODE (SET_DEST (set)) != MEM)
+		    retval = 1;
+		  else
+		    return 0;
+		}
+	      if (set == 0
+		  && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
+		return 0;
+	    }
+	  if (retval == 1)
+	    return 1;
+	  else if (code == JUMP_INSN)
+	    return 0;
+	}
+      else if (GET_RTX_CLASS (code) == 'i')
+	{
+	  rtx set = single_set (insn);
+
+	  if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+	    return 0;
+	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+	    return GET_CODE (SET_DEST (set)) != MEM;
+	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+	    return 0;
+	}
+
+      if (code == CALL_INSN && call_used_regs[REGNO (reg)])
+	return 1;
+    }
+  return 1;
+}
+
+extern struct obstack permanent_obstack;
+
+rtx
+get_fpscr_rtx ()
+{
+  static rtx fpscr_rtx;
+
+  if (! fpscr_rtx)
+    {
+      push_obstacks (&permanent_obstack, &permanent_obstack);
+      fpscr_rtx = gen_rtx (REG, PSImode, 48);
+      REG_USERVAR_P (fpscr_rtx) = 1;
+      pop_obstacks ();
+      mark_user_reg (fpscr_rtx);
+    }
+  if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
+    mark_user_reg (fpscr_rtx);
+  return fpscr_rtx;
+}
+
+void
+emit_sf_insn (pat)
+     rtx pat;
+{
+  rtx addr;
+  /* When generating reload insns, we must not create new registers.  FPSCR
+     should already have the correct value, so do nothing to change it.  */
+  if (! TARGET_FPU_SINGLE && ! reload_in_progress)
+    {
+      addr = gen_reg_rtx (SImode);
+      emit_insn (gen_fpu_switch0 (addr));
+    }
+  emit_insn (pat);
+  if (! TARGET_FPU_SINGLE && ! reload_in_progress)
+    {
+      addr = gen_reg_rtx (SImode);
+      emit_insn (gen_fpu_switch1 (addr));
+    }
+}
+
+void
+emit_df_insn (pat)
+     rtx pat;
+{
+  rtx addr;
+  if (TARGET_FPU_SINGLE && ! reload_in_progress)
+    {
+      addr = gen_reg_rtx (SImode);
+      emit_insn (gen_fpu_switch0 (addr));
+    }
+  emit_insn (pat);
+  if (TARGET_FPU_SINGLE && ! reload_in_progress)
+    {
+      addr = gen_reg_rtx (SImode);
+      emit_insn (gen_fpu_switch1 (addr));
+    }
+}
+
+void
+expand_sf_unop (fun, operands)
+     rtx (*fun)();
+     rtx *operands;
+{
+  emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_sf_binop (fun, operands)
+     rtx (*fun)();
+     rtx *operands;
+{
+  emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
+			get_fpscr_rtx ()));
+}
+
+void
+expand_df_unop (fun, operands)
+     rtx (*fun)();
+     rtx *operands;
+{
+  emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_df_binop (fun, operands)
+     rtx (*fun)();
+     rtx *operands;
+{
+  emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
+			get_fpscr_rtx ()));
+}
+
+void
+expand_fp_branch (compare, branch)
+     rtx (*compare) (), (*branch) ();
+{
+  (GET_MODE (sh_compare_op0) == SFmode ? emit_sf_insn : emit_df_insn)
+    ((*compare) ());
+  emit_jump_insn ((*branch) ());
+}
+
+/* We don't want to make fpscr call-saved, because that would prevent
+   changing it, and it would also cost an extra instruction to save it.
+   We don't want it to be known as a global register either, because
+   that disables all flow analysis.  But it has to be live at the function
+   return.  Thus, we need to insert a USE at the end of the function.  */
+/* This should best be called at about the time FINALIZE_PIC is called,
+   but not dependent on flag_pic.  Alas, there is no suitable hook there,
+   so this gets called from HAVE_RETURN.  */
+int
+emit_fpscr_use ()
+{
+  static int fpscr_uses = 0;
+
+  if (rtx_equal_function_value_matters)
+    {
+      emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ()));
+      fpscr_uses++;
+    }
+  else
+    {
+      if (fpscr_uses > 1)
+	{
+	  /* Due to the crude way we emit the USEs, we might end up with
+	     some extra ones.  Delete all but the last one.  */
+	  rtx insn;
+
+	  for (insn = get_last_insn(); insn; insn = PREV_INSN (insn))
+	    if (GET_CODE (insn) == INSN
+		&& GET_CODE (PATTERN (insn)) == USE
+		&& GET_CODE (XEXP (PATTERN (insn), 0)) == REG
+		&& REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
+	      {
+		insn = PREV_INSN (insn);
+		break;
+	      }
+	  for (; insn; insn = PREV_INSN (insn))
+	    if (GET_CODE (insn) == INSN
+		&& GET_CODE (PATTERN (insn)) == USE
+		&& GET_CODE (XEXP (PATTERN (insn), 0)) == REG
+		&& REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
+	      {
+		PUT_CODE (insn, NOTE);
+		NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+		NOTE_SOURCE_FILE (insn) = 0;
+	      }
+	}
+      fpscr_uses = 0;
+    }
+}
+
+/* ??? gcc does flow analysis strictly after common subexpression
+   elimination.  As a result, common subexpression elimination fails
+   when there are some intervening statements setting the same register.
+   If we did nothing about this, this would hurt the precision switching
+   for SH4 badly.  There is some cse after reload, but it is unable to
+   undo the extra register pressure from the unused instructions, and
+   it cannot remove auto-increment loads.
+
+   A C code example that shows this flow/cse weakness for (at least) SH
+   and sparc (as of gcc ss-970706) is this:
+
+double
+f(double a)
+{
+  double d;
+  d = 0.1;
+  a += d;
+  d = 1.1;
+  d = 0.1;
+  a *= d;
+  return a;
+}
+
+   So we add another pass before common subexpression elimination, to
+   remove assignments that are dead due to a following assignment in the
+   same basic block.  */
+
+int sh_flag_remove_dead_before_cse;
+
+static void
+mark_use (x, reg_set_block)
+     rtx x, *reg_set_block;
+{
+  enum rtx_code code;
+
+  if (! x)
+    return;
+  code = GET_CODE (x);
+  switch (code)
+    {
+    case REG:
+      {
+	int regno = REGNO (x);
+	int nregs = (regno < FIRST_PSEUDO_REGISTER
+		     ? HARD_REGNO_NREGS (regno, GET_MODE (x))
+		     : 1);
+	do
+	  {
+	    reg_set_block[regno + nregs - 1] = 0;
+	  }
+	while (--nregs);
+	break;
+      }
+    case SET:
+      {
+	rtx dest = SET_DEST (x);
+
+	if (GET_CODE (dest) == SUBREG)
+	  dest = SUBREG_REG (dest);
+	if (GET_CODE (dest) != REG)
+	  mark_use (dest, reg_set_block);
+	mark_use (SET_SRC (x), reg_set_block);
+	break;
+      }
+    case CLOBBER:
+      break;
+    default:
+      {
+	char *fmt = GET_RTX_FORMAT (code);
+	int i, j;
+	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+	  {
+	    if (fmt[i] == 'e')
+	      mark_use (XEXP (x, i), reg_set_block);
+	    else if (fmt[i] == 'E')
+	      for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+		mark_use (XVECEXP (x, i, j), reg_set_block);
+	  }
+	break;
+      }
+    }
+}
+
+int
+remove_dead_before_cse ()
+{
+  rtx *reg_set_block, last, last_call, insn, set;
+  int in_libcall = 0;
+
+  /* This pass should run just once, after rtl generation.  */
+
+  if (!
sh_flag_remove_dead_before_cse + || rtx_equal_function_value_matters + || reload_completed) + return; + + sh_flag_remove_dead_before_cse = 0; + + reg_set_block = (rtx *)alloca (max_reg_num () * sizeof (rtx)); + bzero ((char *)reg_set_block, max_reg_num () * sizeof (rtx)); + last_call = last = get_last_insn (); + for (insn = last; insn; insn = PREV_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + continue; + if (GET_CODE (insn) == JUMP_INSN) + { + last_call = last = insn; + continue; + } + set = single_set (insn); + + /* Don't delete parts of libcalls, since that would confuse cse, loop + and flow. */ + if (find_reg_note (insn, REG_RETVAL, NULL_RTX)) + in_libcall = 1; + else if (in_libcall) + { + if (find_reg_note (insn, REG_LIBCALL, NULL_RTX)) + in_libcall = 0; + } + else if (set && GET_CODE (SET_DEST (set)) == REG) + { + int regno = REGNO (SET_DEST (set)); + rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno] + ? last_call + : last); + if (reg_set_block[regno] == ref_insn + && (regno >= FIRST_PSEUDO_REGISTER + || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1) + && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn))) + { + PUT_CODE (insn, NOTE); + NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (insn) = 0; + continue; + } + else + reg_set_block[REGNO (SET_DEST (set))] = ref_insn; + } + if (GET_CODE (insn) == CALL_INSN) + { + last_call = insn; + mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block); + } + mark_use (PATTERN (insn), reg_set_block); + } + return 0; +} diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h new file mode 100755 index 0000000..eff316a --- /dev/null +++ b/gcc/config/sh/sh.h @@ -0,0 +1,2232 @@ +/* Definitions of target machine for GNU compiler for Hitachi Super-H. + Copyright (C) 1993-1998 Free Software Foundation, Inc. + Contributed by Steve Chamberlain (sac@cygnus.com). + Improved by Jim Wilson (wilson@cygnus.com). + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + + +#define TARGET_VERSION \ + fputs (" (Hitachi SH)", stderr); + +/* Unfortunately, insn-attrtab.c doesn't include insn-codes.h. We can't + include it here, because hconfig.h is also included by gencodes.c . */ +extern int code_for_indirect_jump_scratch; + +/* Generate SDB debugging information. */ + +#define SDB_DEBUGGING_INFO + +/* Output DBX (stabs) debugging information if doing -gstabs. 
*/ + +#include "dbxcoff.h" + +#define SDB_DELIM ";" + +#define CPP_SPEC "%{ml:-D__LITTLE_ENDIAN__} \ +%{m1:-D__sh1__} \ +%{m2:-D__sh2__} \ +%{m3:-D__sh3__} \ +%{m3e:-D__SH3E__} \ +%{m4-single-only:-D__SH4_SINGLE_ONLY__} \ +%{m4-single:-D__SH4_SINGLE__} \ +%{m4:-D__SH4__} \ +%{!m1:%{!m2:%{!m3:%{!m3e:%{!m4:%{!m4-single:%{!m4-single-only:-D__sh1__}}}}}}}" + +#define CPP_PREDEFINES "-D__sh__ -Acpu(sh) -Amachine(sh)" + +#define ASM_SPEC "%{ml:-little} %{mrelax:-relax}" + +#define LINK_SPEC "%{ml:-m shl} %{mrelax:-relax}" + +/* We can not debug without a frame pointer. */ +/* #define CAN_DEBUG_WITHOUT_FP */ + +#define CONDITIONAL_REGISTER_USAGE \ + if (! TARGET_SH4 || ! TARGET_FMOVD) \ + { \ + int regno; \ + for (regno = FIRST_XD_REG; regno <= LAST_XD_REG; regno++) \ + fixed_regs[regno] = call_used_regs[regno] = 1; \ + if (! TARGET_SH4) \ + { \ + if (! TARGET_SH3E) \ + { \ + int regno; \ + for (regno = FIRST_FP_REG; regno <= LAST_FP_REG; regno++) \ + fixed_regs[regno] = call_used_regs[regno] = 1; \ + fixed_regs[FPUL_REG] = call_used_regs[FPUL_REG] = 1; \ + } \ + } \ + } \ + /* Hitachi saves and restores mac registers on call. */ \ + if (TARGET_HITACHI) \ + { \ + call_used_regs[MACH_REG] = 0; \ + call_used_regs[MACL_REG] = 0; \ + } + +/* ??? Need to write documentation for all SH options and add it to the + invoke.texi file. */ + +/* Run-time compilation parameters selecting different hardware subsets. */ + +extern int target_flags; +#define ISIZE_BIT (1<<1) +#define DALIGN_BIT (1<<6) +#define SH1_BIT (1<<8) +#define SH2_BIT (1<<9) +#define SH3_BIT (1<<10) +#define SH3E_BIT (1<<11) +#define HARD_SH4_BIT (1<<5) +#define FPU_SINGLE_BIT (1<<7) +#define SH4_BIT (1<<12) +#define FMOVD_BIT (1<<4) +#define SPACE_BIT (1<<13) +#define BIGTABLE_BIT (1<<14) +#define RELAX_BIT (1<<15) +#define HITACHI_BIT (1<<22) +#define PADSTRUCT_BIT (1<<28) +#define LITTLE_ENDIAN_BIT (1<<29) +#define IEEE_BIT (1<<30) + +/* Nonzero if we should dump out instruction size info. */ +#define TARGET_DUMPISIZE (target_flags & ISIZE_BIT) + +/* Nonzero to align doubles on 64 bit boundaries. */ +#define TARGET_ALIGN_DOUBLE (target_flags & DALIGN_BIT) + +/* Nonzero if we should generate code using type 1 insns. */ +#define TARGET_SH1 (target_flags & SH1_BIT) + +/* Nonzero if we should generate code using type 2 insns. */ +#define TARGET_SH2 (target_flags & SH2_BIT) + +/* Nonzero if we should generate code using type 3 insns. */ +#define TARGET_SH3 (target_flags & SH3_BIT) + +/* Nonzero if we should generate code using type 3E insns. */ +#define TARGET_SH3E (target_flags & SH3E_BIT) + +/* Nonzero if the cache line size is 32. */ +#define TARGET_CACHE32 (target_flags & HARD_SH4_BIT) + +/* Nonzero if we schedule for a superscalar implementation. */ +#define TARGET_SUPERSCALAR (target_flags & HARD_SH4_BIT) + +/* Nonzero if the target has separate instruction and data caches. */ +#define TARGET_HARVARD (target_flags & HARD_SH4_BIT) + +/* Nonzero if compiling for SH4 hardware (to be used for insn costs etc.) */ +#define TARGET_HARD_SH4 (target_flags & HARD_SH4_BIT) + +/* Nonzero if the default precision of th FPU is single */ +#define TARGET_FPU_SINGLE (target_flags & FPU_SINGLE_BIT) + +/* Nonzero if we should generate code using type 4 insns. */ +#define TARGET_SH4 (target_flags & SH4_BIT) + +/* Nonzero if we should generate fmovd. */ +#define TARGET_FMOVD (target_flags & FMOVD_BIT) + +/* Nonzero if we respect NANs. */ +#define TARGET_IEEE (target_flags & IEEE_BIT) + +/* Nonzero if we should generate smaller code rather than faster code. 
*/ +#define TARGET_SMALLCODE (target_flags & SPACE_BIT) + +/* Nonzero to use long jump tables. */ +#define TARGET_BIGTABLE (target_flags & BIGTABLE_BIT) + +/* Nonzero to generate pseudo-ops needed by the assembler and linker + to do function call relaxing. */ +#define TARGET_RELAX (target_flags & RELAX_BIT) + +/* Nonzero if using Hitachi's calling convention. */ +#define TARGET_HITACHI (target_flags & HITACHI_BIT) + +/* Nonzero if padding structures to a multiple of 4 bytes. This is + incompatible with Hitachi's compiler, and gives unusual structure layouts + which confuse programmers. + ??? This option is not useful, but is retained in case there are people + who are still relying on it. It may be deleted in the future. */ +#define TARGET_PADSTRUCT (target_flags & PADSTRUCT_BIT) + +/* Nonzero if generating code for a little endian SH. */ +#define TARGET_LITTLE_ENDIAN (target_flags & LITTLE_ENDIAN_BIT) + +#define TARGET_SWITCHES \ +{ {"1", SH1_BIT}, \ + {"2", SH2_BIT}, \ + {"3", SH3_BIT|SH2_BIT}, \ + {"3e", SH3E_BIT|SH3_BIT|SH2_BIT|FPU_SINGLE_BIT}, \ + {"4-single-only", SH3E_BIT|SH3_BIT|SH2_BIT|SH3E_BIT|HARD_SH4_BIT|FPU_SINGLE_BIT}, \ + {"4-single", SH4_BIT|SH3E_BIT|SH3_BIT|SH2_BIT|HARD_SH4_BIT|FPU_SINGLE_BIT},\ + {"4", SH4_BIT|SH3E_BIT|SH3_BIT|SH2_BIT|HARD_SH4_BIT}, \ + {"b", -LITTLE_ENDIAN_BIT}, \ + {"bigtable", BIGTABLE_BIT}, \ + {"dalign", DALIGN_BIT}, \ + {"fmovd", FMOVD_BIT}, \ + {"hitachi", HITACHI_BIT}, \ + {"ieee", IEEE_BIT}, \ + {"isize", ISIZE_BIT}, \ + {"l", LITTLE_ENDIAN_BIT}, \ + {"no-ieee", -IEEE_BIT}, \ + {"padstruct", PADSTRUCT_BIT}, \ + {"relax", RELAX_BIT}, \ + {"space", SPACE_BIT}, \ + SUBTARGET_SWITCHES \ + {"", TARGET_DEFAULT} \ +} + +/* This are meant to be redefined in the host dependent files */ +#define SUBTARGET_SWITCHES + +#define TARGET_DEFAULT (0) + +#define OPTIMIZATION_OPTIONS(LEVEL,SIZE) \ +do { \ + if (LEVEL) \ + flag_omit_frame_pointer = -1; \ + if (LEVEL) \ + sh_flag_remove_dead_before_cse = 1; \ + if (SIZE) \ + target_flags |= SPACE_BIT; \ +} while (0) + +#define ASSEMBLER_DIALECT assembler_dialect + +extern int assembler_dialect; + +#define OVERRIDE_OPTIONS \ +do { \ + sh_cpu = CPU_SH1; \ + assembler_dialect = 0; \ + if (TARGET_SH2) \ + sh_cpu = CPU_SH2; \ + if (TARGET_SH3) \ + sh_cpu = CPU_SH3; \ + if (TARGET_SH3E) \ + sh_cpu = CPU_SH3E; \ + if (TARGET_SH4) \ + { \ + assembler_dialect = 1; \ + sh_cpu = CPU_SH4; \ + } \ + if (! TARGET_SH4 || ! TARGET_FMOVD) \ + { \ + /* Prevent usage of explicit register names for variables \ + for registers not present / not addressable in the \ + target architecture. */ \ + int regno; \ + for (regno = (TARGET_SH3E) ? 17 : 0; \ + regno <= 24; regno++) \ + fp_reg_names[regno][0] = 0; \ + } \ + if (flag_omit_frame_pointer < 0) \ + /* The debugging information is sufficient, \ + but gdb doesn't implement this yet */ \ + if (0) \ + flag_omit_frame_pointer \ + = (PREFERRED_DEBUGGING_TYPE == DWARF_DEBUG \ + || PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG); \ + else \ + flag_omit_frame_pointer = 0; \ + \ + /* Never run scheduling before reload, since that can \ + break global alloc, and generates slower code anyway due \ + to the pressure on R0. */ \ + flag_schedule_insns = 0; \ + sh_addr_diff_vec_mode = TARGET_BIGTABLE ? SImode : HImode; \ +} while (0) + +/* Target machine storage layout. */ + +/* Define to use software floating point emulator for REAL_ARITHMETIC and + decimal <-> binary conversion. 
*/ +#define REAL_ARITHMETIC + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ + +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. */ +#define BYTES_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0) + +/* Define this if most significant word of a multiword number is the lowest + numbered. */ +#define WORDS_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0) + +/* Define this to set the endianness to use in libgcc2.c, which can + not depend on target_flags. */ +#if defined(__LITTLE_ENDIAN__) +#define LIBGCC2_WORDS_BIG_ENDIAN 0 +#else +#define LIBGCC2_WORDS_BIG_ENDIAN 1 +#endif + +/* Number of bits in an addressable storage unit. */ +#define BITS_PER_UNIT 8 + +/* Width in bits of a "word", which is the contents of a machine register. + Note that this is not necessarily the width of data type `int'; + if using 16-bit ints on a 68000, this would still be 32. + But on a machine with 16-bit registers, this would be 16. */ +#define BITS_PER_WORD 32 +#define MAX_BITS_PER_WORD 32 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 4 + +/* Width in bits of a pointer. + See also the macro `Pmode' defined below. */ +#define POINTER_SIZE 32 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 32 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY BIGGEST_ALIGNMENT + +/* The log (base 2) of the cache line size, in bytes. Processors prior to + SH3 have no actual cache, but they fetch code in chunks of 4 bytes. */ +#define CACHE_LOG (TARGET_CACHE32 ? 5 : TARGET_SH3 ? 4 : 2) + +/* Allocation boundary (in *bits*) for the code of a function. + 32 bit alignment is faster, because instructions are always fetched as a + pair from a longword boundary. */ +#define FUNCTION_BOUNDARY (TARGET_SMALLCODE ? 16 : (1 << CACHE_LOG) * 8) + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 32 + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32) + +/* The best alignment to use in cases where we have a choice. */ +#define FASTEST_ALIGNMENT 32 + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < FASTEST_ALIGNMENT) \ + ? FASTEST_ALIGNMENT : (ALIGN)) + +#ifndef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT 128 +#endif + +/* Make arrays of chars word-aligned for the same reasons. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Number of bits which any structure or union's size must be a + multiple of. Each structure or union's size is rounded up to a + multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY (TARGET_PADSTRUCT ? 32 : 8) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* If LABEL_AFTER_BARRIER demands an alignment, return its base 2 logarithm. */ +#define LABEL_ALIGN_AFTER_BARRIER(LABEL_AFTER_BARRIER) \ + barrier_align (LABEL_AFTER_BARRIER) + +#define LOOP_ALIGN(A_LABEL) \ + ((! optimize || TARGET_HARVARD || TARGET_SMALLCODE) \ + ? 
0 : sh_loop_align (A_LABEL)) + +#define LABEL_ALIGN(A_LABEL) \ +( \ + (PREV_INSN (A_LABEL) \ + && GET_CODE (PREV_INSN (A_LABEL)) == INSN \ + && GET_CODE (PATTERN (PREV_INSN (A_LABEL))) == UNSPEC_VOLATILE \ + && XINT (PATTERN (PREV_INSN (A_LABEL)), 1) == 1) \ + /* explicit alignment insn in constant tables. */ \ + ? INTVAL (XVECEXP (PATTERN (PREV_INSN (A_LABEL)), 0, 0)) \ + : 0) + +/* Jump tables must be 32 bit aligned, no matter the size of the element. */ +#define ADDR_VEC_ALIGN(ADDR_VEC) 2 + +/* The base two logarithm of the known minimum alignment of an insn length. */ +#define INSN_LENGTH_ALIGNMENT(A_INSN) \ + (GET_CODE (A_INSN) == INSN \ + ? 1 \ + : GET_CODE (A_INSN) == JUMP_INSN || GET_CODE (A_INSN) == CALL_INSN \ + ? 1 \ + : CACHE_LOG) + +/* Standard register usage. */ + +/* Register allocation for the Hitachi calling convention: + + r0 arg return + r1..r3 scratch + r4..r7 args in + r8..r13 call saved + r14 frame pointer/call saved + r15 stack pointer + ap arg pointer (doesn't really exist, always eliminated) + pr subroutine return address + t t bit + mach multiply/accumulate result, high part + macl multiply/accumulate result, low part. + fpul fp/int communication register + rap return address pointer register + fr0 fp arg return + fr1..fr3 scratch floating point registers + fr4..fr11 fp args in + fr12..fr15 call saved floating point registers */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. */ + +#define AP_REG 16 +#define PR_REG 17 +#define T_REG 18 +#define GBR_REG 19 +#define MACH_REG 20 +#define MACL_REG 21 +#define SPECIAL_REG(REGNO) ((REGNO) >= 18 && (REGNO) <= 21) +#define FPUL_REG 22 +#define RAP_REG 23 +#define FIRST_FP_REG 24 +#define LAST_FP_REG 39 +#define FIRST_XD_REG 40 +#define LAST_XD_REG 47 +#define FPSCR_REG 48 + +#define FIRST_PSEUDO_REGISTER 49 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. + + Mach register is fixed 'cause it's only 10 bits wide for SH1. + It is 32 bits wide for SH2. */ + +#define FIXED_REGISTERS \ + { 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 1, \ + 1, 1, 1, 1, \ + 1, 1, 0, 1, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 1, \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ + +#define CALL_USED_REGISTERS \ + { 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 0, 0, 0, 0, \ + 0, 0, 0, 1, \ + 1, 0, 1, 1, \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 0, 0, 0, 0, \ + 1, 1, 1, 1, \ + 1, 1, 0, 0, \ + 1, \ +} + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. + + On the SH all but the XD regs are UNITS_PER_WORD bits wide. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((REGNO) >= FIRST_XD_REG && (REGNO) <= LAST_XD_REG \ + ? 
(GET_MODE_SIZE (MODE) / (2 * UNITS_PER_WORD)) \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) \ + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. + We can allow any mode in any general register. The special registers + only allow SImode. Don't allow any mode in the PR. */ + +/* We cannot hold DCmode values in the XD registers because alter_reg + handles subregs of them incorrectly. We could work around this by + spacing the XD registers like the DR registers, but this would require + additional memory in every compilation to hold larger register vectors. + We could hold SFmode / SCmode values in XD registers, but that + would require a tertiary reload when reloading from / to memory, + and a secondary reload to reload from / to general regs; that + seems to be a loosing proposition. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (SPECIAL_REG (REGNO) ? (MODE) == SImode \ + : (REGNO) == FPUL_REG ? (MODE) == SImode || (MODE) == SFmode \ + : (REGNO) >= FIRST_FP_REG && (REGNO) <= LAST_FP_REG && (MODE) == SFmode \ + ? 1 \ + : (REGNO) >= FIRST_FP_REG && (REGNO) <= LAST_FP_REG \ + ? ((MODE) == SFmode \ + || (TARGET_SH3E && (MODE) == SCmode) \ + || (((TARGET_SH4 && (MODE) == DFmode) || (MODE) == DCmode) \ + && (((REGNO) - FIRST_FP_REG) & 1) == 0)) \ + : (REGNO) >= FIRST_XD_REG && (REGNO) <= LAST_XD_REG \ + ? (MODE) == DFmode \ + : (REGNO) == PR_REG ? 0 \ + : (REGNO) == FPSCR_REG ? (MODE) == PSImode \ + : 1) + +/* Value is 1 if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. + If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be 0 for correct output. */ + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + ((MODE1) == (MODE2) || GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* Define this if the program counter is overloaded on a register. */ +/* #define PC_REGNUM 15*/ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 15 + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 14 + +/* Fake register that holds the address on the stack of the + current function's return address. */ +#define RETURN_ADDRESS_POINTER_REGNUM 23 + +/* Value should be nonzero if functions must have frame pointers. + Zero means the frame pointer need not be set up (and parms may be accessed + via the stack pointer) in functions that seem suitable. */ + +#define FRAME_POINTER_REQUIRED 0 + +/* Definitions for register eliminations. + + We have three registers that can be eliminated on the SH. First, the + frame pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the argument pointer register can always be + eliminated; it is replaced with either the stack or frame pointer. + Third, there is the return address pointer, which can also be replaced + with either the stack or the frame pointer. */ + +/* This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. 
*/ + +/* If you add any registers here that are not actually hard registers, + and that have any alternative of elimination that doesn't always + apply, you need to amend calc_live_regs to exclude it, because + reload spills all eliminable registers where it sees an + can_eliminate == 0 entry, thus making them 'live' . + If you add any hard registers that can be eliminated in different + ways, you have to patch reload to spill them only when all alternatives + of elimination fail. */ + +#define ELIMINABLE_REGS \ +{{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { RETURN_ADDRESS_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { RETURN_ADDRESS_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM},} + +/* Given FROM and TO register numbers, say whether this elimination + is allowed. */ +#define CAN_ELIMINATE(FROM, TO) \ + (!((FROM) == FRAME_POINTER_REGNUM && FRAME_POINTER_REQUIRED)) + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + OFFSET = initial_elimination_offset ((FROM), (TO)) + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 16 + +/* Register in which the static-chain is passed to a function. */ +#define STATIC_CHAIN_REGNUM 13 + +/* The register in which a struct value address is passed. */ + +#define STRUCT_VALUE_REGNUM 2 + +/* If the structure value address is not passed in a register, define + `STRUCT_VALUE' as an expression returning an RTX for the place + where the address is passed. If it returns 0, the address is + passed as an "invisible" first argument. */ + +/*#define STRUCT_VALUE ((rtx)0)*/ + +/* Don't default to pcc-struct-return, because we have already specified + exactly how to return structures in the RETURN_IN_MEMORY macro. */ + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +/* The SH has two sorts of general registers, R0 and the rest. R0 can + be used as the destination of some of the arithmetic ops. There are + also some special purpose registers; the T bit register, the + Procedure Return Register and the Multiply Accumulate Registers. */ +/* Place GENERAL_REGS after FPUL_REGS so that it will be preferred by + reg_class_subunion. We don't want to have an actual union class + of these, because it would only be used when both classes are calculated + to give the same cost, but there is only one FPUL register. + Besides, regclass fails to notice the different REGISTER_MOVE_COSTS + applying to the actual instruction alternative considered. 
E.g., the + y/r alternative of movsi_ie is considered to have no more cost that + the r/r alternative, which is patently untrue. */ + +enum reg_class +{ + NO_REGS, + R0_REGS, + PR_REGS, + T_REGS, + MAC_REGS, + FPUL_REGS, + GENERAL_REGS, + FP0_REGS, + FP_REGS, + DF_REGS, + FPSCR_REGS, + GENERAL_FP_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "R0_REGS", \ + "PR_REGS", \ + "T_REGS", \ + "MAC_REGS", \ + "FPUL_REGS", \ + "GENERAL_REGS", \ + "FP0_REGS", \ + "FP_REGS", \ + "DF_REGS", \ + "FPSCR_REGS", \ + "GENERAL_FP_REGS", \ + "ALL_REGS", \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ \ + { 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x00000001, 0x00000000 }, /* R0_REGS */ \ + { 0x00020000, 0x00000000 }, /* PR_REGS */ \ + { 0x00040000, 0x00000000 }, /* T_REGS */ \ + { 0x00300000, 0x00000000 }, /* MAC_REGS */ \ + { 0x00400000, 0x00000000 }, /* FPUL_REGS */ \ + { 0x0081FFFF, 0x00000000 }, /* GENERAL_REGS */ \ + { 0x01000000, 0x00000000 }, /* FP0_REGS */ \ + { 0xFF000000, 0x000000FF }, /* FP_REGS */ \ + { 0xFF000000, 0x0000FFFF }, /* DF_REGS */ \ + { 0x00000000, 0x00010000 }, /* FPSCR_REGS */ \ + { 0xFF81FFFF, 0x0000FFFF }, /* GENERAL_FP_REGS */ \ + { 0xFFFFFFFF, 0x0001FFFF }, /* ALL_REGS */ \ +} + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +extern int regno_reg_class[]; +#define REGNO_REG_CLASS(REGNO) regno_reg_class[(REGNO)] + +/* When defined, the compiler allows registers explicitly used in the + rtl to be used as spill registers but prevents the compiler from + extending the lifetime of these registers. */ + +#define SMALL_REGISTER_CLASSES 1 + +/* The order in which register should be allocated. */ +/* Sometimes FP0_REGS becomes the preferred class of a floating point pseudo, + and GENERAL_FP_REGS the alternate class. Since FP0 is likely to be + spilled or used otherwise, we better have the FP_REGS allocated first. */ +#define REG_ALLOC_ORDER \ + { 25,26,27,28,29,30,31,24,32,33,34,35,36,37,38,39, \ + 40,41,42,43,44,45,46,47,48, \ + 1,2,3,7,6,5,4,0,8,9,10,11,12,13,14, \ + 22,15,16,17,18,19,20,21,23 } + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS R0_REGS +#define BASE_REG_CLASS GENERAL_REGS + +/* Get reg_class from a letter such as appears in the machine + description. */ +extern enum reg_class reg_class_from_letter[]; + +#define REG_CLASS_FROM_LETTER(C) \ + ( (C) >= 'a' && (C) <= 'z' ? reg_class_from_letter[(C)-'a'] : NO_REGS ) + +/* The letters I, J, K, L and M in a register constraint string + can be used to stand for particular ranges of immediate operands. + This macro defines what the ranges are. + C is the letter, and VALUE is a constant value. + Return 1 if VALUE is in the range specified by C. + I: arithmetic operand -127..128, as used in add, sub, etc + K: shift operand 1,2,8 or 16 + L: logical operand 0..255, as used in and, or, etc. 
+ M: constant 1 + N: constant 0 */ + +#define CONST_OK_FOR_I(VALUE) (((HOST_WIDE_INT)(VALUE))>= -128 \ + && ((HOST_WIDE_INT)(VALUE)) <= 127) +#define CONST_OK_FOR_K(VALUE) ((VALUE)==1||(VALUE)==2||(VALUE)==8||(VALUE)==16) +#define CONST_OK_FOR_L(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \ + && ((HOST_WIDE_INT)(VALUE)) <= 255) +#define CONST_OK_FOR_M(VALUE) ((VALUE)==1) +#define CONST_OK_FOR_N(VALUE) ((VALUE)==0) +#define CONST_OK_FOR_LETTER_P(VALUE, C) \ + ((C) == 'I' ? CONST_OK_FOR_I (VALUE) \ + : (C) == 'K' ? CONST_OK_FOR_K (VALUE) \ + : (C) == 'L' ? CONST_OK_FOR_L (VALUE) \ + : (C) == 'M' ? CONST_OK_FOR_M (VALUE) \ + : (C) == 'N' ? CONST_OK_FOR_N (VALUE) \ + : 0) + +/* Similar, but for floating constants, and defining letters G and H. + Here VALUE is the CONST_DOUBLE rtx itself. */ + +#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ +((C) == 'G' ? fp_zero_operand (VALUE) \ + : (C) == 'H' ? fp_one_operand (VALUE) \ + : (C) == 'F') + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ + +#define PREFERRED_RELOAD_CLASS(X, CLASS) (CLASS) + +#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS,MODE,X) \ + ((((((CLASS) == FP_REGS || (CLASS) == FP0_REGS \ + || (CLASS) == DF_REGS) \ + && (GET_CODE (X) == REG && REGNO (X) <= AP_REG)) \ + || (((CLASS) == GENERAL_REGS || (CLASS) == R0_REGS) \ + && GET_CODE (X) == REG \ + && REGNO (X) >= FIRST_FP_REG && REGNO (X) <= LAST_FP_REG)) \ + && MODE == SFmode) \ + ? FPUL_REGS \ + : ((CLASS) == FPUL_REGS \ + && (GET_CODE (X) == MEM \ + || (GET_CODE (X) == REG && REGNO (X) >= FIRST_PSEUDO_REGISTER)))\ + ? GENERAL_REGS \ + : (((CLASS) == MAC_REGS || (CLASS) == PR_REGS) \ + && GET_CODE (X) == REG && REGNO (X) > 15 \ + && (CLASS) != REGNO_REG_CLASS (REGNO (X))) \ + ? GENERAL_REGS : NO_REGS) + +#define SECONDARY_INPUT_RELOAD_CLASS(CLASS,MODE,X) \ + ((((CLASS) == FP_REGS || (CLASS) == FP0_REGS || (CLASS) == DF_REGS) \ + && immediate_operand ((X), (MODE)) \ + && ! ((fp_zero_operand (X) || fp_one_operand (X)) && (MODE) == SFmode))\ + ? R0_REGS \ + : CLASS == FPUL_REGS && immediate_operand ((X), (MODE)) \ + ? (GET_CODE (X) == CONST_INT && CONST_OK_FOR_I (INTVAL (X)) \ + ? GENERAL_REGS \ + : R0_REGS) \ + : (CLASS == FPSCR_REGS \ + && ((GET_CODE (X) == REG && REGNO (X) >= FIRST_PSEUDO_REGISTER) \ + || GET_CODE (X) == MEM && GET_CODE (XEXP ((X), 0)) == PLUS)) \ + ? GENERAL_REGS \ + : SECONDARY_OUTPUT_RELOAD_CLASS((CLASS),(MODE),(X))) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. + + On SH this is the size of MODE in words. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* If defined, gives a class of registers that cannot be used as the + operand of a SUBREG that changes the size of the object. */ + +#define CLASS_CANNOT_CHANGE_SIZE DF_REGS + +/* Stack layout; function entry, exit and calling. */ + +/* Define the number of registers that can hold parameters. + These macros are used only in other macro definitions below. */ + +#define NPARM_REGS(MODE) \ + (TARGET_SH3E && (MODE) == SFmode \ + ? 8 \ + : TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \ + ? 
8 \ + : 4) + +#define FIRST_PARM_REG 4 +#define FIRST_RET_REG 0 + +#define FIRST_FP_PARM_REG (FIRST_FP_REG + 4) +#define FIRST_FP_RET_REG FIRST_FP_REG + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this macro if the addresses of local variable slots are at + negative offsets from the frame pointer. + + The SH only has positive indexes, so grow the frame up. */ +/* #define FRAME_GROWS_DOWNWARD */ + +/* Offset from the frame pointer to the first local variable slot to + be allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. */ +/* Don't define PUSH_ROUNDING, since the hardware doesn't do this. + When PUSH_ROUNDING is not defined, PARM_BOUNDARY will cause gcc to + do correct alignment. */ +#if 0 +#define PUSH_ROUNDING(NPUSHED) (((NPUSHED) + 3) & ~3) +#endif + +/* Offset of first parameter from the argument pointer register value. */ +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Value is the number of byte of arguments automatically + popped when returning from a subroutine call. + FUNDECL is the declaration node of the function (as a tree), + FUNTYPE is the data type of the function (as a tree), + or for a library call it is an identifier node for the subroutine name. + SIZE is the number of bytes of arguments passed on the stack. + + On the SH, the caller does not pop any of its arguments that were passed + on the stack. */ +#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE) 0 + +/* Nonzero if we do not know how to pass TYPE solely in registers. + Values that come in registers with inconvenient padding are stored + to memory at the function start. */ + +#define MUST_PASS_IN_STACK(MODE,TYPE) \ + ((TYPE) != 0 \ + && (TREE_CODE (TYPE_SIZE (TYPE)) != INTEGER_CST \ + || TREE_ADDRESSABLE (TYPE))) +/* Some subroutine macros specific to this machine. */ + +#define BASE_RETURN_VALUE_REG(MODE) \ + ((TARGET_SH3E && ((MODE) == SFmode)) \ + ? FIRST_FP_RET_REG \ + : TARGET_SH3E && (MODE) == SCmode \ + ? FIRST_FP_RET_REG \ + : (TARGET_SH4 \ + && ((MODE) == DFmode || (MODE) == SFmode \ + || (MODE) == DCmode || (MODE) == SCmode )) \ + ? FIRST_FP_RET_REG \ + : FIRST_RET_REG) + +#define BASE_ARG_REG(MODE) \ + ((TARGET_SH3E && ((MODE) == SFmode)) \ + ? FIRST_FP_PARM_REG \ + : TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)\ + ? FIRST_FP_PARM_REG \ + : FIRST_PARM_REG) + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. + For the SH, this is like LIBCALL_VALUE, except that we must change the + mode like PROMOTE_MODE does. + ??? PROMOTE_MODE is ignored for non-scalar types. The set of types + tested here has to be kept in sync with the one in explow.c:promote_mode. */ + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + gen_rtx (REG, \ + ((GET_MODE_CLASS (TYPE_MODE (VALTYPE)) == MODE_INT \ + && GET_MODE_SIZE (TYPE_MODE (VALTYPE)) < UNITS_PER_WORD \ + && (TREE_CODE (VALTYPE) == INTEGER_TYPE \ + || TREE_CODE (VALTYPE) == ENUMERAL_TYPE \ + || TREE_CODE (VALTYPE) == BOOLEAN_TYPE \ + || TREE_CODE (VALTYPE) == CHAR_TYPE \ + || TREE_CODE (VALTYPE) == REAL_TYPE \ + || TREE_CODE (VALTYPE) == OFFSET_TYPE)) \ + ? 
SImode : TYPE_MODE (VALTYPE)), \ + BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE))) + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ +#define LIBCALL_VALUE(MODE) \ + gen_rtx (REG, (MODE), BASE_RETURN_VALUE_REG (MODE)) + +/* 1 if N is a possible register number for a function value. */ +#define FUNCTION_VALUE_REGNO_P(REGNO) \ + ((REGNO) == FIRST_RET_REG || (TARGET_SH3E && (REGNO) == FIRST_FP_RET_REG)) + +/* 1 if N is a possible register number for function argument passing. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ + (((REGNO) >= FIRST_PARM_REG && (REGNO) < (FIRST_PARM_REG + 4)) \ + || (TARGET_SH3E \ + && (REGNO) >= FIRST_FP_PARM_REG && (REGNO) < (FIRST_FP_PARM_REG + 8))) + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On SH, this is a single integer, which is a number of words + of arguments scanned so far (including the invisible argument, + if any, which holds the structure-value-address). + Thus NARGREGS or more means all following args should go on the stack. */ + +enum sh_arg_class { SH_ARG_INT = 0, SH_ARG_FLOAT = 1 }; +struct sh_args { + int arg_count[2]; +}; + +#define CUMULATIVE_ARGS struct sh_args + +#define GET_SH_ARG_CLASS(MODE) \ + ((TARGET_SH3E && (MODE) == SFmode) \ + ? SH_ARG_FLOAT \ + : TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \ + ? SH_ARG_FLOAT : SH_ARG_INT) + +#define ROUND_ADVANCE(SIZE) \ + (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Round a register number up to a proper boundary for an arg of mode + MODE. + + The SH doesn't care about double alignment, so we only + round doubles to even regs when asked to explicitly. */ + +#define ROUND_REG(CUM, MODE) \ + (((TARGET_ALIGN_DOUBLE \ + || (TARGET_SH4 && ((MODE) == DFmode || (MODE) == DCmode) \ + && (CUM).arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (MODE)))\ + && GET_MODE_UNIT_SIZE ((MODE)) > UNITS_PER_WORD) \ + ? ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \ + + ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] & 1)) \ + : (CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)]) + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. + + On SH, the offset always starts at 0: the first parm reg is always + the same reg for a given argument class. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT) \ + do { \ + (CUM).arg_count[(int) SH_ARG_INT] = 0; \ + (CUM).arg_count[(int) SH_ARG_FLOAT] = 0; \ + } while (0) + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be + available.) */ + +#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \ + if (! TARGET_SH4 || PASS_IN_REG_P ((CUM), (MODE), (TYPE))) \ + ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \ + = (ROUND_REG ((CUM), (MODE)) \ + + ((MODE) == BLKmode \ + ? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \ + : ROUND_ADVANCE (GET_MODE_SIZE (MODE))))) + +/* Return boolean indicating arg of mode MODE will be passed in a reg. + This macro is only used in this file. */ + +#define PASS_IN_REG_P(CUM, MODE, TYPE) \ + (((TYPE) == 0 || ! TREE_ADDRESSABLE ((tree)(TYPE))) \ + && (TARGET_SH3E \ + ? 
((MODE) == BLKmode \ + ? (((CUM).arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD \ + + int_size_in_bytes (TYPE)) \ + <= NPARM_REGS (SImode) * UNITS_PER_WORD) \ + : ((ROUND_REG((CUM), (MODE)) \ + + HARD_REGNO_NREGS (BASE_ARG_REG (MODE), (MODE))) \ + <= NPARM_REGS (MODE))) \ + : ROUND_REG ((CUM), (MODE)) < NPARM_REGS (MODE))) + +/* Define where to put the arguments to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + + On SH the first args are normally in registers + and the rest are pushed. Any arg that starts within the first + NPARM_REGS words is at least partially passed in a register unless + its data type forbids. */ + +extern int current_function_varargs; + +#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \ + ((PASS_IN_REG_P ((CUM), (MODE), (TYPE)) \ + && ((NAMED) || TARGET_SH3E || ! current_function_varargs)) \ + ? gen_rtx (REG, (MODE), \ + ((BASE_ARG_REG (MODE) + ROUND_REG ((CUM), (MODE))) \ + ^ ((MODE) == SFmode && TARGET_SH4 \ + && TARGET_LITTLE_ENDIAN != 0))) \ + : 0) + +/* For an arg passed partly in registers and partly in memory, + this is the number of registers used. + For args passed entirely in registers or entirely in memory, zero. + + We sometimes split args. */ + +#define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) \ + ((PASS_IN_REG_P ((CUM), (MODE), (TYPE)) \ + && ! TARGET_SH4 \ + && (ROUND_REG ((CUM), (MODE)) \ + + ((MODE) != BLKmode \ + ? ROUND_ADVANCE (GET_MODE_SIZE (MODE)) \ + : ROUND_ADVANCE (int_size_in_bytes (TYPE))) \ + - NPARM_REGS (MODE) > 0)) \ + ? NPARM_REGS (MODE) - ROUND_REG ((CUM), (MODE)) \ + : 0) + +extern int current_function_anonymous_args; + +/* Perform any needed actions needed for a function that is receiving a + variable number of arguments. */ + +#define SETUP_INCOMING_VARARGS(ASF, MODE, TYPE, PAS, ST) \ + current_function_anonymous_args = 1; + +/* Call the function profiler with a given profile label. + We use two .aligns, so as to make sure that both the .long is aligned + on a 4 byte boundary, and that the .long is a fixed distance (2 bytes) + from the trapa instruction. */ + +#define FUNCTION_PROFILER(STREAM,LABELNO) \ +{ \ + fprintf((STREAM), "\t.align\t2\n"); \ + fprintf((STREAM), "\ttrapa\t#33\n"); \ + fprintf((STREAM), "\t.align\t2\n"); \ + asm_fprintf((STREAM), "\t.long\t%LLP%d\n", (LABELNO)); \ +} + +/* Define this macro if the code for function profiling should come + before the function prologue. Normally, the profiling code comes + after. */ + +#define PROFILE_BEFORE_PROLOGUE + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ + +#define EXIT_IGNORE_STACK 1 + +/* Generate the assembly code for function exit + Just dump out any accumulated constant table. 
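To make the cumulative-argument bookkeeping concrete, here is a stand-alone sketch (illustrative only, not part of the patch) of how integer arguments land in r4..r7 one word at a time, with anything that starts beyond the fourth word going to the stack. It deliberately ignores the floating-point register file and the double-word alignment handled by ROUND_REG.

/* Illustrative sketch of integer argument assignment to r4..r7.
   Assumes 4-byte words; ignores SH3E/SH4 FP registers and
   TARGET_ALIGN_DOUBLE padding.  */
#include <stdio.h>

#define WORD      4
#define NPARM_INT 4   /* r4..r7 */

int
main (void)
{
  int sizes[] = { 4, 4, 8, 4 };   /* e.g. f (int, int, long long, int) */
  int used = 0;                   /* words of r4..r7 consumed so far */
  int i;

  for (i = 0; i < 4; i++)
    {
      int words = (sizes[i] + WORD - 1) / WORD;   /* cf. ROUND_ADVANCE */
      if (used < NPARM_INT)
        printf ("arg %d: starts in r%d, %d word(s)\n", i, 4 + used, words);
      else
        printf ("arg %d: passed on the stack\n", i);
      used += words;
    }
  return 0;
}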
*/ + +#define FUNCTION_EPILOGUE(STREAM, SIZE) function_epilogue ((STREAM), (SIZE)) + +/* + On the SH, the trampoline looks like + 2 0002 DD02 mov.l l2,r13 + 1 0000 D301 mov.l l1,r3 + 3 0004 4D2B jmp @r13 + 4 0006 0009 nop + 5 0008 00000000 l1: .long function + 6 000c 00000000 l2: .long area */ + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE 16 + +/* Alignment required for a trampoline in bits . */ +#define TRAMPOLINE_ALIGNMENT \ + ((CACHE_LOG < 3 || TARGET_SMALLCODE && ! TARGET_HARVARD) ? 32 : 64) + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ + +#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \ +{ \ + emit_move_insn (gen_rtx (MEM, SImode, (TRAMP)), \ + GEN_INT (TARGET_LITTLE_ENDIAN ? 0xd301dd02 : 0xdd02d301));\ + emit_move_insn (gen_rtx (MEM, SImode, plus_constant ((TRAMP), 4)), \ + GEN_INT (TARGET_LITTLE_ENDIAN ? 0x00094d2b : 0x4d2b0009));\ + emit_move_insn (gen_rtx (MEM, SImode, plus_constant ((TRAMP), 8)), \ + (CXT)); \ + emit_move_insn (gen_rtx (MEM, SImode, plus_constant ((TRAMP), 12)), \ + (FNADDR)); \ + if (TARGET_HARVARD) \ + emit_insn (gen_ic_invalidate_line (TRAMP)); \ +} + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. + FRAMEADDR is already the frame pointer of the COUNT frame, so we + can ignore COUNT. */ + +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + (((COUNT) == 0) \ + ? gen_rtx (MEM, Pmode, gen_rtx (REG, Pmode, RETURN_ADDRESS_POINTER_REGNUM)) \ + : (rtx) 0) + +/* Generate necessary RTL for __builtin_saveregs(). + ARGLIST is the argument list; see expr.c. */ +extern struct rtx_def *sh_builtin_saveregs (); +#define EXPAND_BUILTIN_SAVEREGS(ARGLIST) sh_builtin_saveregs (ARGLIST) + +/* Addressing modes, and classification of registers for them. */ +#define HAVE_POST_INCREMENT 1 +/*#define HAVE_PRE_INCREMENT 1*/ +/*#define HAVE_POST_DECREMENT 1*/ +#define HAVE_PRE_DECREMENT 1 + +#define USE_LOAD_POST_INCREMENT(mode) ((mode == SImode || mode == DImode) \ + ? 0 : 1) +#define USE_LOAD_PRE_DECREMENT(mode) 0 +#define USE_STORE_POST_INCREMENT(mode) 0 +#define USE_STORE_PRE_DECREMENT(mode) ((mode == SImode || mode == DImode) \ + ? 0 : 1) + +#define MOVE_BY_PIECES_P(SIZE, ALIGN) (move_by_pieces_ninsns (SIZE, ALIGN) \ + < (TARGET_SMALLCODE ? 2 : \ + ((ALIGN >= 4) ? 16 : 2))) + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ + +#define REGNO_OK_FOR_BASE_P(REGNO) \ + ((REGNO) < PR_REG || (unsigned) reg_renumber[(REGNO)] < PR_REG) +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + ((REGNO) == 0 || (unsigned) reg_renumber[(REGNO)] == 0) + +/* Maximum number of registers that can appear in a valid memory + address. */ + +#define MAX_REGS_PER_ADDRESS 2 + +/* Recognize any constant value that is a valid address. */ + +#define CONSTANT_ADDRESS_P(X) (GET_CODE (X) == LABEL_REF) + +/* Nonzero if the constant value X is a legitimate general operand. 
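A quick source-level note on the RETURN_ADDR_RTX definition above: only the innermost frame is supported, so __builtin_return_address is only meaningful with a count of zero on this port. The sketch below is illustrative only.

/* Illustrative use of __builtin_return_address under the RETURN_ADDR_RTX
   definition above.  */
void *
my_return_address (void)
{
  /* Count 0: read through the fake return-address pointer register.  */
  return __builtin_return_address (0);
}

void *
my_callers_return_address (void)
{
  /* Count != 0: RETURN_ADDR_RTX yields (rtx) 0, so this cannot be used
     to walk further up the stack on SH.  */
  return __builtin_return_address (1);
}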
*/ + +#define LEGITIMATE_CONSTANT_P(X) \ + (GET_CODE (X) != CONST_DOUBLE \ + || GET_MODE (X) == DFmode || GET_MODE (X) == SFmode \ + || (TARGET_SH3E && (fp_zero_operand (X) || fp_one_operand (X)))) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. */ + +#ifndef REG_OK_STRICT + +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define REG_OK_FOR_BASE_P(X) \ + (REGNO (X) <= 16 || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +#define REG_OK_FOR_INDEX_P(X) \ + (REGNO (X) == 0 || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* Nonzero if X/OFFSET is a hard reg that can be used as an index + or if X is a pseudo reg. */ +#define SUBREG_OK_FOR_INDEX_P(X, OFFSET) \ + ((REGNO (X) == 0 && OFFSET == 0) || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +#else + +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define REG_OK_FOR_BASE_P(X) \ + REGNO_OK_FOR_BASE_P (REGNO (X)) + +/* Nonzero if X is a hard reg that can be used as an index. */ +#define REG_OK_FOR_INDEX_P(X) \ + REGNO_OK_FOR_INDEX_P (REGNO (X)) + +/* Nonzero if X/OFFSET is a hard reg that can be used as an index. */ +#define SUBREG_OK_FOR_INDEX_P(X, OFFSET) \ + (REGNO_OK_FOR_INDEX_P (REGNO (X)) && (OFFSET) == 0) + +#endif + +/* The 'Q' constraint is a pc relative load operand. */ +#define EXTRA_CONSTRAINT_Q(OP) \ + (GET_CODE (OP) == MEM && \ + ((GET_CODE (XEXP ((OP), 0)) == LABEL_REF) \ + || (GET_CODE (XEXP ((OP), 0)) == CONST \ + && GET_CODE (XEXP (XEXP ((OP), 0), 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (XEXP ((OP), 0), 0), 0)) == LABEL_REF \ + && GET_CODE (XEXP (XEXP (XEXP ((OP), 0), 0), 1)) == CONST_INT))) + +#define EXTRA_CONSTRAINT(OP, C) \ + ((C) == 'Q' ? EXTRA_CONSTRAINT_Q (OP) \ + : 0) + +/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression + that is a valid memory address for an instruction. + The MODE argument is the machine mode for the MEM expression + that wants to use this address. + + The other macros defined here are used only in GO_IF_LEGITIMATE_ADDRESS. */ + +#define MODE_DISP_OK_4(X,MODE) \ +(GET_MODE_SIZE (MODE) == 4 && (unsigned) INTVAL (X) < 64 \ + && ! (INTVAL (X) & 3) && ! (TARGET_SH3E && (MODE) == SFmode)) + +#define MODE_DISP_OK_8(X,MODE) \ +((GET_MODE_SIZE(MODE)==8) && ((unsigned)INTVAL(X)<60) \ + && ! (INTVAL(X) & 3) && ! (TARGET_SH4 && (MODE) == DFmode)) + +#define BASE_REGISTER_RTX_P(X) \ + ((GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + || (GET_CODE (X) == SUBREG \ + && GET_CODE (SUBREG_REG (X)) == REG \ + && REG_OK_FOR_BASE_P (SUBREG_REG (X)))) + +/* Since this must be r0, which is a single register class, we must check + SUBREGs more carefully, to be sure that we don't accept one that extends + outside the class. */ +#define INDEX_REGISTER_RTX_P(X) \ + ((GET_CODE (X) == REG && REG_OK_FOR_INDEX_P (X)) \ + || (GET_CODE (X) == SUBREG \ + && GET_CODE (SUBREG_REG (X)) == REG \ + && SUBREG_OK_FOR_INDEX_P (SUBREG_REG (X), SUBREG_WORD (X)))) + +/* Jump to LABEL if X is a valid address RTX. This must also take + REG_OK_STRICT into account when deciding about valid registers, but it uses + the above macros so we are in luck. 
+ + Allow REG + REG+disp + REG+r0 + REG++ + --REG */ + +/* ??? The SH3e does not have the REG+disp addressing mode when loading values + into the FRx registers. We implement this by setting the maximum offset + to zero when the value is SFmode. This also restricts loading of SFmode + values into the integer registers, but that can't be helped. */ + +/* The SH allows a displacement in a QI or HI amode, but only when the + other operand is R0. GCC doesn't handle this very well, so we forgo + all of that. + + A legitimate index for a QI or HI is 0, SI can be any number 0..63, + DI can be any number 0..60. */ + +#define GO_IF_LEGITIMATE_INDEX(MODE, OP, LABEL) \ + do { \ + if (GET_CODE (OP) == CONST_INT) \ + { \ + if (MODE_DISP_OK_4 ((OP), (MODE))) goto LABEL; \ + if (MODE_DISP_OK_8 ((OP), (MODE))) goto LABEL; \ + } \ + } while(0) + +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ +{ \ + if (BASE_REGISTER_RTX_P (X)) \ + goto LABEL; \ + else if ((GET_CODE (X) == POST_INC || GET_CODE (X) == PRE_DEC) \ + && BASE_REGISTER_RTX_P (XEXP ((X), 0))) \ + goto LABEL; \ + else if (GET_CODE (X) == PLUS \ + && ((MODE) != PSImode || reload_completed)) \ + { \ + rtx xop0 = XEXP ((X), 0); \ + rtx xop1 = XEXP ((X), 1); \ + if (GET_MODE_SIZE (MODE) <= 8 && BASE_REGISTER_RTX_P (xop0)) \ + GO_IF_LEGITIMATE_INDEX ((MODE), xop1, LABEL); \ + if (GET_MODE_SIZE (MODE) <= 4 \ + || TARGET_SH4 && TARGET_FMOVD && MODE == DFmode) \ + { \ + if (BASE_REGISTER_RTX_P (xop1) && INDEX_REGISTER_RTX_P (xop0))\ + goto LABEL; \ + if (INDEX_REGISTER_RTX_P (xop1) && BASE_REGISTER_RTX_P (xop0))\ + goto LABEL; \ + } \ + } \ +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. + This macro is used in only one place: `memory_address' in explow.c. + + OLDX is the address as it was before break_out_memory_refs was called. + In some cases it is useful to look at this to decide what needs to be done. + + MODE and WIN are passed so that this macro can use + GO_IF_LEGITIMATE_ADDRESS. + + It is always safe for this macro to do nothing. It exists to recognize + opportunities to optimize the output. + + For the SH, if X is almost suitable for indexing, but the offset is + out of range, convert it into a normal form so that cse has a chance + of reducing the number of address registers used. */ + +#define LEGITIMIZE_ADDRESS(X,OLDX,MODE,WIN) \ +{ \ + if (GET_CODE (X) == PLUS \ + && (GET_MODE_SIZE (MODE) == 4 \ + || GET_MODE_SIZE (MODE) == 8) \ + && GET_CODE (XEXP ((X), 1)) == CONST_INT \ + && BASE_REGISTER_RTX_P (XEXP ((X), 0)) \ + && ! (TARGET_SH4 && (MODE) == DFmode) \ + && ! (TARGET_SH3E && (MODE) == SFmode)) \ + { \ + rtx index_rtx = XEXP ((X), 1); \ + HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; \ + rtx sum; \ + \ + GO_IF_LEGITIMATE_INDEX ((MODE), index_rtx, WIN); \ + /* On rare occasions, we might get an unaligned pointer \ + that is indexed in a way to give an aligned address. \ + Therefore, keep the lower two bits in offset_base. */ \ + /* Instead of offset_base 128..131 use 124..127, so that \ + simple add suffices. */ \ + if (offset > 127) \ + { \ + offset_base = ((offset + 4) & ~60) - 4; \ + } \ + else \ + offset_base = offset & ~60; \ + /* Sometimes the normal form does not suit DImode. We \ + could avoid that by using smaller ranges, but that \ + would give less optimized code when SImode is \ + prevalent. 
*/ \ + if (GET_MODE_SIZE (MODE) + offset - offset_base <= 64) \ + { \ + sum = expand_binop (Pmode, add_optab, XEXP ((X), 0), \ + GEN_INT (offset_base), NULL_RTX, 0, \ + OPTAB_LIB_WIDEN); \ + \ + (X) = gen_rtx (PLUS, Pmode, sum, GEN_INT (offset - offset_base)); \ + goto WIN; \ + } \ + } \ +} + +/* A C compound statement that attempts to replace X, which is an address + that needs reloading, with a valid memory address for an operand of + mode MODE. WIN is a C statement label elsewhere in the code. + + Like for LEGITIMIZE_ADDRESS, for the SH we try to get a normal form + of the address. That will allow inheritance of the address reloads. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \ +{ \ + if (GET_CODE (X) == PLUS \ + && (GET_MODE_SIZE (MODE) == 4 || GET_MODE_SIZE (MODE) == 8) \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && BASE_REGISTER_RTX_P (XEXP (X, 0)) \ + && ! (TARGET_SH4 && (MODE) == DFmode) \ + && ! ((MODE) == PSImode && (TYPE) == RELOAD_FOR_INPUT_ADDRESS)) \ + { \ + rtx index_rtx = XEXP (X, 1); \ + HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; \ + rtx sum; \ + \ + if (TARGET_SH3E && MODE == SFmode) \ + { \ + X = copy_rtx (X); \ + push_reload (index_rtx, NULL_RTX, &XEXP (X, 1), NULL_PTR, \ + INDEX_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), \ + (TYPE)); \ + goto WIN; \ + } \ + /* Instead of offset_base 128..131 use 124..127, so that \ + simple add suffices. */ \ + if (offset > 127) \ + { \ + offset_base = ((offset + 4) & ~60) - 4; \ + } \ + else \ + offset_base = offset & ~60; \ + /* Sometimes the normal form does not suit DImode. We \ + could avoid that by using smaller ranges, but that \ + would give less optimized code when SImode is \ + prevalent. */ \ + if (GET_MODE_SIZE (MODE) + offset - offset_base <= 64) \ + { \ + sum = gen_rtx (PLUS, Pmode, XEXP (X, 0), \ + GEN_INT (offset_base)); \ + X = gen_rtx (PLUS, Pmode, sum, GEN_INT (offset - offset_base));\ + push_reload (sum, NULL_RTX, &XEXP (X, 0), NULL_PTR, \ + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), \ + (TYPE)); \ + goto WIN; \ + } \ + } \ + /* We must re-recognize what we created before. */ \ + else if (GET_CODE (X) == PLUS \ + && (GET_MODE_SIZE (MODE) == 4 || GET_MODE_SIZE (MODE) == 8) \ + && GET_CODE (XEXP (X, 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \ + && BASE_REGISTER_RTX_P (XEXP (XEXP (X, 0), 0)) \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && ! (TARGET_SH3E && MODE == SFmode)) \ + { \ + /* Because this address is so complex, we know it must have \ + been created by LEGITIMIZE_RELOAD_ADDRESS before; thus, \ + it is already unshared, and needs no further unsharing. */ \ + push_reload (XEXP ((X), 0), NULL_RTX, &XEXP ((X), 0), NULL_PTR, \ + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), (TYPE));\ + goto WIN; \ + } \ +} + +/* Go to LABEL if ADDR (a legitimate address expression) + has an effect that depends on the machine mode it is used for. + + ??? Strictly speaking, we should also include all indexed addressing, + because the index scale factor is the length of the operand. + However, the impact of GO_IF_MODE_DEPENDENT_ADDRESS would be to + high if we did that. So we rely on reload to fix things up. */ + +#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR,LABEL) \ +{ \ + if (GET_CODE(ADDR) == PRE_DEC || GET_CODE(ADDR) == POST_INC) \ + goto LABEL; \ +} + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE (TARGET_BIGTABLE ? 
SImode : HImode) + +#define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \ +((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 127 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, QImode) \ + : (MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 255 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, QImode) \ + : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 ? HImode \ + : SImode) + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. + Do not define this if the table should contain absolute addresses. */ +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Specify the tree operation to be used to convert reals to integers. */ +#define IMPLICIT_FIX_EXPR FIX_ROUND_EXPR + +/* This is the kind of divide that is easiest to do in the general case. */ +#define EASY_DIV_EXPR TRUNC_DIV_EXPR + +/* Since the SH3e has only `float' support, it is desirable to make all + floating point types equivalent to `float'. */ +#define DOUBLE_TYPE_SIZE ((TARGET_SH3E && ! TARGET_SH4) ? 32 : 64) + +/* 'char' is signed by default. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* The type of size_t unsigned int. */ +#define SIZE_TYPE "unsigned int" + +#define WCHAR_TYPE "short unsigned int" +#define WCHAR_TYPE_SIZE 16 + +/* Don't cse the address of the function being compiled. */ +/*#define NO_RECURSIVE_FUNCTION_CSE 1*/ + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 + +/* Max number of bytes we want move_by_pieces to be able to copy + efficiently. */ +#define MOVE_MAX_PIECES (TARGET_SH4 ? 8 : 4) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, NIL if none. */ +#define LOAD_EXTEND_OP(MODE) SIGN_EXTEND + +/* Define if loading short immediate values into registers sign extends. */ +#define SHORT_IMMEDIATES_SIGN_EXTEND + +/* Define this if zero-extension is slow (more than one real instruction). + On the SH, it's only one instruction. */ +/* #define SLOW_ZERO_EXTEND */ + +/* Nonzero if access to memory by bytes is slow and undesirable. */ +#define SLOW_BYTE_ACCESS 0 + +/* We assume that the store-condition-codes instructions store 0 for false + and some other value for true. This is the value stored for true. */ + +#define STORE_FLAG_VALUE 1 + +/* Immediate shift counts are truncated by the output routines (or was it + the assembler?). Shift counts in a register are truncated by SH. Note + that the native compiler puts too large (> 32) immediate shift counts + into a register and shifts by the register, letting the SH decide what + to do instead of doing that itself. */ +/* ??? The library routines in lib1funcs.asm truncate the shift count. + However, the SH3 has hardware shifts that do not truncate exactly as gcc + expects - the sign bit is significant - so it appears that we need to + leave this zero for correct SH3 code. */ +#define SHIFT_COUNT_TRUNCATED (! TARGET_SH3) + +/* All integers have the same format so truncation is easy. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) 1 + +/* Define this if addresses of constant functions + shouldn't be put through pseudo regs where they can be cse'd. 
+ Desirable on machines where ordinary constants are expensive + but a CALL with constant address is cheap. */ +/*#define NO_FUNCTION_CSE 1*/ + +/* Chars and shorts should be passed as ints. */ +#define PROMOTE_PROTOTYPES 1 + +/* The machine modes of pointers and functions. */ +#define Pmode SImode +#define FUNCTION_MODE Pmode + +/* The relative costs of various types of constants. Note that cse.c defines + REG = 1, SUBREG = 2, any node = (2 + sum of subnodes). */ + +#define CONST_COSTS(RTX, CODE, OUTER_CODE) \ + case CONST_INT: \ + if (INTVAL (RTX) == 0) \ + return 0; \ + else if (CONST_OK_FOR_I (INTVAL (RTX))) \ + return 1; \ + else if (((OUTER_CODE) == AND || (OUTER_CODE) == IOR || (OUTER_CODE) == XOR) \ + && CONST_OK_FOR_L (INTVAL (RTX))) \ + return 1; \ + else \ + return 8; \ + case CONST: \ + case LABEL_REF: \ + case SYMBOL_REF: \ + return 5; \ + case CONST_DOUBLE: \ + return 10; + +#define RTX_COSTS(X, CODE, OUTER_CODE) \ + case PLUS: \ + return (COSTS_N_INSNS (1) \ + + rtx_cost (XEXP ((X), 0), PLUS) \ + + (rtx_equal_p (XEXP ((X), 0), XEXP ((X), 1))\ + ? 0 : rtx_cost (XEXP ((X), 1), PLUS)));\ + case AND: \ + return COSTS_N_INSNS (andcosts (X)); \ + case MULT: \ + return COSTS_N_INSNS (multcosts (X)); \ + case ASHIFT: \ + case ASHIFTRT: \ + case LSHIFTRT: \ + /* Add one extra unit for the matching constraint. \ + Otherwise loop strength reduction would think that\ + a shift with different sourc and destination is \ + as cheap as adding a constant to a register. */ \ + return (COSTS_N_INSNS (shiftcosts (X)) \ + + rtx_cost (XEXP ((X), 0), (CODE)) \ + + 1); \ + case DIV: \ + case UDIV: \ + case MOD: \ + case UMOD: \ + return COSTS_N_INSNS (20); \ + case FLOAT: \ + case FIX: \ + return 100; + +/* The multiply insn on the SH1 and the divide insns on the SH1 and SH2 + are actually function calls with some special constraints on arguments + and register usage. + + These macros tell reorg that the references to arguments and + register clobbers for insns of type sfunc do not appear to happen + until after the millicode call. This allows reorg to put insns + which set the argument registers into the delay slot of the millicode + call -- thus they act more like traditional CALL_INSNs. + + get_attr_is_sfunc will try to recognize the given insn, so make sure to + filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns + in particular. */ + +#define INSN_SETS_ARE_DELAYED(X) \ + ((GET_CODE (X) == INSN \ + && GET_CODE (PATTERN (X)) != SEQUENCE \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER \ + && get_attr_is_sfunc (X))) + +#define INSN_REFERENCES_ARE_DELAYED(X) \ + ((GET_CODE (X) == INSN \ + && GET_CODE (PATTERN (X)) != SEQUENCE \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER \ + && get_attr_is_sfunc (X))) + +/* Compute the cost of an address. For the SH, all valid addresses are + the same cost. */ +/* ??? Perhaps we should make reg+reg addresses have higher cost because + they add to register pressure on r0. */ + +#define ADDRESS_COST(RTX) 1 + +/* Compute extra cost of moving data between one register class + and another. */ + +/* Regclass always uses 2 for moves in the same register class; + If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, + it uses this information. Hence, the general register <-> floating point + register information here is not used for SFmode. */ +#define REGISTER_MOVE_COST(SRCCLASS, DSTCLASS) \ + ((((DSTCLASS) == T_REGS) || ((DSTCLASS) == PR_REG)) ? 
10 \ + : ((((DSTCLASS) == FP0_REGS || (DSTCLASS) == FP_REGS || (DSTCLASS) == DF_REGS) \ + && ((SRCCLASS) == GENERAL_REGS || (SRCCLASS) == R0_REGS)) \ + || (((DSTCLASS) == GENERAL_REGS || (DSTCLASS) == R0_REGS) \ + && ((SRCCLASS) == FP0_REGS || (SRCCLASS) == FP_REGS \ + || (SRCCLASS) == DF_REGS))) \ + ? TARGET_FMOVD ? 8 : 12 \ + : (((DSTCLASS) == FPUL_REGS \ + && ((SRCCLASS) == GENERAL_REGS || (SRCCLASS) == R0_REGS)) \ + || (SRCCLASS == FPUL_REGS \ + && ((DSTCLASS) == GENERAL_REGS || (DSTCLASS) == R0_REGS))) \ + ? 5 \ + : (((DSTCLASS) == FPUL_REGS \ + && ((SRCCLASS) == PR_REGS || (SRCCLASS) == MAC_REGS)) \ + || ((SRCCLASS) == FPUL_REGS \ + && ((DSTCLASS) == PR_REGS || (DSTCLASS) == MAC_REGS))) \ + ? 7 \ + : 2) + +/* ??? Perhaps make MEMORY_MOVE_COST depend on compiler option? This + would be so that people would slow memory systems could generate + different code that does fewer memory accesses. */ + +/* Assembler output control. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at + the end of the line. */ +#define ASM_COMMENT_START "!" + +/* The text to go at the start of the assembler file. */ +#define ASM_FILE_START(STREAM) \ + output_file_start (STREAM) + +#define ASM_FILE_END(STREAM) + +#define ASM_APP_ON "" +#define ASM_APP_OFF "" +#define FILE_ASM_OP "\t.file\n" +#define IDENT_ASM_OP "\t.ident\n" +#define SET_ASM_OP ".set" + +/* How to change between sections. */ + +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define CTORS_SECTION_ASM_OP "\t.section\t.ctors\n" +#define DTORS_SECTION_ASM_OP "\t.section\t.dtors\n" +#define EXTRA_SECTIONS in_ctors, in_dtors +#define EXTRA_SECTION_FUNCTIONS \ +void \ +ctors_section() \ +{ \ + if (in_section != in_ctors) \ + { \ + fprintf (asm_out_file, "%s\n", CTORS_SECTION_ASM_OP); \ + in_section = in_ctors; \ + } \ +} \ +void \ +dtors_section() \ +{ \ + if (in_section != in_dtors) \ + { \ + fprintf (asm_out_file, "%s\n", DTORS_SECTION_ASM_OP); \ + in_section = in_dtors; \ + } \ +} + +/* If defined, a C expression whose value is a string containing the + assembler operation to identify the following data as + uninitialized global data. If not defined, and neither + `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined, + uninitialized global data will be output in the data section if + `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be + used. */ +#ifndef BSS_SECTION_ASM_OP +#define BSS_SECTION_ASM_OP ".section\t.bss" +#endif + +/* Like `ASM_OUTPUT_BSS' except takes the required alignment as a + separate, explicit argument. If you define this macro, it is used + in place of `ASM_OUTPUT_BSS', and gives you more flexibility in + handling the required alignment of the variable. The alignment is + specified as the number of bits. + + Try to use function `asm_output_aligned_bss' defined in file + `varasm.c' when defining this macro. */ +#ifndef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) +#endif + +/* Define this so that jump tables go in same section as the current function, + which could be text or it could be a user defined section. */ +#define JUMP_TABLES_IN_TEXT_SECTION 1 + +/* A C statement to output something to the assembler file to switch to section + NAME for object DECL which is either a FUNCTION_DECL, a VAR_DECL or + NULL_TREE. Some target formats do not support arbitrary sections. 
Do not + define this macro in such cases. */ + +#define ASM_OUTPUT_SECTION_NAME(FILE, DECL, NAME, RELOC) \ + do { fprintf (FILE, ".section\t%s\n", NAME); } while (0) + +#define ASM_OUTPUT_CONSTRUCTOR(FILE,NAME) \ + do { ctors_section(); asm_fprintf((FILE),"\t.long\t%U%s\n", (NAME)); } while (0) + +#define ASM_OUTPUT_DESTRUCTOR(FILE,NAME) \ + do { dtors_section(); asm_fprintf((FILE),"\t.long\t%U%s\n", (NAME)); } while (0) + +#undef DO_GLOBAL_CTORS_BODY + +#define DO_GLOBAL_CTORS_BODY \ +{ \ + typedef (*pfunc)(); \ + extern pfunc __ctors[]; \ + extern pfunc __ctors_end[]; \ + pfunc *p; \ + for (p = __ctors_end; p > __ctors; ) \ + { \ + (*--p)(); \ + } \ +} + +#undef DO_GLOBAL_DTORS_BODY +#define DO_GLOBAL_DTORS_BODY \ +{ \ + typedef (*pfunc)(); \ + extern pfunc __dtors[]; \ + extern pfunc __dtors_end[]; \ + pfunc *p; \ + for (p = __dtors; p < __dtors_end; p++) \ + { \ + (*p)(); \ + } \ +} + +#define ASM_OUTPUT_REG_PUSH(file, v) \ + fprintf ((file), "\tmov.l\tr%s,-@r15\n", (v)); + +#define ASM_OUTPUT_REG_POP(file, v) \ + fprintf ((file), "\tmov.l\t@r15+,r%s\n", (v)); + +/* The assembler's names for the registers. RFP need not always be used as + the Real framepointer; it can also be used as a normal general register. + Note that the name `fp' is horribly misleading since `fp' is in fact only + the argument-and-return-context pointer. */ + +extern char fp_reg_names[][5]; + +#define REGISTER_NAMES \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "ap", "pr", "t", "gbr", "mach","macl", fp_reg_names[16], "rap", \ + fp_reg_names[0], fp_reg_names[1] , fp_reg_names[2], fp_reg_names[3], \ + fp_reg_names[4], fp_reg_names[5], fp_reg_names[6], fp_reg_names[7], \ + fp_reg_names[8], fp_reg_names[9], fp_reg_names[10], fp_reg_names[11], \ + fp_reg_names[12], fp_reg_names[13], fp_reg_names[14], fp_reg_names[15], \ + fp_reg_names[17], fp_reg_names[18], fp_reg_names[19], fp_reg_names[20], \ + fp_reg_names[21], fp_reg_names[22], fp_reg_names[23], fp_reg_names[24], \ + "fpscr", \ +} + +#define DEBUG_REGISTER_NAMES \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "ap", "pr", "t", "gbr", "mach","macl", "fpul","rap", \ + "fr0","fr1","fr2", "fr3", "fr4", "fr5", "fr6", "fr7", \ + "fr8","fr9","fr10","fr11","fr12","fr13","fr14","fr15",\ + "xd0","xd2","xd4", "xd6", "xd8", "xd10","xd12","xd14", \ + "fpscr", \ +} + +/* DBX register number for a given compiler register number. */ +/* GDB has FPUL at 23 and FP0 at 25, so we must add one to all FP registers + to match gdb. */ +#define DBX_REGISTER_NUMBER(REGNO) \ + (((REGNO) >= 22 && (REGNO) <= 39) ? ((REGNO) + 1) : (REGNO)) + +/* Output a label definition. */ +#define ASM_OUTPUT_LABEL(FILE,NAME) \ + do { assemble_name ((FILE), (NAME)); fputs (":\n", (FILE)); } while (0) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf ((FILE), "\t.align %d\n", (LOG)) + +/* Output a function label definition. */ +#define ASM_DECLARE_FUNCTION_NAME(STREAM,NAME,DECL) \ + ASM_OUTPUT_LABEL((STREAM), (NAME)) + +/* Output a globalising directive for a label. */ +#define ASM_GLOBALIZE_LABEL(STREAM,NAME) \ + (fprintf ((STREAM), "\t.global\t"), \ + assemble_name ((STREAM), (NAME)), \ + fputc ('\n', (STREAM))) + +/* The prefix to add to user-visible assembler symbols. 
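Because the off-by-one in DBX_REGISTER_NUMBER above is easy to misread, here is a tiny stand-alone sketch (illustrative only) of the mapping it produces between the compiler's register numbers and the numbering GDB expects in stabs output.

/* Illustrative sketch of the stabs numbering from DBX_REGISTER_NUMBER:
   compiler numbers 22 (fpul) through 39 (fr15) shift up by one so that
   fpul becomes 23 and fr0 becomes 25, matching GDB.  */
#include <stdio.h>

static int
dbx_regno (int regno)
{
  return (regno >= 22 && regno <= 39) ? regno + 1 : regno;
}

int
main (void)
{
  printf ("r15  %2d -> %2d\n", 15, dbx_regno (15));   /* unchanged */
  printf ("fpul %2d -> %2d\n", 22, dbx_regno (22));   /* 23 */
  printf ("fr0  %2d -> %2d\n", 24, dbx_regno (24));   /* 25 */
  printf ("fr15 %2d -> %2d\n", 39, dbx_regno (39));   /* 40 */
  return 0;
}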
*/ + +#define USER_LABEL_PREFIX "_" + +/* The prefix to add to an internally generated label. */ + +#define LOCAL_LABEL_PREFIX "" + +/* Make an internal label into a string. */ +#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \ + sprintf ((STRING), "*%s%s%d", LOCAL_LABEL_PREFIX, (PREFIX), (NUM)) + +/* Output an internal label definition. */ +#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ + asm_fprintf ((FILE), "%L%s%d:\n", (PREFIX), (NUM)) + +/* #define ASM_OUTPUT_CASE_END(STREAM,NUM,TABLE) */ + +/* Construct a private name. */ +#define ASM_FORMAT_PRIVATE_NAME(OUTVAR,NAME,NUMBER) \ + ((OUTVAR) = (char *) alloca (strlen (NAME) + 10), \ + sprintf ((OUTVAR), "%s.%d", (NAME), (NUMBER))) + +/* Output a relative address table. */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM,BODY,VALUE,REL) \ + switch (GET_MODE (BODY)) \ + { \ + case SImode: \ + asm_fprintf ((STREAM), "\t.long\t%LL%d-%LL%d\n", (VALUE),(REL)); \ + break; \ + case HImode: \ + asm_fprintf ((STREAM), "\t.word\t%LL%d-%LL%d\n", (VALUE),(REL)); \ + break; \ + case QImode: \ + asm_fprintf ((STREAM), "\t.byte\t%LL%d-%LL%d\n", (VALUE),(REL)); \ + break; \ + } + +/* Output an absolute table element. */ + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM,VALUE) \ + if (TARGET_BIGTABLE) \ + asm_fprintf ((STREAM), "\t.long\t%LL%d\n", (VALUE)); \ + else \ + asm_fprintf ((STREAM), "\t.word\t%LL%d\n", (VALUE)); \ + +/* Output various types of constants. */ + +/* This is how to output an assembler line defining a `double'. */ + +#define ASM_OUTPUT_DOUBLE(FILE,VALUE) \ +do { char dstr[30]; \ + REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", dstr); \ + fprintf ((FILE), "\t.double %s\n", dstr); \ + } while (0) + +/* This is how to output an assembler line defining a `float' constant. */ +#define ASM_OUTPUT_FLOAT(FILE,VALUE) \ +do { char dstr[30]; \ + REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", dstr); \ + fprintf ((FILE), "\t.float %s\n", dstr); \ + } while (0) + +#define ASM_OUTPUT_INT(STREAM, EXP) \ + (fprintf ((STREAM), "\t.long\t"), \ + output_addr_const ((STREAM), (EXP)), \ + fputc ('\n', (STREAM))) + +#define ASM_OUTPUT_SHORT(STREAM, EXP) \ + (fprintf ((STREAM), "\t.short\t"), \ + output_addr_const ((STREAM), (EXP)), \ + fputc ('\n', (STREAM))) + +#define ASM_OUTPUT_CHAR(STREAM, EXP) \ + (fprintf ((STREAM), "\t.byte\t"), \ + output_addr_const ((STREAM), (EXP)), \ + fputc ('\n', (STREAM))) + +#define ASM_OUTPUT_BYTE(STREAM, VALUE) \ + fprintf ((STREAM), "\t.byte\t%d\n", (VALUE)) \ + +/* The next two are used for debug info when compiling with -gdwarf. */ +#define UNALIGNED_SHORT_ASM_OP ".uaword" +#define UNALIGNED_INT_ASM_OP ".ualong" + +/* Loop alignment is now done in machine_dependent_reorg, so that + branch shortening can know about it. */ + +/* This is how to output an assembler line + that says to advance the location counter by SIZE bytes. */ + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf ((FILE), "\t.space %d\n", (SIZE)) + +/* This says how to output an assembler line + to define a global common symbol. */ + +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ +( fputs ("\t.comm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ",%d\n", (SIZE))) + +/* This says how to output an assembler line + to define a local common symbol. */ + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \ +( fputs ("\t.lcomm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ",%d\n", (SIZE))) + +/* The assembler's parentheses characters. */ +#define ASM_OPEN_PAREN "(" +#define ASM_CLOSE_PAREN ")" + +/* Target characters. 
*/ +#define TARGET_BELL 007 +#define TARGET_BS 010 +#define TARGET_TAB 011 +#define TARGET_NEWLINE 012 +#define TARGET_VT 013 +#define TARGET_FF 014 +#define TARGET_CR 015 + +/* A C statement to be executed just prior to the output of + assembler code for INSN, to modify the extracted operands so + they will be output differently. + + Here the argument OPVEC is the vector containing the operands + extracted from INSN, and NOPERANDS is the number of elements of + the vector which contain meaningful data for this insn. + The contents of this vector are what will be used to convert the insn + template into assembler code, so you can change the assembler output + by changing the contents of the vector. */ + +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + final_prescan_insn ((INSN), (OPVEC), (NOPERANDS)) + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +#define PRINT_OPERAND(STREAM, X, CODE) print_operand ((STREAM), (X), (CODE)) + +/* Print a memory address as an operand to reference that memory location. */ + +#define PRINT_OPERAND_ADDRESS(STREAM,X) print_operand_address ((STREAM), (X)) + +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \ + ((CHAR) == '.' || (CHAR) == '#' || (CHAR) == '@' || (CHAR) == ',' \ + || (CHAR) == '$') + +extern struct rtx_def *sh_compare_op0; +extern struct rtx_def *sh_compare_op1; +extern struct rtx_def *prepare_scc_operands(); + +/* Which processor to schedule for. The elements of the enumeration must + match exactly the cpu attribute in the sh.md file. */ + +enum processor_type { + PROCESSOR_SH1, + PROCESSOR_SH2, + PROCESSOR_SH3, + PROCESSOR_SH3E, + PROCESSOR_SH4 +}; + +#define sh_cpu_attr ((enum attr_cpu)sh_cpu) +extern enum processor_type sh_cpu; + +extern enum machine_mode sh_addr_diff_vec_mode; + +extern int optimize; /* needed for gen_casesi. */ + +/* Declare functions defined in sh.c and used in templates. */ + +extern char *output_branch(); +extern char *output_ieee_ccmpeq(); +extern char *output_branchy_insn(); +extern char *output_shift(); +extern char *output_movedouble(); +extern char *output_movepcrel(); +extern char *output_jump_label_table(); +extern char *output_far_jump(); + +enum mdep_reorg_phase_e +{ + SH_BEFORE_MDEP_REORG, + SH_INSERT_USES_LABELS, + SH_SHORTEN_BRANCHES0, + SH_FIXUP_PCLOAD, + SH_SHORTEN_BRANCHES1, + SH_AFTER_MDEP_REORG +}; + +extern enum mdep_reorg_phase_e mdep_reorg_phase; + +void machine_dependent_reorg (); +struct rtx_def *sfunc_uses_reg (); +int barrier_align (); +int sh_loop_align (); + +#define MACHINE_DEPENDENT_REORG(X) machine_dependent_reorg(X) + +/* Generate calls to memcpy, memcmp and memset. */ + +#define TARGET_MEM_FUNCTIONS + +/* Define this macro if you want to implement any pragmas. If defined, it + is a C expression whose value is 1 if the pragma was handled by the + macro, zero otherwise. */ +#define HANDLE_PRAGMA(GETC, UNGETC, NODE) sh_handle_pragma (GETC, UNGETC, NODE) +extern int sh_handle_pragma (); + +/* Set when processing a function with pragma interrupt turned on. */ + +extern int pragma_interrupt; + +/* Set to an RTX containing the address of the stack to switch to + for interrupt functions. */ +extern struct rtx_def *sp_switch; + +/* A C expression whose value is nonzero if IDENTIFIER with arguments ARGS + is a valid machine specific attribute for DECL. + The attributes in ATTRIBUTES have previously been assigned to DECL. 
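For context on the pragma and attribute hooks above: the attribute spellings themselves are implemented in sh.c, which is not part of this excerpt, so the sketch below assumes the "interrupt_handler" spelling handled there; treat that name as an assumption rather than something this header defines.

/* Assumed usage sketch -- the "interrupt_handler" attribute is handled
   in sh.c, not shown in this patch hunk.  */
void timer_isr (void) __attribute__ ((interrupt_handler));

void
timer_isr (void)
{
  /* The prologue/epilogue save and restore every register the handler
     touches, and the function returns with rte rather than rts.  */
}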
*/ +extern int sh_valid_machine_decl_attribute (); +#define VALID_MACHINE_DECL_ATTRIBUTE(DECL, ATTRIBUTES, IDENTIFIER, ARGS) \ +sh_valid_machine_decl_attribute (DECL, ATTRIBUTES, IDENTIFIER, ARGS) + +extern void sh_pragma_insert_attributes (); +#define PRAGMA_INSERT_ATTRIBUTES(node, pattr, prefix_attr) \ + sh_pragma_insert_attributes (node, pattr, prefix_attr) + +extern int sh_flag_remove_dead_before_cse; +extern int rtx_equal_function_value_matters; +extern struct rtx_def *fpscr_rtx; +extern struct rtx_def *get_fpscr_rtx (); + + +/* Instructions with unfilled delay slots take up an extra two bytes for + the nop in the delay slot. */ + +#define ADJUST_INSN_LENGTH(X, LENGTH) \ + if (((GET_CODE (X) == INSN \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER) \ + || GET_CODE (X) == CALL_INSN \ + || (GET_CODE (X) == JUMP_INSN \ + && GET_CODE (PATTERN (X)) != ADDR_DIFF_VEC \ + && GET_CODE (PATTERN (X)) != ADDR_VEC)) \ + && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (X)))) != SEQUENCE \ + && get_attr_needs_delay_slot (X) == NEEDS_DELAY_SLOT_YES) \ + (LENGTH) += 2; + +/* Define the codes that are matched by predicates in sh.c. */ +#define PREDICATE_CODES \ + {"arith_operand", {SUBREG, REG, CONST_INT}}, \ + {"arith_reg_operand", {SUBREG, REG}}, \ + {"arith_reg_or_0_operand", {SUBREG, REG, CONST_INT}}, \ + {"binary_float_operator", {PLUS, MULT}}, \ + {"commutative_float_operator", {PLUS, MULT}}, \ + {"fp_arith_reg_operand", {SUBREG, REG}}, \ + {"fp_extended_operand", {SUBREG, REG, FLOAT_EXTEND}}, \ + {"fpscr_operand", {REG}}, \ + {"general_movsrc_operand", {SUBREG, REG, CONST_INT, MEM}}, \ + {"general_movdst_operand", {SUBREG, REG, CONST_INT, MEM}}, \ + {"logical_operand", {SUBREG, REG, CONST_INT}}, \ + {"noncommutative_float_operator", {MINUS, DIV}}, \ + {"register_operand", {SUBREG, REG}}, + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. + + Leaving the unsignedp unchanged gives better code than always setting it + to 0. This is despite the fact that we have only signed char and short + load instructions. */ +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + (MODE) = SImode; + +/* Defining PROMOTE_FUNCTION_ARGS eliminates some unnecessary zero/sign + extensions applied to char/short functions arguments. Defining + PROMOTE_FUNCTION_RETURN does the same for function returns. */ + +#define PROMOTE_FUNCTION_ARGS +#define PROMOTE_FUNCTION_RETURN + +/* ??? Define ACCUMULATE_OUTGOING_ARGS? This is more efficient than pushing + and poping arguments. However, we do have push/pop instructions, and + rather limited offsets (4 bits) in load/store instructions, so it isn't + clear if this would give better code. If implemented, should check for + compatibility problems. */ + +/* A C statement (sans semicolon) to update the integer variable COST + based on the relationship between INSN that is dependent on + DEP_INSN through the dependence LINK. The default is to make no + adjustment to COST. This can be used for example to specify to + the scheduler that an output- or anti-dependence does not incur + the same cost as a data-dependence. 
*/ + +#define ADJUST_COST(insn,link,dep_insn,cost) \ +do { \ + rtx reg; \ + \ + if (GET_CODE(insn) == CALL_INSN) \ + { \ + /* The only input for a call that is timing-critical is the \ + function's address. */ \ + rtx call = PATTERN (insn); \ + \ + if (GET_CODE (call) == PARALLEL) \ + call = XVECEXP (call, 0 ,0); \ + if (GET_CODE (call) == SET) \ + call = SET_SRC (call); \ + if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM \ + && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)) \ + (cost) = 0; \ + } \ + /* All sfunc calls are parallels with at least four components. \ + Exploit this to avoid unnecessary calls to sfunc_uses_reg. */ \ + else if (GET_CODE (PATTERN (insn)) == PARALLEL \ + && XVECLEN (PATTERN (insn), 0) >= 4 \ + && (reg = sfunc_uses_reg (insn))) \ + { \ + /* Likewise, the most timing critical input for an sfuncs call \ + is the function address. However, sfuncs typically start \ + using their arguments pretty quickly. \ + Assume a four cycle delay before they are needed. */ \ + if (! reg_set_p (reg, dep_insn)) \ + cost -= TARGET_SUPERSCALAR ? 40 : 4; \ + } \ + /* Adjust load_si / pcload_si type insns latency. Use the known \ + nominal latency and form of the insn to speed up the check. */ \ + else if (cost == 3 \ + && GET_CODE (PATTERN (dep_insn)) == SET \ + /* Latency for dmpy type insns is also 3, so check the that \ + it's actually a move insn. */ \ + && general_movsrc_operand (SET_SRC (PATTERN (dep_insn)), SImode))\ + cost = 2; \ + else if (cost == 30 \ + && GET_CODE (PATTERN (dep_insn)) == SET \ + && GET_MODE (SET_SRC (PATTERN (dep_insn))) == SImode) \ + cost = 20; \ +} while (0) \ + +/* For the sake of libgcc2.c, indicate target supports atexit. */ +#define HAVE_ATEXIT + +#define SH_DYNAMIC_SHIFT_COST \ + (TARGET_HARD_SH4 ? 1 : TARGET_SH3 ? (TARGET_SMALLCODE ? 1 : 2) : 20) diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md new file mode 100755 index 0000000..7e417ab --- /dev/null +++ b/gcc/config/sh/sh.md @@ -0,0 +1,4654 @@ +;; CYGNUS LOCAL SH4 Phase III: REG_LIBCALL / REG_RETVAL wrapping of +;; MACH_REG / MACL_REG usage. +;;- Machine description for the Hitachi SH. +;; Copyright (C) 1993 - 1999 Free Software Foundation, Inc. +;; Contributed by Steve Chamberlain (sac@cygnus.com). +;; Improved by Jim Wilson (wilson@cygnus.com). + +;; This file is part of GNU CC. + +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + + +;; ??? Should prepend a * to all pattern names which are not used. +;; This will make the compiler smaller, and rebuilds after changes faster. + +;; ??? Should be enhanced to include support for many more GNU superoptimizer +;; sequences. Especially the sequences for arithmetic right shifts. + +;; ??? Should check all DImode patterns for consistency and usefulness. + +;; ??? The MAC.W and MAC.L instructions are not supported. There is no +;; way to generate them. + +;; ??? 
The cmp/str instruction is not supported. Perhaps it can be used +;; for a str* inline function. + +;; BSR is not generated by the compiler proper, but when relaxing, it +;; generates .uses pseudo-ops that allow linker relaxation to create +;; BSR. This is actually implemented in bfd/{coff,elf32}-sh.c + +;; Special constraints for SH machine description: +;; +;; t -- T +;; x -- mac +;; l -- pr +;; z -- r0 +;; +;; Special formats used for outputting SH instructions: +;; +;; %. -- print a .s if insn needs delay slot +;; %@ -- print rte/rts if is/isn't an interrupt function +;; %# -- output a nop if there is nothing to put in the delay slot +;; %O -- print a constant without the # +;; %R -- print the lsw reg of a double +;; %S -- print the msw reg of a double +;; %T -- print next word of a double REG or MEM +;; +;; Special predicates: +;; +;; arith_operand -- operand is valid source for arithmetic op +;; arith_reg_operand -- operand is valid register for arithmetic op +;; general_movdst_operand -- operand is valid move destination +;; general_movsrc_operand -- operand is valid move source +;; logical_operand -- operand is valid source for logical op +;; ------------------------------------------------------------------------- +;; Attributes +;; ------------------------------------------------------------------------- + +;; Target CPU. + +(define_attr "cpu" + "sh1,sh2,sh3,sh3e,sh4" + (const (symbol_ref "sh_cpu_attr"))) + +(define_attr "endian" "big,little" + (const (if_then_else (symbol_ref "TARGET_LITTLE_ENDIAN") + (const_string "little") (const_string "big")))) + +(define_attr "fmovd" "yes,no" + (const (if_then_else (symbol_ref "TARGET_FMOVD") + (const_string "yes") (const_string "no")))) +;; issues/clock +(define_attr "issues" "1,2" + (const (if_then_else (symbol_ref "TARGET_SUPERSCALAR") (const_string "2") (const_string "1")))) + +;; cbranch conditional branch instructions +;; jump unconditional jumps +;; arith ordinary arithmetic +;; arith3 a compound insn that behaves similarly to a sequence of +;; three insns of type arith +;; arith3b like above, but might end with a redirected branch +;; load from memory +;; load_si Likewise, SImode variant for general register. +;; store to memory +;; move register to register +;; fmove register to register, floating point +;; smpy word precision integer multiply +;; dmpy longword or doublelongword precision integer multiply +;; return rts +;; pload load of pr reg, which can't be put into delay slot of rts +;; pstore store of pr reg, which can't be put into delay slot of jsr +;; pcload pc relative load of constant value +;; pcload_si Likewise, SImode variant for general register. +;; rte return from exception +;; sfunc special function call with known used registers +;; call function call +;; fp floating point +;; fdiv floating point divide (or square root) +;; gp_fpul move between general purpose register and fpul +;; dfp_arith, dfp_cmp,dfp_conv +;; dfdiv double precision floating point divide (or square root) +;; nil no-op move, will be deleted. + +(define_attr "type" + "cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,other,load,load_si,store,move,fmove,smpy,dmpy,return,pload,pstore,pcload,pcload_si,rte,sfunc,call,fp,fdiv,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,nil" + (const_string "other")) + +; If a conditional branch destination is within -252..258 bytes away +; from the instruction it can be 2 bytes long. Something in the +; range -4090..4100 bytes can be 6 bytes long. 
All other conditional +; branches are initially assumed to be 16 bytes long. +; In machine_dependent_reorg, we split all branches that are longer than +; 2 bytes. + +;; The maximum range used for SImode constant pool entrys is 1018. A final +;; instruction can add 8 bytes while only being 4 bytes in size, thus we +;; can have a total of 1022 bytes in the pool. Add 4 bytes for a branch +;; instruction around the pool table, 2 bytes of alignment before the table, +;; and 30 bytes of alignment after the table. That gives a maximum total +;; pool size of 1058 bytes. +;; Worst case code/pool content size ratio is 1:2 (using asms). +;; Thus, in the worst case, there is one instruction in front of a maximum +;; sized pool, and then there are 1052 bytes of pool for every 508 bytes of +;; code. For the last n bytes of code, there are 2n + 36 bytes of pool. +;; If we have a forward branch, the initial table will be put after the +;; unconditional branch. +;; +;; ??? We could do much better by keeping track of the actual pcloads within +;; the branch range and in the pcload range in front of the branch range. + +;; ??? This looks ugly because genattrtab won't allow if_then_else or cond +;; inside an le. +(define_attr "short_cbranch_p" "no,yes" + (cond [(ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 506)) + (const_string "yes") + (ne (symbol_ref "NEXT_INSN (PREV_INSN (insn)) != insn") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 508)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "med_branch_p" "no,yes" + (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 990)) + (const_int 1988)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 4092)) + (const_int 8186)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "med_cbranch_p" "no,yes" + (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 988)) + (const_int 1986)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 4090)) + (const_int 8184)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "braf_branch_p" "no,yes" + (cond [(ne (symbol_ref "! TARGET_SH2") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 10330)) + (const_int 20660)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 32764)) + (const_int 65530)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "braf_cbranch_p" "no,yes" + (cond [(ne (symbol_ref "! TARGET_SH2") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 10328)) + (const_int 20658)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 32762)) + (const_int 65528)) + (const_string "yes") + ] (const_string "no"))) + +; An unconditional jump in the range -4092..4098 can be 2 bytes long. +; For wider ranges, we need a combination of a code and a data part. 
+; If we can get a scratch register for a long range jump, the code +; part can be 4 bytes long; otherwise, it must be 8 bytes long. +; If the jump is in the range -32764..32770, the data part can be 2 bytes +; long; otherwise, it must be 6 bytes long. + +; All other instructions are two bytes long by default. + +;; ??? This should use something like *branch_p (minus (match_dup 0) (pc)), +;; but getattrtab doesn't understand this. +(define_attr "length" "" + (cond [(eq_attr "type" "cbranch") + (cond [(eq_attr "short_cbranch_p" "yes") + (const_int 2) + (eq_attr "med_cbranch_p" "yes") + (const_int 6) + (eq_attr "braf_cbranch_p" "yes") + (const_int 12) +;; ??? using pc is not computed transitively. + (ne (match_dup 0) (match_dup 0)) + (const_int 14) + ] (const_int 16)) + (eq_attr "type" "jump") + (cond [(eq_attr "med_branch_p" "yes") + (const_int 2) + (and (eq (symbol_ref "GET_CODE (PREV_INSN (insn))") + (symbol_ref "INSN")) + (eq (symbol_ref "INSN_CODE (PREV_INSN (insn))") + (symbol_ref "code_for_indirect_jump_scratch"))) + (if_then_else (eq_attr "braf_branch_p" "yes") + (const_int 6) + (const_int 10)) + (eq_attr "braf_branch_p" "yes") + (const_int 10) +;; ??? using pc is not computed transitively. + (ne (match_dup 0) (match_dup 0)) + (const_int 12) + ] (const_int 14)) + ] (const_int 2))) + +;; (define_function_unit {name} {num-units} {n-users} {test} +;; {ready-delay} {issue-delay} [{conflict-list}]) + +;; Load and store instructions save a cycle if they are aligned on a +;; four byte boundary. Using a function unit for stores encourages +;; gcc to separate load and store instructions by one instruction, +;; which makes it more likely that the linker will be able to word +;; align them when relaxing. + +;; Loads have a latency of two. +;; However, call insns can have a delay slot, so that we want one more +;; insn to be scheduled between the load of the function address and the call. +;; This is equivalent to a latency of three. +;; We cannot use a conflict list for this, because we need to distinguish +;; between the actual call address and the function arguments. +;; ADJUST_COST can only properly handle reductions of the cost, so we +;; use a latency of three here. +;; We only do this for SImode loads of general registers, to make the work +;; for ADJUST_COST easier. +(define_function_unit "memory" 1 0 + (and (eq_attr "issues" "1") + (eq_attr "type" "load_si,pcload_si")) + 3 2) +(define_function_unit "memory" 1 0 + (and (eq_attr "issues" "1") + (eq_attr "type" "load,pcload,pload,store,pstore")) + 2 2) + +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "arith3,arith3b")) 3 3) + +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "dyn_shift")) 2 2) + +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "!arith3,arith3b,dyn_shift")) 1 1) + +;; ??? These are approximations. +(define_function_unit "mpy" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "smpy")) 2 2) +(define_function_unit "mpy" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "dmpy")) 3 3) + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "fp,fmove")) 2 1) +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "fdiv")) 13 12) + + +;; SH4 scheduling +;; The SH4 is a dual-issue implementation, thus we have to multiply all +;; costs by at least two. 
+;; There will be single increments of the modeled that don't correspond +;; to the actual target ;; whenever two insns to be issued depend one a +;; single resource, and the scheduler picks to be the first one. +;; If we multiplied the costs just by two, just two of these single +;; increments would amount to an actual cycle. By picking a larger +;; factor, we can ameliorate the effect; However, we then have to make sure +;; that only two insns are modeled as issued per actual cycle. +;; Moreover, we need a way to specify the latency of insns that don't +;; use an actual function unit. +;; We use an 'issue' function unit to do that, and a cost factor of 10. + +(define_function_unit "issue" 2 0 + (and (eq_attr "issues" "2") (eq_attr "type" "!nil,arith3")) + 10 10) + +(define_function_unit "issue" 2 0 + (and (eq_attr "issues" "2") (eq_attr "type" "arith3")) + 30 30) + +;; There is no point in providing exact scheduling information about branches, +;; because they are at the starts / ends of basic blocks anyways. + +;; Some insns cannot be issued before/after another insn in the same cycle, +;; irrespective of the type of the other insn. + +;; default is dual-issue, but can't be paired with an insn that +;; uses multiple function units. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "!smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul,call,sfunc,arith3,arith3b")) + 1 10 + [(eq_attr "type" "smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul")]) + +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul")) + 10 10 + [(const_int 1)]) + +;; arith3 insns are always pairable at the start, but not inecessarily at +;; the end; however, there doesn;t seem to be a way to express that. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "arith3")) + 30 20 + [(const_int 1)]) + +;; arith3b insn are pairable at the end and have latency that prevents pairing +;; with the following branch, but we don't want this latency be respected; +;; When the following branch is immediately adjacent, we can redirect the +;; internal branch, which is likly to be a larger win. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "arith3b")) + 20 20 + [(const_int 1)]) + +;; calls introduce a longisch delay that is likely to flush the pipelines. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "call,sfunc")) + 160 160 + [(eq_attr "type" "!call") (eq_attr "type" "call")]) + +;; Load and store instructions have no alignment peculiarities for the SH4, +;; but they use the load-store unit, which they share with the fmove type +;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) . +;; Loads have a latency of two. +;; However, call insns can only paired with a preceding insn, and have +;; a delay slot, so that we want two more insns to be scheduled between the +;; load of the function address and the call. This is equivalent to a +;; latency of three. +;; We cannot use a conflict list for this, because we need to distinguish +;; between the actual call address and the function arguments. +;; ADJUST_COST can only properly handle reductions of the cost, so we +;; use a latency of three here, which gets multiplied by 10 to yield 30. +;; We only do this for SImode loads of general registers, to make the work +;; for ADJUST_COST easier. 
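As a rough illustration of the factor-of-ten scaling described in the SH4 scheduling comments above, here is a minimal sketch; the macro and function names are invented for the illustration and do not appear in these sources.

/* Illustrative model only: one real SH4 cycle is represented by 10 cost
   units in the SH4 function units of this file.  */
#define SH4_UNITS_PER_CYCLE 10

static int
sh4_model_units (int real_cycles)
{
  return real_cycles * SH4_UNITS_PER_CYCLE;
}

/* With a factor of only 2, a single spurious +1 increment (two co-issued
   insns contending for one modeled resource) would already amount to half
   a real cycle; with a factor of 10 it costs only a tenth of one.  The
   "issue" unit above (2 units, cost 10) is what keeps the model down to
   two insns per real cycle, and the SImode load latency of 3 cycles shows
   up above as sh4_model_units (3) == 30.  */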
+ +;; When specifying different latencies for different insns using the +;; the same function unit, genattrtab.c assumes a 'FIFO constraint' +;; so that the blockage is at least READY-COST (E) + 1 - READY-COST (C) +;; for an executing insn E and a candidate insn C. +;; Therefore, we define three different function units for load_store: +;; load_store, load and load_si. + +(define_function_unit "load_si" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "load_si,pcload_si")) 30 10) +(define_function_unit "load" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "load,pcload,pload")) 20 10) +(define_function_unit "load_store" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "load_si,pcload_si,load,pcload,pload,store,pstore,fmove")) + 10 10) + +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "arith,dyn_shift")) 10 10) + +;; Again, we have to pretend a lower latency for the "int" unit to avoid a +;; spurious FIFO constraint; the multiply instructions use the "int" +;; unit actually only for two cycles. +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 20 20) + +;; We use a fictous "mpy" unit to express the actual latency. +(define_function_unit "mpy" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 20) + +;; Again, we have to pretend a lower latency for the "int" unit to avoid a +;; spurious FIFO constraint. +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 10 10) + +;; We use a fictous "gp_fpul" unit to express the actual latency. +(define_function_unit "gp_fpul" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 20 10) + +;; ??? multiply uses the floating point unit, but with a two cycle delay. +;; Thus, a simple single-precision fp operation could finish if issued in +;; the very next cycle, but stalls when issued two or three cycles later. +;; Similarily, a divide / sqrt can work without stalls if issued in +;; the very next cycle, while it would have to block if issued two or +;; three cycles later. +;; There is no way to model this with gcc's function units. This problem is +;; actually mentioned in md.texi. Tackling this problem requires first that +;; it is possible to speak about the target in an open discussion. +;; +;; However, simple double-precision operations always conflict. + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 40 + [(eq_attr "type" "dfp_cmp,dfp_conv,dfp_arith")]) + +;; The "fp" unit is for pipeline stages F1 and F2. + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "fp")) 30 10) + +;; Again, we have to pretend a lower latency for the "fp" unit to avoid a +;; spurious FIFO constraint; the bulk of the fdiv type insns executes in +;; the F3 stage. +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 30 10) + +;; The "fdiv" function unit models the aggregate effect of the F1, F2 and F3 +;; pipeline stages on the pipelining of fdiv/fsqrt insns. +;; We also use it to give the actual latency here. +;; fsqrt is actually one cycle faster than fdiv (and the value used here), +;; but that will hardly matter in practice for scheduling. +(define_function_unit "fdiv" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 120 100) + +;; There is again a late use of the "fp" unit by [d]fdiv type insns +;; that we can't express. 
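The 'FIFO constraint' mentioned near the top of this block is why the load units above are split into load_si, load and load_store; a minimal sketch of the arithmetic follows, with an invented helper name.

/* genattrtab's implicit FIFO rule: a candidate insn C issued on the same
   unit as a still-executing insn E is blocked for at least
   READY-COST (E) + 1 - READY-COST (C) units.  */
static int
min_blockage (int ready_cost_e, int ready_cost_c)
{
  return ready_cost_e + 1 - ready_cost_c;
}

/* If load_si (ready cost 30) and a plain load (ready cost 20) shared one
   unit, a load issued after a load_si would be blocked for at least
   30 + 1 - 20 = 11 units, i.e. more than one modeled cycle.  Giving each
   class its own unit, all with issue delay 10, avoids that spurious
   blockage.  */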
+ +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfp_cmp,dfp_conv")) 40 20) + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfp_arith")) 80 60) + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 10) + +(define_function_unit "fdiv" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 210) + +; Definitions for filling branch delay slots. + +(define_attr "needs_delay_slot" "yes,no" (const_string "no")) + +;; ??? This should be (nil) instead of (const_int 0) +(define_attr "hit_stack" "yes,no" + (cond [(eq (symbol_ref "find_regno_note (insn, REG_INC, 15)") (const_int 0)) + (const_string "no")] + (const_string "yes"))) + +(define_attr "interrupt_function" "no,yes" + (const (symbol_ref "pragma_interrupt"))) + +(define_attr "in_delay_slot" "yes,no" + (cond [(eq_attr "type" "cbranch") (const_string "no") + (eq_attr "type" "pcload,pcload_si") (const_string "no") + (eq_attr "needs_delay_slot" "yes") (const_string "no") + (eq_attr "length" "2") (const_string "yes") + ] (const_string "no"))) + +(define_attr "is_sfunc" "" + (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0))) + +(define_delay + (eq_attr "needs_delay_slot" "yes") + [(eq_attr "in_delay_slot" "yes") (nil) (nil)]) + +;; On the SH and SH2, the rte instruction reads the return pc from the stack, +;; and thus we can't put a pop instruction in its delay slot. +;; ??? On the SH3, the rte instruction does not use the stack, so a pop +;; instruction can go in the delay slot. + +;; Since a normal return (rts) implicitly uses the PR register, +;; we can't allow PR register loads in an rts delay slot. + +(define_delay + (eq_attr "type" "return") + [(and (eq_attr "in_delay_slot" "yes") + (ior (and (eq_attr "interrupt_function" "no") + (eq_attr "type" "!pload")) + (and (eq_attr "interrupt_function" "yes") + (eq_attr "hit_stack" "no")))) (nil) (nil)]) + +;; Since a call implicitly uses the PR register, we can't allow +;; a PR register store in a jsr delay slot. + +(define_delay + (ior (eq_attr "type" "call") (eq_attr "type" "sfunc")) + [(and (eq_attr "in_delay_slot" "yes") + (eq_attr "type" "!pstore")) (nil) (nil)]) + +;; Say that we have annulled true branches, since this gives smaller and +;; faster code when branches are predicted as not taken. + +(define_delay + (and (eq_attr "type" "cbranch") + (ne (symbol_ref "TARGET_SH2") (const_int 0))) + [(eq_attr "in_delay_slot" "yes") (eq_attr "in_delay_slot" "yes") (nil)]) + +;; ------------------------------------------------------------------------- +;; SImode signed integer comparisons +;; ------------------------------------------------------------------------- + +(define_insn "" + [(set (reg:SI 18) + (eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r") + (match_operand:SI 1 "arith_operand" "L,r")) + (const_int 0)))] + "" + "tst %1,%0") + +;; ??? Perhaps should only accept reg/constant if the register is reg 0. +;; That would still allow reload to create cmpi instructions, but would +;; perhaps allow forcing the constant into a register when that is better. +;; Probably should use r0 for mem/imm compares, but force constant into a +;; register for pseudo/imm compares. 
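For orientation, here is roughly how a simple test feeding the T bit is expected to come out with the "tst" pattern at the start of this section; the C function, the register choices and the exact sequence are only illustrative and depend on register allocation.

/* C source */
int
low_bits_clear (int x)
{
  return (x & 7) == 0;
}

/* One plausible SH output: "tst" sets the T bit (hard register 18)
   directly, and "movt" then materializes the result.

       mov     #7,r1
       tst     r1,r4       ! T := ((r4 & r1) == 0)
       rts
       movt    r0          ! r0 := T, filled into the rts delay slot
*/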
+ +(define_insn "cmpeqsi_t" + [(set (reg:SI 18) (eq:SI (match_operand:SI 0 "arith_reg_operand" "r,z,r") + (match_operand:SI 1 "arith_operand" "N,rI,r")))] + "" + "@ + tst %0,%0 + cmp/eq %1,%0 + cmp/eq %1,%0") + +(define_insn "cmpgtsi_t" + [(set (reg:SI 18) (gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r") + (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))] + "" + "@ + cmp/gt %1,%0 + cmp/pl %0") + +(define_insn "cmpgesi_t" + [(set (reg:SI 18) (ge:SI (match_operand:SI 0 "arith_reg_operand" "r,r") + (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))] + "" + "@ + cmp/ge %1,%0 + cmp/pz %0") + +;; ------------------------------------------------------------------------- +;; SImode unsigned integer comparisons +;; ------------------------------------------------------------------------- + +(define_insn "cmpgeusi_t" + [(set (reg:SI 18) (geu:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")))] + "" + "cmp/hs %1,%0") + +(define_insn "cmpgtusi_t" + [(set (reg:SI 18) (gtu:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")))] + "" + "cmp/hi %1,%0") + +;; We save the compare operands in the cmpxx patterns and use them when +;; we generate the branch. + +(define_expand "cmpsi" + [(set (reg:SI 18) (compare (match_operand:SI 0 "arith_operand" "") + (match_operand:SI 1 "arith_operand" "")))] + "" + " +{ + sh_compare_op0 = operands[0]; + sh_compare_op1 = operands[1]; + DONE; +}") + +;; ------------------------------------------------------------------------- +;; DImode signed integer comparisons +;; ------------------------------------------------------------------------- + +;; ??? Could get better scheduling by splitting the initial test from the +;; rest of the insn after reload. However, the gain would hardly justify +;; the sh.md size increase necessary to do that. + +(define_insn "" + [(set (reg:SI 18) + (eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_operand" "r")) + (const_int 0)))] + "" + "* return output_branchy_insn (EQ, \"tst\\t%S1,%S0\;bf\\t%l9\;tst\\t%R1,%R0\", + insn, operands);" + [(set_attr "length" "6") + (set_attr "type" "arith3b")]) + +(define_insn "cmpeqdi_t" + [(set (reg:SI 18) (eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))] + "" + "* + return output_branchy_insn + (EQ, + (which_alternative + ? 
\"cmp/eq\\t%S1,%S0\;bf\\t%l9\;cmp/eq\\t%R1,%R0\" + : \"tst\\t%S0,%S0\;bf\\t%l9\;tst\\t%R0,%R0\"), + insn, operands);" + [(set_attr "length" "6") + (set_attr "type" "arith3b")]) + +(define_insn "cmpgtdi_t" + [(set (reg:SI 18) (gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))] + "TARGET_SH2" + "@ + cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/gt\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=: + tst\\t%S0,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/pl\\t%S0\;cmp/hi\\t%S0,%R0\\n%,Ldi%=:" + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +(define_insn "cmpgedi_t" + [(set (reg:SI 18) (ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))] + "TARGET_SH2" + "@ + cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/ge\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=: + cmp/pz\\t%S0" + [(set_attr "length" "8,2") + (set_attr "type" "arith3,arith")]) + +;; ------------------------------------------------------------------------- +;; DImode unsigned integer comparisons +;; ------------------------------------------------------------------------- + +(define_insn "cmpgeudi_t" + [(set (reg:SI 18) (geu:SI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" + "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hs\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:" + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +(define_insn "cmpgtudi_t" + [(set (reg:SI 18) (gtu:SI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" + "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hi\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:" + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +;; We save the compare operands in the cmpxx patterns and use them when +;; we generate the branch. + +(define_expand "cmpdi" + [(set (reg:SI 18) (compare (match_operand:DI 0 "arith_operand" "") + (match_operand:DI 1 "arith_operand" "")))] + "TARGET_SH2" + " +{ + sh_compare_op0 = operands[0]; + sh_compare_op1 = operands[1]; + DONE; +}") + +;; ------------------------------------------------------------------------- +;; Addition instructions +;; ------------------------------------------------------------------------- + +;; ??? This should be a define expand. + +(define_insn "adddi3" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (reg:SI 18))] + "" + "#" + [(set_attr "length" "6")]) + +(define_split + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (reg:SI 18))] + "reload_completed" + [(const_int 0)] + " +{ + rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]); + high0 = gen_rtx (REG, SImode, + true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + high2 = gen_rtx (REG, SImode, + true_regnum (operands[2]) + (TARGET_LITTLE_ENDIAN ? 
1 : 0)); + emit_insn (gen_clrt ()); + emit_insn (gen_addc (low0, low0, gen_lowpart (SImode, operands[2]))); + emit_insn (gen_addc1 (high0, high0, high2)); + DONE; +}") + +(define_insn "addc" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI 18))) + (set (reg:SI 18) + (ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))] + "" + "addc %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "addc1" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI 18))) + (clobber (reg:SI 18))] + "" + "addc %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "addsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (plus:SI (match_operand:SI 1 "arith_operand" "%0") + (match_operand:SI 2 "arith_operand" "rI")))] + "" + "add %2,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Subtraction instructions +;; ------------------------------------------------------------------------- + +;; ??? This should be a define expand. + +(define_insn "subdi3" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (minus:DI (match_operand:DI 1 "arith_reg_operand" "0") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (reg:SI 18))] + "" + "#" + [(set_attr "length" "6")]) + +(define_split + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (minus:DI (match_operand:DI 1 "arith_reg_operand" "0") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (reg:SI 18))] + "reload_completed" + [(const_int 0)] + " +{ + rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]); + high0 = gen_rtx (REG, SImode, + true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + high2 = gen_rtx (REG, SImode, + true_regnum (operands[2]) + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + emit_insn (gen_clrt ()); + emit_insn (gen_subc (low0, low0, gen_lowpart (SImode, operands[2]))); + emit_insn (gen_subc1 (high0, high0, high2)); + DONE; +}") + +(define_insn "subc" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI 18))) + (set (reg:SI 18) + (gtu:SI (minus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))] + "" + "subc %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "subc1" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI 18))) + (clobber (reg:SI 18))] + "" + "subc %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "*subsi3_internal" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")))] + "" + "sub %2,%0" + [(set_attr "type" "arith")]) + +;; Convert `constant - reg' to `neg rX; add rX, #const' since this +;; will sometimes save one instruction. Otherwise we might get +;; `mov #const, rY; sub rY,rX; mov rX, rY' if the source and dest regs +;; are the same. 
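A short sketch of the `constant - reg' rewrite described in the comment above, which the subsi3 expander that follows performs; the C function and register names are only illustrative.

/* C source */
int
from_ten (int x)
{
  return 10 - x;
}

/* The expander turns "10 - x" into negate-then-add-constant:

       neg     r4,r0       ! r0 := -x
       rts
       add     #10,r0      ! r0 := 10 - x, in the rts delay slot

   rather than the longer  mov #10,rY; sub r4,rY; mov rY,r0  sequence.  */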
+ +(define_expand "subsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (minus:SI (match_operand:SI 1 "arith_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")))] + "" + " +{ + if (GET_CODE (operands[1]) == CONST_INT) + { + emit_insn (gen_negsi2 (operands[0], operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[0], operands[1])); + DONE; + } +}") + +;; ------------------------------------------------------------------------- +;; Division instructions +;; ------------------------------------------------------------------------- + +;; We take advantage of the library routines which don't clobber as many +;; registers as a normal function call would. + +;; The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it +;; also has an effect on the register that holds the address of the sfunc. +;; To make this work, we have an extra dummy insns that shows the use +;; of this register for reorg. + +(define_insn "use_sfunc_addr" + [(set (reg:SI 17) (unspec [(match_operand:SI 0 "register_operand" "r")] 5))] + "" + "" + [(set_attr "length" "0")]) + +;; We must use a pseudo-reg forced to reg 0 in the SET_DEST rather than +;; hard register 0. If we used hard register 0, then the next instruction +;; would be a move from hard register 0 to a pseudo-reg. If the pseudo-reg +;; gets allocated to a stack slot that needs its address reloaded, then +;; there is nothing to prevent reload from using r0 to reload the address. +;; This reload would clobber the value in r0 we are trying to store. +;; If we let reload allocate r0, then this problem can never happen. + +(define_insn "udivsi3_i1" + [(set (match_operand:SI 0 "register_operand" "=z") + (udiv:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "! TARGET_SH4" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "udivsi3_i4" + [(set (match_operand:SI 0 "register_operand" "=y") + (udiv:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (clobber (reg:DF 28)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (use (reg:PSI 48)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH4 && ! 
TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "udivsi3_i4_single" + [(set (match_operand:SI 0 "register_operand" "=y") + (udiv:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (clobber (reg:DF 28)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_HARD_SH4 && TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "udivsi3" + [(set (reg:SI 4) (match_operand:SI 1 "general_operand" "")) + (set (reg:SI 5) (match_operand:SI 2 "general_operand" "")) + (set (match_dup 3) (symbol_ref:SI "__udivsi3")) + (parallel [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (reg:SI 4) + (reg:SI 5))) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (use (match_dup 3))])] + "" + " +{ + rtx first, last; + + first = emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]); + operands[3] = gen_reg_rtx(SImode); + if (TARGET_HARD_SH4) + { + emit_move_insn (operands[3], + gen_rtx_SYMBOL_REF (SImode, \"__udivsi3_i4\")); + if (TARGET_FPU_SINGLE) + last = emit_insn (gen_udivsi3_i4_single (operands[0], operands[3])); + else + last = emit_insn (gen_udivsi3_i4 (operands[0], operands[3])); + } + else + { + emit_move_insn (operands[3], + gen_rtx_SYMBOL_REF (SImode, \"__udivsi3\")); + last = emit_insn (gen_udivsi3_i1 (operands[0], operands[3])); + } + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +(define_insn "divsi3_i1" + [(set (match_operand:SI 0 "register_operand" "=z") + (div:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI 3)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "! TARGET_SH4" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "divsi3_i4" + [(set (match_operand:SI 0 "register_operand" "=y") + (div:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (use (reg:PSI 48)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH4 && ! 
TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "divsi3_i4_single" + [(set (match_operand:SI 0 "register_operand" "=y") + (div:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (clobber (reg:SI 2)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_HARD_SH4 && TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "divsi3" + [(set (reg:SI 4) (match_operand:SI 1 "general_operand" "")) + (set (reg:SI 5) (match_operand:SI 2 "general_operand" "")) + (set (match_dup 3) (symbol_ref:SI "__sdivsi3")) + (parallel [(set (match_operand:SI 0 "register_operand" "") + (div:SI (reg:SI 4) + (reg:SI 5))) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI 3)) + (use (match_dup 3))])] + "" + " +{ + rtx first, last; + + first = emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]); + operands[3] = gen_reg_rtx(SImode); + if (TARGET_HARD_SH4) + { + emit_move_insn (operands[3], + gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3_i4\")); + if (TARGET_FPU_SINGLE) + last = emit_insn (gen_divsi3_i4_single (operands[0], operands[3])); + else + last = emit_insn (gen_divsi3_i4 (operands[0], operands[3])); + } + else + { + emit_move_insn (operands[3], gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3\")); + last = emit_insn (gen_divsi3_i1 (operands[0], operands[3])); + } + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +;; ------------------------------------------------------------------------- +;; Multiplication instructions +;; ------------------------------------------------------------------------- + +(define_insn "umulhisi3_i" + [(set (reg:SI 21) + (mult:SI (zero_extend:SI (match_operand:HI 0 "arith_reg_operand" "r")) + (zero_extend:SI (match_operand:HI 1 "arith_reg_operand" "r"))))] + "" + "mulu %1,%0" + [(set_attr "type" "smpy")]) + +(define_insn "mulhisi3_i" + [(set (reg:SI 21) + (mult:SI (sign_extend:SI + (match_operand:HI 0 "arith_reg_operand" "r")) + (sign_extend:SI + (match_operand:HI 1 "arith_reg_operand" "r"))))] + "" + "muls %1,%0" + [(set_attr "type" "smpy")]) + +(define_expand "mulhisi3" + [(set (reg:SI 21) + (mult:SI (sign_extend:SI + (match_operand:HI 1 "arith_reg_operand" "")) + (sign_extend:SI + (match_operand:HI 2 "arith_reg_operand" "")))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI 21))] + "" + " +{ + rtx first, last; + + first = emit_insn (gen_mulhisi3_i (operands[1], operands[2])); + last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 21)); + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. 
*/ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +(define_expand "umulhisi3" + [(set (reg:SI 21) + (mult:SI (zero_extend:SI + (match_operand:HI 1 "arith_reg_operand" "")) + (zero_extend:SI + (match_operand:HI 2 "arith_reg_operand" "")))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI 21))] + "" + " +{ + rtx first, last; + + first = emit_insn (gen_umulhisi3_i (operands[1], operands[2])); + last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 21)); + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +;; mulsi3 on the SH2 can be done in one instruction, on the SH1 we generate +;; a call to a routine which clobbers known registers. + +(define_insn "" + [(set (match_operand:SI 1 "register_operand" "=z") + (mult:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 21)) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 3)) + (clobber (reg:SI 2)) + (clobber (reg:SI 1)) + (use (match_operand:SI 0 "arith_reg_operand" "r"))] + "" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "mulsi3_call" + [(set (reg:SI 4) (match_operand:SI 1 "general_operand" "")) + (set (reg:SI 5) (match_operand:SI 2 "general_operand" "")) + (parallel[(set (match_operand:SI 0 "register_operand" "") + (mult:SI (reg:SI 4) + (reg:SI 5))) + (clobber (reg:SI 21)) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 3)) + (clobber (reg:SI 2)) + (clobber (reg:SI 1)) + (use (match_operand:SI 3 "register_operand" ""))])] + "" + "") + +(define_insn "mul_l" + [(set (reg:SI 21) + (mult:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" + "mul.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "mulsi3" + [(set (reg:SI 21) + (mult:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" ""))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI 21))] + "" + " +{ + rtx first, last; + + if (!TARGET_SH2) + { + /* The address must be set outside the libcall, + since it goes into a pseudo. */ + rtx addr = force_reg (SImode, gen_rtx_SYMBOL_REF (SImode, \"__mulsi3\")); + rtx insns = gen_mulsi3_call (operands[0], operands[1], operands[2], addr); + first = XVECEXP (insns, 0, 0); + last = XVECEXP (insns, 0, XVECLEN (insns, 0) - 1); + emit_insn (insns); + } + else + { + rtx macl = gen_rtx_REG (SImode, MACL_REG); + first = emit_insn (gen_mul_l (operands[1], operands[2])); + last = emit_insn (gen_movsi_i ((operands[0]), macl)); + } + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. 
*/ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +(define_insn "mulsidi3_i" + [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (set (reg:SI 21) + (mult:SI (match_dup 0) + (match_dup 1)))] + "TARGET_SH2" + "dmuls.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "arith_reg_operand" "r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "r")))) + (clobber (reg:DI 20))] + "TARGET_SH2" + "#") + +(define_split + [(set (match_operand:DI 0 "arith_reg_operand" "") + (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))) + (clobber (reg:DI 20))] + "TARGET_SH2" + [(const_int 0)] + " +{ + rtx low_dst = gen_lowpart (SImode, operands[0]); + rtx high_dst = gen_highpart (SImode, operands[0]); + + emit_insn (gen_mulsidi3_i (operands[1], operands[2])); + + emit_move_insn (low_dst, gen_rtx_REG (SImode, 21)); + emit_move_insn (high_dst, gen_rtx_REG (SImode, 20)); + /* We need something to tag the possible REG_EQUAL notes on to. */ + emit_move_insn (operands[0], operands[0]); + DONE; +}") + +(define_insn "umulsidi3_i" + [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (set (reg:SI 21) + (mult:SI (match_dup 0) + (match_dup 1)))] + "TARGET_SH2" + "dmulu.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_insn "umulsidi3" + [(set (match_operand:DI 0 "arith_reg_operand" "r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "r")))) + (clobber (reg:DI 20))] + "TARGET_SH2" + "#") + +(define_split + [(set (match_operand:DI 0 "arith_reg_operand" "") + (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))) + (clobber (reg:DI 20))] + "TARGET_SH2" + [(const_int 0)] + " +{ + rtx low_dst = gen_lowpart (SImode, operands[0]); + rtx high_dst = gen_highpart (SImode, operands[0]); + + emit_insn (gen_umulsidi3_i (operands[1], operands[2])); + + emit_move_insn (low_dst, gen_rtx_REG (SImode, 21)); + emit_move_insn (high_dst, gen_rtx_REG (SImode, 20)); + /* We need something to tag the possible REG_EQUAL notes on to. 
*/ + emit_move_insn (operands[0], operands[0]); + DONE; +}") + +(define_insn "smulsi3_highpart_i" + [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (clobber (reg:SI 21))] + "TARGET_SH2" + "dmuls.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "smulsi3_highpart" + [(parallel [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))) + (const_int 32)))) + (clobber (reg:SI 21))]) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI 20))] + "TARGET_SH2" + " +{ + rtx first, last; + + first = emit_insn (gen_smulsi3_highpart_i (operands[1], operands[2])); + last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 20)); + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +(define_insn "umulsi3_highpart_i" + [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (clobber (reg:SI 21))] + "TARGET_SH2" + "dmulu.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "umulsi3_highpart" + [(parallel [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))) + (const_int 32)))) + (clobber (reg:SI 21))]) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI 20))] + "TARGET_SH2" + " +{ + rtx first, last; + + first = emit_insn (gen_umulsi3_highpart_i (operands[1], operands[2])); + last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 20)); + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +;; ------------------------------------------------------------------------- +;; Logical operations +;; ------------------------------------------------------------------------- + +(define_insn "" + [(set (match_operand:SI 0 "arith_reg_operand" "=r,z") + (and:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "r,L")))] + "" + "and %2,%0" + [(set_attr "type" "arith")]) + +;; If the constant is 255, then emit a extu.b instruction instead of an +;; and, since that will give better code. 
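A sketch of the `& 255' special case described above, which the andsi3 expander below implements; the function name and register choice are only illustrative.

/* C source */
unsigned int
low_byte (unsigned int x)
{
  return x & 255;
}

/* Instead of loading 255 into a register and using "and", the expander
   emits a byte zero extension, a single insn with no register
   restrictions:

       rts
       extu.b  r4,r0       ! r0 := x & 0xff, in the rts delay slot
*/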
+ +(define_expand "andsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (and:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "logical_operand" "")))] + "" + " +{ + if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 255) + { + emit_insn (gen_zero_extendqisi2 (operands[0], + gen_lowpart (QImode, operands[1]))); + DONE; + } +}") + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "=r,z") + (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "r,L")))] + "" + "or %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "=z,r") + (xor:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "L,r")))] + "" + "xor %2,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Shifts and rotates +;; ------------------------------------------------------------------------- + +(define_insn "rotlsi3_1" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1))) + (set (reg:SI 18) + (lshiftrt:SI (match_dup 1) (const_int 31)))] + "" + "rotl %0" + [(set_attr "type" "arith")]) + +(define_insn "rotlsi3_31" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 31))) + (clobber (reg:SI 18))] + "" + "rotr %0" + [(set_attr "type" "arith")]) + +(define_insn "rotlsi3_16" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)))] + "" + "swap.w %1,%0" + [(set_attr "type" "arith")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "immediate_operand" "")))] + "" + " +{ + static char rot_tab[] = { + 000, 000, 000, 000, 000, 000, 010, 001, + 001, 001, 011, 013, 003, 003, 003, 003, + 003, 003, 003, 003, 003, 013, 012, 002, + 002, 002, 010, 000, 000, 000, 000, 000, + }; + + int count, choice; + + if (GET_CODE (operands[2]) != CONST_INT) + FAIL; + count = INTVAL (operands[2]); + choice = rot_tab[count]; + if (choice & 010 && SH_DYNAMIC_SHIFT_COST <= 1) + FAIL; + choice &= 7; + switch (choice) + { + case 0: + emit_move_insn (operands[0], operands[1]); + count -= (count & 16) * 2; + break; + case 3: + emit_insn (gen_rotlsi3_16 (operands[0], operands[1])); + count -= 16; + break; + case 1: + case 2: + { + rtx parts[2]; + parts[0] = gen_reg_rtx (SImode); + parts[1] = gen_reg_rtx (SImode); + emit_insn (gen_rotlsi3_16 (parts[2-choice], operands[1])); + parts[choice-1] = operands[1]; + emit_insn (gen_ashlsi3 (parts[0], parts[0], GEN_INT (8))); + emit_insn (gen_lshrsi3 (parts[1], parts[1], GEN_INT (8))); + emit_insn (gen_iorsi3 (operands[0], parts[0], parts[1])); + count = (count & ~16) - 8; + } + } + + for (; count > 0; count--) + emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); + for (; count < 0; count++) + emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); + + DONE; +}") + +(define_insn "*rotlhi3_8" + [(set (match_operand:HI 0 "arith_reg_operand" "=r") + (rotate:HI (match_operand:HI 1 "arith_reg_operand" "r") + (const_int 8)))] + "" + "swap.b %1,%0" + [(set_attr "type" "arith")]) + +(define_expand "rotlhi3" + [(set (match_operand:HI 0 "arith_reg_operand" "") + (rotate:HI (match_operand:HI 1 "arith_reg_operand" "") + (match_operand:HI 2 
"immediate_operand" "")))] + "" + " +{ + if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) != 8) + FAIL; +}") + +;; +;; shift left + +(define_insn "ashlsi3_d" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")))] + "TARGET_SH3" + "shld %2,%0" + [(set_attr "type" "dyn_shift")]) + +(define_insn "ashlsi3_k" + [(set (match_operand:SI 0 "arith_reg_operand" "=r,r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0,0") + (match_operand:SI 2 "const_int_operand" "M,K")))] + "CONST_OK_FOR_K (INTVAL (operands[2]))" + "@ + add %0,%0 + shll%O2 %0" + [(set_attr "type" "arith")]) + +(define_insn "ashlhi3_k" + [(set (match_operand:HI 0 "arith_reg_operand" "=r,r") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0,0") + (match_operand:HI 2 "const_int_operand" "M,K")))] + "CONST_OK_FOR_K (INTVAL (operands[2]))" + "@ + add %0,%0 + shll%O2 %0" + [(set_attr "type" "arith")]) + +(define_insn "ashlsi3_n" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "! sh_dynamicalize_shift_p (operands[2])" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1)) + (const_string "2") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3)) + (const_string "6")] + (const_string "8"))) + (set_attr "type" "arith")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "" + [(use (reg:SI 0))] + " +{ + gen_shifty_op (ASHIFT, operands); + DONE; +}") + +(define_expand "ashlsi3" + [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:SI 18))])] + "" + " +{ + if (GET_CODE (operands[2]) == CONST_INT + && sh_dynamicalize_shift_p (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + if (TARGET_SH3 && arith_reg_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_ashlsi3_d (operands[0], operands[1], operands[2])); + DONE; + } + if (! 
immediate_operand (operands[2], GET_MODE (operands[2]))) + FAIL; +}") + +(define_insn "ashlhi3" + [(set (match_operand:HI 0 "arith_reg_operand" "=r") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0") + (match_operand:HI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1)) + (const_string "2") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2)) + (const_string "4")] + (const_string "6"))) + (set_attr "type" "arith")]) + +(define_split + [(set (match_operand:HI 0 "arith_reg_operand" "") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "") + (match_operand:HI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "" + [(use (reg:SI 0))] + " +{ + gen_shifty_hi_op (ASHIFT, operands); + DONE; +}") + +; +; arithmetic shift right +; + +(define_insn "ashrsi3_k" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "M"))) + (clobber (reg:SI 18))] + "INTVAL (operands[2]) == 1" + "shar %0" + [(set_attr "type" "arith")]) + +;; We can't do HImode right shifts correctly unless we start out with an +;; explicit zero / sign extension; doing that would result in worse overall +;; code, so just let the machine independent code widen the mode. +;; That's why we don't have ashrhi3_k / lshrhi3_k / lshrhi3_m / lshrhi3 . + + +;; ??? This should be a define expand. + +(define_insn "ashrsi2_16" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)))] + "" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)))] + "" + [(set (match_dup 0) (rotate:SI (match_dup 1) (const_int 16))) + (set (match_dup 0) (sign_extend:SI (match_dup 2)))] + "operands[2] = gen_lowpart (HImode, operands[0]);") + +;; ??? This should be a define expand. 
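A sketch of what the ashrsi2_16 split above amounts to; the exts.w step is the HImode sign extension from the split's second pattern, and the register choice is only illustrative.

/* C source */
int
high_half (int x)
{
  return x >> 16;
}

/* Rather than sixteen single-bit shifts, the value is rotated by 16 with
   "swap.w" and the low word is then sign-extended:

       swap.w  r4,r0       ! rotate by 16; old high half now in the low word
       rts
       exts.w  r0,r0       ! sign-extend the 16-bit value back to SImode
*/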
+ +(define_insn "ashrsi2_31" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 31))) + (clobber (reg:SI 18))] + "" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 31))) + (clobber (reg:SI 18))] + "" + [(const_int 0)] + " +{ + emit_insn (gen_ashlsi_c (operands[0], operands[1])); + emit_insn (gen_subc1 (operands[0], operands[0], operands[0])); + DONE; +}") + +(define_insn "ashlsi_c" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (const_int 1))) + (set (reg:SI 18) (lt:SI (match_dup 1) + (const_int 0)))] + "" + "shll %0" + [(set_attr "type" "arith")]) + +(define_insn "ashrsi3_d" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))] + "TARGET_SH3" + "shad %2,%0" + [(set_attr "type" "dyn_shift")]) + +(define_insn "ashrsi3_n" + [(set (reg:SI 4) + (ashiftrt:SI (reg:SI 4) + (match_operand:SI 0 "const_int_operand" "i"))) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "ashrsi3" + [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:SI 18))])] + "" + "if (expand_ashiftrt (operands)) DONE; else FAIL;") + +;; logical shift right + +(define_insn "lshrsi3_d" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))] + "TARGET_SH3" + "shld %2,%0" + [(set_attr "type" "dyn_shift")]) + +;; Only the single bit shift clobbers the T bit. + +(define_insn "lshrsi3_m" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "M"))) + (clobber (reg:SI 18))] + "CONST_OK_FOR_M (INTVAL (operands[2]))" + "shlr %0" + [(set_attr "type" "arith")]) + +(define_insn "lshrsi3_k" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "K")))] + "CONST_OK_FOR_K (INTVAL (operands[2])) + && ! CONST_OK_FOR_M (INTVAL (operands[2]))" + "shlr%O2 %0" + [(set_attr "type" "arith")]) + +(define_insn "lshrsi3_n" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "! 
sh_dynamicalize_shift_p (operands[2])" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1)) + (const_string "2") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3)) + (const_string "6")] + (const_string "8"))) + (set_attr "type" "arith")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "" + [(use (reg:SI 0))] + " +{ + gen_shifty_op (LSHIFTRT, operands); + DONE; +}") + +(define_expand "lshrsi3" + [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:SI 18))])] + "" + " +{ + if (GET_CODE (operands[2]) == CONST_INT + && sh_dynamicalize_shift_p (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + if (TARGET_SH3 && arith_reg_operand (operands[2], GET_MODE (operands[2]))) + { + rtx count = copy_to_mode_reg (SImode, operands[2]); + emit_insn (gen_negsi2 (count, count)); + emit_insn (gen_lshrsi3_d (operands[0], operands[1], count)); + DONE; + } + if (! immediate_operand (operands[2], GET_MODE (operands[2]))) + FAIL; +}") + +;; ??? This should be a define expand. + +(define_insn "ashldi3_k" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI 18))] + "" + "shll %R0\;rotcl %S0" + [(set_attr "length" "4") + (set_attr "type" "arith")]) + +(define_expand "ashldi3" + [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "") + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:SI 18))])] + "" + "{ if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) != 1) FAIL;} ") + +;; ??? This should be a define expand. + +(define_insn "lshrdi3_k" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI 18))] + "" + "shlr %S0\;rotcr %R0" + [(set_attr "length" "4") + (set_attr "type" "arith")]) + +(define_expand "lshrdi3" + [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "") + (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:SI 18))])] + "" + "{ if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) != 1) FAIL;} ") + +;; ??? This should be a define expand. 
+ +(define_insn "ashrdi3_k" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI 18))] + "" + "shar %S0\;rotcr %R0" + [(set_attr "length" "4") + (set_attr "type" "arith")]) + +(define_expand "ashrdi3" + [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "") + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:SI 18))])] + "" + "{ if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) != 1) FAIL; } ") + +;; combined left/right shift + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n")))] + "(unsigned)INTVAL (operands[2]) < 32" + [(use (reg:SI 0))] + "if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) FAIL; + DONE;") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "(unsigned)INTVAL (operands[2]) < 32" + [(use (reg:SI 0))] + "if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) FAIL; + DONE;") + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "shl_and_kind (operands[2], operands[3], 0) == 1" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_and_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_and_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_and_length (insn)") (const_int 5)) + (const_string "10") + (eq (symbol_ref "shl_and_length (insn)") (const_int 6)) + (const_string "12") + (eq (symbol_ref "shl_and_length (insn)") (const_int 7)) + (const_string "14") + (eq (symbol_ref "shl_and_length (insn)") (const_int 8)) + (const_string "16")] + (const_string "18"))) + (set_attr "type" "arith")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=z") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "shl_and_kind (operands[2], operands[3], 0) == 2" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_and_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_and_length (insn)") (const_int 4)) + (const_string "8")] + (const_string "10"))) + (set_attr "type" "arith")]) + +;; shift left / and combination with a scratch register: The combine pass +;; does not accept the individual instructions, even though they are +;; cheap. But it needs a precise description so that it is usable after +;; reload. 
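+;; Roughly speaking, the insn below matches a source expression of the shape
+;; (((x >> A) & y) << B) >> C with constant shift counts; the C form is only
+;; illustrative, the exact RTL shape is what combine has to see.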
+(define_insn "and_shl_scratch" + [(set (match_operand:SI 0 "register_operand" "=r,&r") + (lshiftrt:SI (ashift:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0") + (match_operand:SI 2 "const_int_operand" "N,n")) + (match_operand:SI 3 "" "0,r")) + (match_operand:SI 4 "const_int_operand" "n,n")) + (match_operand:SI 5 "const_int_operand" "n,n"))) + (clobber (reg:SI 18))] + "" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_and_scr_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 5)) + (const_string "10")] + (const_string "12"))) + (set_attr "type" "arith")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "=r,&r") + (lshiftrt:SI (ashift:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0") + (match_operand:SI 2 "const_int_operand" "N,n")) + (match_operand:SI 3 "register_operand" "0,r")) + (match_operand:SI 4 "const_int_operand" "n,n")) + (match_operand:SI 5 "const_int_operand" "n,n"))) + (clobber (reg:SI 18))] + "" + [(use (reg:SI 0))] + " +{ + rtx and_source = operands[rtx_equal_p (operands[0], operands[1]) ? 3 : 1]; + + if (INTVAL (operands[2])) + { + gen_shifty_op (LSHIFTRT, operands); + } + emit_insn (gen_andsi3 (operands[0], operands[0], and_source)); + operands[2] = operands[4]; + gen_shifty_op (ASHIFT, operands); + if (INTVAL (operands[5])) + { + operands[2] = operands[5]; + gen_shifty_op (LSHIFTRT, operands); + } + DONE; +}") + +;; signed left/right shift combination. +(define_split + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI 18))] + "" + [(use (reg:SI 0))] + "if (gen_shl_sext (operands[0], operands[2], operands[3], operands[1])) FAIL; + DONE;") + +(define_insn "shl_sext_ext" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI 18))] + "(unsigned)shl_sext_kind (operands[2], operands[3], 0) - 1 < 5" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 1)) + (const_string "2") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 5)) + (const_string "10") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 6)) + (const_string "12") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 7)) + (const_string "14") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 8)) + (const_string "16")] + (const_string "18"))) + (set_attr "type" "arith")]) + +(define_insn "shl_sext_sub" + [(set (match_operand:SI 0 "register_operand" "=z") + (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI 18))] + "(shl_sext_kind (operands[2], operands[3], 0) & ~1) == 6" + "#" + [(set 
(attr "length") + (cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 5)) + (const_string "10") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 6)) + (const_string "12")] + (const_string "14"))) + (set_attr "type" "arith")]) + +;; These patterns are found in expansions of DImode shifts by 16, and +;; allow the xtrct instruction to be generated from C source. + +(define_insn "xtrct_left" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)) + (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand" "0") + (const_int 16))))] + "" + "xtrct %1,%0" + [(set_attr "type" "arith")]) + +(define_insn "xtrct_right" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 16)) + (ashift:SI (match_operand:SI 2 "arith_reg_operand" "r") + (const_int 16))))] + "" + "xtrct %2,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Unary arithmetic +;; ------------------------------------------------------------------------- + +(define_insn "negc" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (neg:SI (plus:SI (reg:SI 18) + (match_operand:SI 1 "arith_reg_operand" "r")))) + (set (reg:SI 18) + (ne:SI (ior:SI (reg:SI 18) (match_dup 1)) + (const_int 0)))] + "" + "negc %1,%0" + [(set_attr "type" "arith")]) + +(define_expand "negdi2" + [(set (match_operand:DI 0 "arith_reg_operand" "") + (neg:DI (match_operand:DI 1 "arith_reg_operand" ""))) + (clobber (reg:SI 18))] + "" + " +{ + int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1); + int high_word = (TARGET_LITTLE_ENDIAN ? 
1 : 0); + + rtx low_src = operand_subword (operands[1], low_word, 0, DImode); + rtx high_src = operand_subword (operands[1], high_word, 0, DImode); + + rtx low_dst = operand_subword (operands[0], low_word, 1, DImode); + rtx high_dst = operand_subword (operands[0], high_word, 1, DImode); + + emit_insn (gen_clrt ()); + emit_insn (gen_negc (low_dst, low_src)); + emit_insn (gen_negc (high_dst, high_src)); + DONE; +}") + +(define_insn "negsi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")))] + "" + "neg %1,%0" + [(set_attr "type" "arith")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (not:SI (match_operand:SI 1 "arith_reg_operand" "r")))] + "" + "not %1,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Zero extension instructions +;; ------------------------------------------------------------------------- + +(define_insn "zero_extendhisi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "arith_reg_operand" "r")))] + "" + "extu.w %1,%0" + [(set_attr "type" "arith")]) + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "arith_reg_operand" "r")))] + "" + "extu.b %1,%0" + [(set_attr "type" "arith")]) + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "arith_reg_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))] + "" + "extu.b %1,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Sign extension instructions +;; ------------------------------------------------------------------------- + +;; ??? This should be a define expand. +;; ??? Or perhaps it should be dropped? + +/* There is no point in defining extendsidi2; convert_move generates good + code for that. 
*/ + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "general_movsrc_operand" "r,m")))] + "" + "@ + exts.w %1,%0 + mov.w %1,%0" + [(set_attr "type" "arith,load")]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "general_movsrc_operand" "r,m")))] + "" + "@ + exts.b %1,%0 + mov.b %1,%0" + [(set_attr "type" "arith,load")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "arith_reg_operand" "=r,r") + (sign_extend:HI (match_operand:QI 1 "general_movsrc_operand" "r,m")))] + "" + "@ + exts.b %1,%0 + mov.b %1,%0" + [(set_attr "type" "arith,load")]) + +;; ------------------------------------------------------------------------- +;; Move instructions +;; ------------------------------------------------------------------------- + +;; define push and pop so it is easy for sh.c + +(define_expand "push" + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "register_operand" "r,l,x"))] + "" + "") + +(define_expand "pop" + [(set (match_operand:SI 0 "register_operand" "=r,l,x") + (mem:SI (post_inc:SI (reg:SI 15))))] + "" + "") + +(define_expand "push_e" + [(parallel [(set (mem:SF (pre_dec:SI (reg:SI 15))) + (match_operand:SF 0 "" "")) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + +(define_insn "push_fpul" + [(set (mem:SF (pre_dec:SI (reg:SI 15))) (reg:SF 22))] + "TARGET_SH3E" + "sts.l fpul,@-r15" + [(set_attr "type" "store") + (set_attr "hit_stack" "yes")]) + +;; DFmode pushes for sh4 require a lot of what is defined for movdf_i4, +;; so use that. +(define_expand "push_4" + [(parallel [(set (mem:DF (pre_dec:SI (reg:SI 15))) (match_operand:DF 0 "" "")) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + +(define_expand "pop_e" + [(parallel [(set (match_operand:SF 0 "" "") + (mem:SF (post_inc:SI (reg:SI 15)))) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + +(define_insn "pop_fpul" + [(set (reg:SF 22) (mem:SF (post_inc:SI (reg:SI 15))))] + "TARGET_SH3E" + "lds.l @r15+,fpul" + [(set_attr "type" "load") + (set_attr "hit_stack" "yes")]) + +(define_expand "pop_4" + [(parallel [(set (match_operand:DF 0 "" "") + (mem:DF (post_inc:SI (reg:SI 15)))) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + +;; These two patterns can happen as the result of optimization, when +;; comparisons get simplified to a move of zero or 1 into the T reg. +;; They don't disappear completely, because the T reg is a fixed hard reg. + +(define_insn "clrt" + [(set (reg:SI 18) (const_int 0))] + "" + "clrt") + +(define_insn "sett" + [(set (reg:SI 18) (const_int 1))] + "" + "sett") + +;; t/r is first, so that it will be preferred over r/r when reloading a move +;; of a pseudo-reg into the T reg +(define_insn "movsi_i" + [(set (match_operand:SI 0 "general_movdst_operand" "=t,r,r,r,r,r,m,<,<,xl,x,l,r") + (match_operand:SI 1 "general_movsrc_operand" "r,Q,rI,mr,xl,t,r,x,l,r,>,>,i"))] + " + ! 
TARGET_SH3E + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + cmp/pl %1 + mov.l %1,%0 + mov %1,%0 + mov.l %1,%0 + sts %1,%0 + movt %0 + mov.l %1,%0 + sts.l %1,%0 + sts.l %1,%0 + lds %1,%0 + lds.l %1,%0 + lds.l %1,%0 + fake %1,%0" + [(set_attr "type" "*,pcload_si,move,load_si,move,move,store,store,pstore,move,load,pload,pcload_si") + (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*")]) + +;; t/r must come after r/r, lest reload will try to reload stuff like +;; (subreg:SI (reg:SF 38 fr14) 0) into T (compiling stdlib/strtod.c -m3e -O2) +;; ??? This allows moves from macl to fpul to be recognized, but these moves +;; will require a reload. +(define_insn "movsi_ie" + [(set (match_operand:SI 0 "general_movdst_operand" "=r,r,t,r,r,r,m,<,<,xl,x,l,y,r,y,r,y") + (match_operand:SI 1 "general_movsrc_operand" "Q,rI,r,mr,xl,t,r,x,l,r,>,>,>,i,r,y,y"))] + "TARGET_SH3E + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov.l %1,%0 + mov %1,%0 + cmp/pl %1 + mov.l %1,%0 + sts %1,%0 + movt %0 + mov.l %1,%0 + sts.l %1,%0 + sts.l %1,%0 + lds %1,%0 + lds.l %1,%0 + lds.l %1,%0 + lds.l %1,%0 + fake %1,%0 + lds %1,%0 + sts %1,%0 + ! move optimized away" + [(set_attr "type" "pcload_si,move,*,load_si,move,move,store,store,pstore,move,load,pload,load,pcload_si,gp_fpul,gp_fpul,nil") + (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")]) + +(define_insn "movsi_i_lowpart" + [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "=r,r,r,r,r,m,r")) + (match_operand:SI 1 "general_movsrc_operand" "Q,rI,mr,xl,t,r,i"))] + "register_operand (operands[0], SImode) + || register_operand (operands[1], SImode)" + "@ + mov.l %1,%0 + mov %1,%0 + mov.l %1,%0 + sts %1,%0 + movt %0 + mov.l %1,%0 + fake %1,%0" + [(set_attr "type" "pcload,move,load,move,move,store,pcload")]) +(define_expand "movsi" + [(set (match_operand:SI 0 "general_movdst_operand" "") + (match_operand:SI 1 "general_movsrc_operand" ""))] + "" + "{ if (prepare_move_operands (operands, SImode)) DONE; }") + +(define_expand "ic_invalidate_line" + [(parallel [(unspec_volatile [(match_operand:SI 0 "register_operand" "+r") + (match_dup 1)] 12) + (clobber (scratch:SI))])] + "TARGET_HARD_SH4" + " +{ + operands[0] = force_reg (Pmode, operands[0]); + operands[1] = force_reg (Pmode, GEN_INT (0xf0000008)); +}") + +;; The address %0 is assumed to be 4-aligned at least. Thus, by ORing +;; 0xf0000008, we get the low-oder bits *1*00 (binary), ;; which fits +;; the requirement *0*00 for associative address writes. The alignment of +;; %0 implies that its least significant bit is cleared, +;; thus we clear the V bit of a matching entry if there is one. 
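+;; For instance (addresses purely illustrative): with %0 = 0x0c001230, the
+;; sequence below forms %2 = 0x00001230 | 0xf0000008 = 0xf0001238 and then
+;; does mov.l %0,@%2, i.e. an associative write into the instruction cache
+;; address array that invalidates the matching line, if any.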
+(define_insn "ic_invalidate_line_i" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "register_operand" "r,r")] 12) + (clobber (match_scratch:SI 2 "=&r,1"))] + "TARGET_HARD_SH4" + "ocbwb\\t@%0\;extu.w\\t%0,%2\;or\\t%r1,%r2\;mov.l\\t%0,@%2" + [(set_attr "length" "8")]) + +(define_insn "movqi_i" + [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l") + (match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))] + "arith_reg_operand (operands[0], QImode) + || arith_reg_operand (operands[1], QImode)" + "@ + mov %1,%0 + mov.b %1,%0 + mov.b %1,%0 + movt %0 + sts %1,%0 + lds %1,%0" + [(set_attr "type" "move,load,store,move,move,move")]) + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + "{ if (prepare_move_operands (operands, QImode)) DONE; }") + +(define_insn "movhi_i" + [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,r,m,r,l,r") + (match_operand:HI 1 "general_movsrc_operand" "Q,rI,m,t,r,l,r,i"))] + "arith_reg_operand (operands[0], HImode) + || arith_reg_operand (operands[1], HImode)" + "@ + mov.w %1,%0 + mov %1,%0 + mov.w %1,%0 + movt %0 + mov.w %1,%0 + sts %1,%0 + lds %1,%0 + fake %1,%0" + [(set_attr "type" "pcload,move,load,move,store,move,move,pcload")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_movdst_operand" "") + (match_operand:HI 1 "general_movsrc_operand" ""))] + "" + "{ if (prepare_move_operands (operands, HImode)) DONE; }") + +;; ??? This should be a define expand. + +;; x/r can be created by inlining/cse, e.g. for execute/961213-1.c +;; compiled with -m2 -ml -O3 -funroll-loops +(define_insn "" + [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m,r,r,r,*!x") + (match_operand:DI 1 "general_movsrc_operand" "Q,r,m,r,I,i,x,r"))] + "arith_reg_operand (operands[0], DImode) + || arith_reg_operand (operands[1], DImode)" + "* return output_movedouble (insn, operands, DImode);" + [(set_attr "length" "4") + (set_attr "type" "pcload,move,load,store,move,pcload,move,move")]) + +;; If the output is a register and the input is memory or a register, we have +;; to be careful and see which word needs to be loaded first. + +(define_split + [(set (match_operand:DI 0 "general_movdst_operand" "") + (match_operand:DI 1 "general_movsrc_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + " +{ + int regno; + + if ((GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC) + || (GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) == POST_INC)) + FAIL; + + if (GET_CODE (operands[0]) == REG) + regno = REGNO (operands[0]); + else if (GET_CODE (operands[0]) == SUBREG) + regno = REGNO (SUBREG_REG (operands[0])) + SUBREG_WORD (operands[0]); + else if (GET_CODE (operands[0]) == MEM) + regno = -1; + + if (regno == -1 + || ! 
refers_to_regno_p (regno, regno + 1, operands[1], 0)) + { + operands[2] = operand_subword (operands[0], 0, 0, DImode); + operands[3] = operand_subword (operands[1], 0, 0, DImode); + operands[4] = operand_subword (operands[0], 1, 0, DImode); + operands[5] = operand_subword (operands[1], 1, 0, DImode); + } + else + { + operands[2] = operand_subword (operands[0], 1, 0, DImode); + operands[3] = operand_subword (operands[1], 1, 0, DImode); + operands[4] = operand_subword (operands[0], 0, 0, DImode); + operands[5] = operand_subword (operands[1], 0, 0, DImode); + } + + if (operands[2] == 0 || operands[3] == 0 + || operands[4] == 0 || operands[5] == 0) + FAIL; +}") + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_movdst_operand" "") + (match_operand:DI 1 "general_movsrc_operand" ""))] + "" + "{ if ( prepare_move_operands (operands, DImode)) DONE; }") + +;; ??? This should be a define expand. + +(define_insn "movdf_k" + [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m") + (match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))] + "(! TARGET_SH4 || reload_completed + /* ??? We provide some insn so that direct_{load,store}[DFmode] get set */ + || GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3 + || GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3) + && (arith_reg_operand (operands[0], DFmode) + || arith_reg_operand (operands[1], DFmode))" + "* return output_movedouble (insn, operands, DFmode);" + [(set_attr "length" "4") + (set_attr "type" "move,pcload,load,store")]) + +;; All alternatives of movdf_i4 are split for ! TARGET_FMOVD. +;; However, the d/F/c/z alternative cannot be split directly; it is converted +;; with special code in machine_dependent_reorg into a load of the R0_REG and +;; the d/m/c/X alternative, which is split later into single-precision +;; instructions. And when not optimizing, no splits are done before fixing +;; up pcloads, so we need usable length information for that. +(define_insn "movdf_i4" + [(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d") + (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))] + "TARGET_SH4 + && (arith_reg_operand (operands[0], DFmode) + || arith_reg_operand (operands[1], DFmode))" + "@ + fmov %1,%0 + # + # + fmov.d %1,%0 + fmov.d %1,%0 + # + # + # + # + #" + [(set_attr_alternative "length" + [(if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 4)) + (const_int 4) + (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) + (if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6)) + (if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6)) + (const_int 4) + (const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn) + (const_int 8) (const_int 8)]) + (set_attr "type" "fmove,move,pcload,load,store,pcload,load,store,load,load")]) + +;; Moving DFmode between fp/general registers through memory +;; (the top of the stack) is faster than moving through fpul even for +;; little endian. Because the type of an instruction is important for its +;; scheduling, it is beneficial to split these operations, rather than +;; emitting them in one single chunk, even if this will expose a stack +;; use that will prevent scheduling of other stack accesses beyond this +;; instruction. 
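+;; Concretely, the split below bounces the value through the stack: one
+;; movdf_i4 stores through (pre_dec r15) and a second one reloads through
+;; (post_inc r15), with REG_INC notes added for both stack adjustments.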
+(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "=X"))] + "TARGET_SH4 && reload_completed + && (true_regnum (operands[0]) < 16) != (true_regnum (operands[1]) < 16)" + [(const_int 0)] + " +{ + rtx insn, tos; + + tos = gen_rtx (MEM, DFmode, gen_rtx (PRE_DEC, Pmode, stack_pointer_rtx)); + insn = emit_insn (gen_movdf_i4 (tos, operands[1], operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, stack_pointer_rtx, NULL_RTX); + tos = gen_rtx (MEM, DFmode, gen_rtx (POST_INC, Pmode, stack_pointer_rtx)); + insn = emit_insn (gen_movdf_i4 (operands[0], tos, operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, stack_pointer_rtx, NULL_RTX); + DONE; +}") + +;; local-alloc sometimes allocates scratch registers even when not required, +;; so we must be prepared to handle these. + +;; Remove the use and clobber from a movdf_i4 so that we can use movdf_k. +(define_split + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 + && reload_completed + && true_regnum (operands[0]) < 16 + && true_regnum (operands[1]) < 16" + [(set (match_dup 0) (match_dup 1))] + " +{ + /* If this was a reg <-> mem operation with base + index reg addressing, + we have to handle this in a special way. */ + rtx mem = operands[0]; + int store_p = 1; + if (! memory_operand (mem, DFmode)) + { + mem = operands[1]; + store_p = 0; + } + if (GET_CODE (mem) == SUBREG && SUBREG_WORD (mem) == 0) + mem = SUBREG_REG (mem); + if (GET_CODE (mem) == MEM) + { + rtx addr = XEXP (mem, 0); + if (GET_CODE (addr) == PLUS + && GET_CODE (XEXP (addr, 0)) == REG + && GET_CODE (XEXP (addr, 1)) == REG) + { + int offset; + rtx reg0 = gen_rtx (REG, Pmode, 0); + rtx regop = operands[store_p], word0 ,word1; + + if (GET_CODE (regop) == SUBREG) + regop = alter_subreg (regop); + if (REGNO (XEXP (addr, 0)) == REGNO (XEXP (addr, 1))) + offset = 2; + else + offset = 4; + mem = copy_rtx (mem); + PUT_MODE (mem, SImode); + word0 = gen_rtx(SUBREG, SImode, regop, 0); + emit_insn (store_p + ? gen_movsi_ie (mem, word0) : gen_movsi_ie (word0, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset))); + mem = copy_rtx (mem); + word1 = gen_rtx(SUBREG, SImode, regop, 1); + emit_insn (store_p + ? gen_movsi_ie (mem, word1) : gen_movsi_ie (word1, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset))); + DONE; + } + } +}") + +;; Split away the clobber of r0 after machine_dependent_reorg has fixed pcloads. +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 0))] + "TARGET_SH4 && reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 2)) + (clobber (scratch:SI))])] + "") + +(define_expand "reload_indf" + [(parallel [(set (match_operand:DF 0 "register_operand" "=f") + (match_operand:DF 1 "immediate_operand" "FQ")) + (use (reg:PSI 48)) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "" + "") + +(define_expand "reload_outdf" + [(parallel [(set (match_operand:DF 0 "register_operand" "=r,f") + (match_operand:DF 1 "register_operand" "af,r")) + (clobber (match_operand:SI 2 "register_operand" "=&y,y"))])] + "" + "") + +;; Simplify no-op moves. 
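+;; Such moves appear when both operands end up in the same hard register
+;; after reload; the split below rewrites the insn as a bare self-move,
+;; dropping the fpscr use and the scratch clobber.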
+(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH3E && reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(set (match_dup 0) (match_dup 0))] + "") + +;; fmovd substitute post-reload splits +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! TARGET_FMOVD && reload_completed + && true_regnum (operands[0]) >= FIRST_FP_REG + && true_regnum (operands[1]) >= FIRST_FP_REG" + [(const_int 0)] + " +{ + int dst = true_regnum (operands[0]), src = true_regnum (operands[1]); + emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, dst), + gen_rtx (REG, SFmode, src), operands[2])); + emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, dst + 1), + gen_rtx (REG, SFmode, src + 1), operands[2])); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (mem:DF (match_operand:SI 1 "register_operand" ""))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! TARGET_FMOVD && reload_completed + && true_regnum (operands[0]) >= FIRST_FP_REG + && find_regno_note (insn, REG_DEAD, true_regnum (operands[1]))" + [(const_int 0)] + " +{ + int regno = true_regnum (operands[0]); + rtx insn; + rtx mem2 = gen_rtx (MEM, SFmode, gen_rtx (POST_INC, Pmode, operands[1])); + + insn = emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, + regno + !! TARGET_LITTLE_ENDIAN), + mem2, operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[1], NULL_RTX); + insn = emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, + regno + ! TARGET_LITTLE_ENDIAN), + gen_rtx (MEM, SFmode, operands[1]), + operands[2])); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! TARGET_FMOVD && reload_completed + && true_regnum (operands[0]) >= FIRST_FP_REG" + [(const_int 0)] + " +{ + int regno = true_regnum (operands[0]); + rtx addr, insn, adjust = NULL_RTX; + rtx mem2 = copy_rtx (operands[1]); + rtx reg0 = gen_rtx_REG (SFmode, regno + !! TARGET_LITTLE_ENDIAN); + rtx reg1 = gen_rtx_REG (SFmode, regno + ! TARGET_LITTLE_ENDIAN); + + PUT_MODE (mem2, SFmode); + operands[1] = copy_rtx (mem2); + addr = XEXP (mem2, 0); + if (GET_CODE (addr) != POST_INC) + { + /* If we have to modify the stack pointer, the value that we have + read with post-increment might be modified by an interrupt, + so write it back. */ + if (REGNO (addr) == STACK_POINTER_REGNUM) + adjust = gen_push_e (reg0); + else + adjust = gen_addsi3 (addr, addr, GEN_INT (-4)); + XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr); + } + addr = XEXP (addr, 0); + insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2])); + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, addr, NULL_RTX); + insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + if (adjust) + emit_insn (adjust); + else + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, addr, NULL_RTX); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "memory_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! 
TARGET_FMOVD && reload_completed + && true_regnum (operands[1]) >= FIRST_FP_REG" + [(const_int 0)] + " +{ + int regno = true_regnum (operands[1]); + rtx insn, addr, adjust = NULL_RTX; + + operands[0] = copy_rtx (operands[0]); + PUT_MODE (operands[0], SFmode); + insn = emit_insn (gen_movsf_ie (operands[0], + gen_rtx (REG, SFmode, + regno + ! TARGET_LITTLE_ENDIAN), + operands[2])); + operands[0] = copy_rtx (operands[0]); + addr = XEXP (operands[0], 0); + if (GET_CODE (addr) != PRE_DEC) + { + adjust = gen_addsi3 (addr, addr, GEN_INT (4)); + emit_insn_before (adjust, insn); + XEXP (operands[0], 0) = addr = gen_rtx (PRE_DEC, SImode, addr); + } + addr = XEXP (addr, 0); + if (! adjust) + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX); + insn = emit_insn (gen_movsf_ie (operands[0], + gen_rtx (REG, SFmode, + regno + !! TARGET_LITTLE_ENDIAN), + operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX); + DONE; +}") + +;; The '&' for operand 2 is not really true, but push_secondary_reload +;; insists on it. +;; Operand 1 must accept FPUL_REGS in case fpul is reloaded to memory, +;; to avoid a bogus tertiary reload. +;; We need a tertiary reload when a floating point register is reloaded +;; to memory, so the predicate for operand 0 must accept this, while the +;; constraint of operand 1 must reject the secondary reload register. +;; Thus, the secondary reload register for this case has to be GENERAL_REGS, +;; too. +;; By having the predicate for operand 0 reject any register, we make +;; sure that the ordinary moves that just need an intermediate register +;; won't get a bogus tertiary reload. +;; We use tertiary_reload_operand instead of memory_operand here because +;; memory_operand rejects operands that are not directly addressible, e.g.: +;; (mem:SF (plus:SI (reg:SI 14 r14) +;; (const_int 132))) + +(define_expand "reload_outsf" + [(parallel [(set (match_operand:SF 2 "register_operand" "=&r") + (match_operand:SF 1 "register_operand" "y")) + (clobber (scratch:SI))]) + (parallel [(set (match_operand:SF 0 "tertiary_reload_operand" "=m") + (match_dup 2)) + (clobber (scratch:SI))])] + "" + "") + +;; If the output is a register and the input is memory or a register, we have +;; to be careful and see which word needs to be loaded first. + +(define_split + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + " +{ + int regno; + + if ((GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC) + || (GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) == POST_INC)) + FAIL; + + if (GET_CODE (operands[0]) == REG) + regno = REGNO (operands[0]); + else if (GET_CODE (operands[0]) == SUBREG) + regno = REGNO (SUBREG_REG (operands[0])) + SUBREG_WORD (operands[0]); + else if (GET_CODE (operands[0]) == MEM) + regno = -1; + + if (regno == -1 + || ! 
refers_to_regno_p (regno, regno + 1, operands[1], 0)) + { + operands[2] = operand_subword (operands[0], 0, 0, DFmode); + operands[3] = operand_subword (operands[1], 0, 0, DFmode); + operands[4] = operand_subword (operands[0], 1, 0, DFmode); + operands[5] = operand_subword (operands[1], 1, 0, DFmode); + } + else + { + operands[2] = operand_subword (operands[0], 1, 0, DFmode); + operands[3] = operand_subword (operands[1], 1, 0, DFmode); + operands[4] = operand_subword (operands[0], 0, 0, DFmode); + operands[5] = operand_subword (operands[1], 0, 0, DFmode); + } + + if (operands[2] == 0 || operands[3] == 0 + || operands[4] == 0 || operands[5] == 0) + FAIL; +}") + +;; If a base address generated by LEGITIMIZE_ADDRESS for SImode is +;; used only once, let combine add in the index again. + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "" "")) + (clobber (match_operand 2 "register_operand" ""))] + "! reload_in_progress && ! reload_completed" + [(use (reg:SI 0))] + " +{ + rtx addr, reg, const_int; + + if (GET_CODE (operands[1]) != MEM) + FAIL; + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) != PLUS) + FAIL; + reg = XEXP (addr, 0); + const_int = XEXP (addr, 1); + if (GET_CODE (reg) != REG || GET_CODE (const_int) != CONST_INT) + FAIL; + emit_move_insn (operands[2], const_int); + emit_move_insn (operands[0], + change_address (operands[1], VOIDmode, + gen_rtx (PLUS, SImode, reg, operands[2]))); + DONE; +}") + +(define_split + [(set (match_operand:SI 1 "" "") + (match_operand:SI 0 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))] + "! reload_in_progress && ! reload_completed" + [(use (reg:SI 0))] + " +{ + rtx addr, reg, const_int; + + if (GET_CODE (operands[1]) != MEM) + FAIL; + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) != PLUS) + FAIL; + reg = XEXP (addr, 0); + const_int = XEXP (addr, 1); + if (GET_CODE (reg) != REG || GET_CODE (const_int) != CONST_INT) + FAIL; + emit_move_insn (operands[2], const_int); + emit_move_insn (change_address (operands[1], VOIDmode, + gen_rtx (PLUS, SImode, reg, operands[2])), + operands[0]); + DONE; +}") + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" ""))] + "" + " +{ + if (prepare_move_operands (operands, DFmode)) DONE; + if (TARGET_SH4) + { + if (no_new_pseudos) + { + /* ??? FIXME: This is only a stopgap fix. There is no guarantee + that fpscr is in the right state. */ + emit_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } + emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ())); + /* We need something to tag possible REG_LIBCALL notes on to. */ + if (TARGET_FPU_SINGLE && rtx_equal_function_value_matters + && GET_CODE (operands[0]) == REG) + emit_insn (gen_mov_nop (operands[0])); + DONE; + } +}") + + +(define_insn "movsf_i" + [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r") + (match_operand:SF 1 "general_movsrc_operand" "r,I,FQ,mr,r,r,l"))] + " + (! TARGET_SH3E + /* ??? 
We provide some insn so that direct_{load,store}[SFmode] get set */ + || GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3 + || GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3) + && (arith_reg_operand (operands[0], SFmode) + || arith_reg_operand (operands[1], SFmode))" + "@ + mov %1,%0 + mov %1,%0 + mov.l %1,%0 + mov.l %1,%0 + mov.l %1,%0 + lds %1,%0 + sts %1,%0" + [(set_attr "type" "move,move,pcload,load,store,move,move")]) + +;; We may not split the ry/yr/XX alternatives to movsi_ie, since +;; update_flow_info would not know where to put REG_EQUAL notes +;; when the destination changes mode. +(define_insn "movsf_ie" + [(set (match_operand:SF 0 "general_movdst_operand" + "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,y") + (match_operand:SF 1 "general_movsrc_operand" + "f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,X,X,&z,X,X,X,X,X,X,X,X,y,X,X,X"))] + + "TARGET_SH3E + && (arith_reg_operand (operands[0], SFmode) + || arith_reg_operand (operands[1], SFmode))" + "@ + fmov %1,%0 + mov %1,%0 + fldi0 %0 + fldi1 %0 + # + fmov.s %1,%0 + fmov.s %1,%0 + mov.l %1,%0 + mov.l %1,%0 + mov.l %1,%0 + fsts fpul,%0 + flds %1,fpul + lds.l %1,%0 + # + sts %1,%0 + lds %1,%0 + ! move optimized away" + [(set_attr "type" "fmove,move,fmove,fmove,pcload,load,store,pcload,load,store,fmove,fmove,load,*,gp_fpul,gp_fpul,nil") + (set_attr "length" "*,*,*,*,4,*,*,*,*,*,2,2,2,4,2,2,0")]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "" + [(parallel [(set (reg:SF 22) (match_dup 1)) + (use (match_dup 2)) + (clobber (scratch:SI))]) + (parallel [(set (match_dup 0) (reg:SF 22)) + (use (match_dup 2)) + (clobber (scratch:SI))])] + "") + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_movdst_operand" "") + (match_operand:SF 1 "general_movsrc_operand" ""))] + "" + " +{ + if (prepare_move_operands (operands, SFmode)) + DONE; + if (TARGET_SH3E) + { + if (no_new_pseudos) + { + /* ??? FIXME: This is only a stopgap fix. There is no guarantee + that fpscr is in the right state. */ + emit_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } + emit_sf_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ())); + /* We need something to tag possible REG_LIBCALL notes on to. */ + if (! 
TARGET_FPU_SINGLE && rtx_equal_function_value_matters + && GET_CODE (operands[0]) == REG) + emit_insn (gen_mov_nop (operands[0])); + DONE; + } +}") + +(define_insn "mov_nop" + [(set (match_operand 0 "register_operand" "") (match_dup 0))] + "TARGET_SH3E" + "" + [(set_attr "length" "0") + (set_attr "type" "nil")]) + +(define_expand "reload_insf" + [(parallel [(set (match_operand:SF 0 "register_operand" "=f") + (match_operand:SF 1 "immediate_operand" "FQ")) + (use (reg:PSI 48)) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "" + "") + +(define_expand "reload_insi" + [(parallel [(set (match_operand:SF 0 "register_operand" "=y") + (match_operand:SF 1 "immediate_operand" "FQ")) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "" + "") + +(define_insn "*movsi_y" + [(set (match_operand:SI 0 "register_operand" "=y,y") + (match_operand:SI 1 "immediate_operand" "Qi,I")) + (clobber (match_scratch:SI 3 "=&z,r"))] + "TARGET_SH3E + && (reload_in_progress || reload_completed)" + "#" + [(set_attr "length" "4") + (set_attr "type" "pcload,move")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "y") + (match_operand:SI 1 "immediate_operand" "I")) + (clobber (match_operand:SI 2 "register_operand" "r"))] + "" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:SI 0 "register_operand" "y") + (match_operand:SI 1 "memory_operand" ">")) + (clobber (reg:SI 0))] + "" + [(set (match_dup 0) (match_dup 1))] + "") + +;; ------------------------------------------------------------------------ +;; Define the real conditional branch instructions. +;; ------------------------------------------------------------------------ + +(define_insn "branch_true" + [(set (pc) (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* return output_branch (1, insn, operands);" + [(set_attr "type" "cbranch")]) + +(define_insn "branch_false" + [(set (pc) (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* return output_branch (0, insn, operands);" + [(set_attr "type" "cbranch")]) + +;; Patterns to prevent reorg from re-combining a condbranch with a branch +;; which destination is too far away. +;; The const_int_operand is distinct for each branch target; it avoids +;; unwanted matches with redundant_insn. +(define_insn "block_branch_redirect" + [(set (pc) (unspec [(match_operand 0 "const_int_operand" "")] 4))] + "" + "" + [(set_attr "length" "0")]) + +;; This one has the additional purpose to record a possible scratch register +;; for the following branch. 
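+;; (Presumably the register recorded here is what a later fixup can use to
+;; load the branch target when the conditional branch has to be rewritten
+;; as an indirect jump; the insn itself expands to no code.)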
+(define_insn "indirect_jump_scratch" + [(set (match_operand 0 "register_operand" "r") + (unspec [(match_operand 1 "const_int_operand" "")] 4))] + "" + "" + [(set_attr "length" "0")]) + +;; Conditional branch insns + +(define_expand "beq" + [(set (pc) + (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, EQ);") + +(define_expand "bne" + [(set (pc) + (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, EQ);") + +(define_expand "bgt" + [(set (pc) + (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, GT);") + +(define_expand "blt" + [(set (pc) + (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT) + { + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + emit_insn (gen_bgt (operands[0])); + DONE; + } + from_compare (operands, GE); +}") + +(define_expand "ble" + [(set (pc) + (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + if (TARGET_SH3E + && TARGET_IEEE + && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT) + { + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + emit_insn (gen_bge (operands[0])); + DONE; + } + from_compare (operands, GT); +}") + +(define_expand "bge" + [(set (pc) + (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + if (TARGET_SH3E + && ! TARGET_IEEE + && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT) + { + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + emit_insn (gen_ble (operands[0])); + DONE; + } + from_compare (operands, GE); +}") + +(define_expand "bgtu" + [(set (pc) + (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, GTU); ") + +(define_expand "bltu" + [(set (pc) + (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, GEU);") + +(define_expand "bgeu" + [(set (pc) + (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, GEU);") + +(define_expand "bleu" + [(set (pc) + (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, GTU);") + +;; ------------------------------------------------------------------------ +;; Jump and linkage insns +;; ------------------------------------------------------------------------ + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "* +{ + /* The length is 16 if the delay slot is unfilled. 
*/ + if (get_attr_length(insn) > 4) + return output_far_jump(insn, operands[0]); + else + return \"bra %l0%#\"; +}" + [(set_attr "type" "jump") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "calli" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (use (reg:SI 48)) + (clobber (reg:SI 17))] + "" + "jsr @%0%#" + [(set_attr "type" "call") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "call_valuei" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (use (reg:SI 48)) + (clobber (reg:SI 17))] + "" + "jsr @%1%#" + [(set_attr "type" "call") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "call" + [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" "")) + (match_operand 1 "" "")) + (use (reg:SI 48)) + (clobber (reg:SI 17))])] + "" + "operands[0] = force_reg (SImode, XEXP (operands[0], 0));") + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "arith_reg_operand" "") + (call (mem:SI (match_operand 1 "arith_reg_operand" "")) + (match_operand 2 "" ""))) + (use (reg:SI 48)) + (clobber (reg:SI 17))])] + "" + "operands[1] = force_reg (SImode, XEXP (operands[1], 0));") + +(define_insn "indirect_jump" + [(set (pc) + (match_operand:SI 0 "arith_reg_operand" "r"))] + "" + "jmp @%0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +;; The use of operand 1 / 2 helps us distinguish case table jumps +;; which can be present in structured code from indirect jumps which can not +;; be present in structured code. This allows -fprofile-arcs to work. + +;; For SH1 processors. +(define_insn "casesi_jump_1" + [(set (pc) + (match_operand:SI 0 "register_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "" + "jmp @%0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +;; For all later processors. +(define_insn "casesi_jump_2" + [(set (pc) (plus:SI (match_operand:SI 0 "register_operand" "r") + (label_ref (match_operand 1 "" "")))) + (use (label_ref (match_operand 2 "" "")))] + "! INSN_UID (operands[1]) || prev_real_insn (operands[1]) == insn" + "braf %0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +;; Call subroutine returning any type. +;; ??? This probably doesn't work. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "TARGET_SH3E" + " +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}") + +;; ------------------------------------------------------------------------ +;; Misc insns +;; ------------------------------------------------------------------------ + +(define_insn "dect" + [(set (reg:SI 18) + (eq:SI (match_operand:SI 0 "arith_reg_operand" "+r") (const_int 1))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))] + "TARGET_SH2" + "dt %0" + [(set_attr "type" "arith")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop") + +;; Load address of a label. 
This is only generated by the casesi expand, +;; and by machine_dependent_reorg (fixing up fp moves). +;; This must use unspec, because this only works for labels that are +;; within range, + +(define_insn "mova" + [(set (reg:SI 0) + (unspec [(label_ref (match_operand 0 "" ""))] 1))] + "" + "mova %O0,r0" + [(set_attr "in_delay_slot" "no") + (set_attr "type" "arith")]) + +;; case instruction for switch statements. + +;; Operand 0 is index +;; operand 1 is the minimum bound +;; operand 2 is the maximum bound - minimum bound + 1 +;; operand 3 is CODE_LABEL for the table; +;; operand 4 is the CODE_LABEL to go to if index out of range. + +(define_expand "casesi" + [(match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "") + (match_operand 3 "" "") (match_operand 4 "" "")] + "" + " +{ + rtx reg = gen_reg_rtx (SImode); + rtx reg2 = gen_reg_rtx (SImode); + operands[1] = copy_to_mode_reg (SImode, operands[1]); + operands[2] = copy_to_mode_reg (SImode, operands[2]); + /* If optimizing, casesi_worker depends on the mode of the instruction + before label it 'uses' - operands[3]. */ + emit_insn (gen_casesi_0 (operands[0], operands[1], operands[2], operands[4], + reg)); + emit_insn (gen_casesi_worker_0 (reg2, reg, operands[3])); + if (TARGET_SH2) + emit_jump_insn (gen_casesi_jump_2 (reg2, gen_label_rtx (), operands[3])); + else + emit_jump_insn (gen_casesi_jump_1 (reg2, operands[3])); + /* For SH2 and newer, the ADDR_DIFF_VEC is not actually relative to + operands[3], but to lab. We will fix this up in + machine_dependent_reorg. */ + emit_barrier (); + DONE; +}") + +(define_expand "casesi_0" + [(set (match_operand:SI 4 "" "") (match_operand:SI 0 "arith_reg_operand" "")) + (set (match_dup 4) (minus:SI (match_dup 4) + (match_operand:SI 1 "arith_operand" ""))) + (set (reg:SI 18) + (gtu:SI (match_dup 4) + (match_operand:SI 2 "arith_reg_operand" ""))) + (set (pc) + (if_then_else (ne (reg:SI 18) + (const_int 0)) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "") + +;; ??? reload might clobber r0 if we use it explicitly in the RTL before +;; reload; using a R0_REGS pseudo reg is likely to give poor code. +;; So we keep the use of r0 hidden in a R0_REGS clobber until after reload. + +(define_insn "casesi_worker_0" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec [(match_operand 1 "register_operand" "0,r") + (label_ref (match_operand 2 "" ""))] 2)) + (clobber (match_scratch:SI 3 "=X,1")) + (clobber (match_scratch:SI 4 "=&z,z"))] + "" + "#") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (unspec [(match_operand 1 "register_operand" "") + (label_ref (match_operand 2 "" ""))] 2)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 ""))] + "! 
TARGET_SH2 && reload_completed" + [(set (reg:SI 0) (unspec [(label_ref (match_dup 2))] 1)) + (parallel [(set (match_dup 0) + (unspec [(reg:SI 0) (match_dup 1) (label_ref (match_dup 2))] 2)) + (clobber (match_dup 3))]) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI 0)))] + "LABEL_NUSES (operands[2])++;") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (unspec [(match_operand 1 "register_operand" "") + (label_ref (match_operand 2 "" ""))] 2)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 ""))] + "TARGET_SH2 && reload_completed" + [(set (reg:SI 0) (unspec [(label_ref (match_dup 2))] 1)) + (parallel [(set (match_dup 0) + (unspec [(reg:SI 0) (match_dup 1) (label_ref (match_dup 2))] 2)) + (clobber (match_dup 3))])] + "LABEL_NUSES (operands[2])++;") + +(define_insn "*casesi_worker" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec [(reg:SI 0) (match_operand 1 "register_operand" "0,r") + (label_ref (match_operand 2 "" ""))] 2)) + (clobber (match_scratch:SI 3 "=X,1"))] + "" + "* +{ + rtx diff_vec = PATTERN (next_real_insn (operands[2])); + + if (GET_CODE (diff_vec) != ADDR_DIFF_VEC) + abort (); + + switch (GET_MODE (diff_vec)) + { + case SImode: + return \"shll2 %1\;mov.l @(r0,%1),%0\"; + case HImode: + return \"add %1,%1\;mov.w @(r0,%1),%0\"; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return \"mov.b @(r0,%1),%0\;extu.b %0,%0\"; + return \"mov.b @(r0,%1),%0\"; + default: + abort (); + } +}" + [(set_attr "length" "4")]) + +;; ??? This is not the proper place to invoke another compiler pass; +;; Alas, there is no proper place to put it. +;; ??? This is also an odd place for the call to emit_fpscr_use. It +;; would be all right if it were for an define_expand for return, but +;; that doesn't mix with emitting a prologue. 
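+;; (Note that the condition string below is a C comma expression: it calls
+;; emit_fpscr_use and remove_dead_before_cse purely for their side effects,
+;; and only the final reload_completed term decides whether the pattern
+;; is available.)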
+(define_insn "return" + [(return)] + "emit_fpscr_use (), + remove_dead_before_cse (), + reload_completed" + "%@ %#" + [(set_attr "type" "return") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "prologue" + [(const_int 0)] + "" + "sh_expand_prologue (); DONE;") + +(define_expand "epilogue" + [(return)] + "" + "sh_expand_epilogue ();") + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] 0)] + "" + "" + [(set_attr "length" "0")]) + +;; ------------------------------------------------------------------------ +;; Scc instructions +;; ------------------------------------------------------------------------ + +(define_insn "movt" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (eq:SI (reg:SI 18) (const_int 1)))] + "" + "movt %0" + [(set_attr "type" "arith")]) + +(define_expand "seq" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (EQ);") + +(define_expand "slt" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (LT);") + +(define_expand "sle" + [(match_operand:SI 0 "arith_reg_operand" "")] + "" + " +{ + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + emit_insn (gen_sge (operands[0])); + DONE; +}") + +(define_expand "sgt" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (GT);") + +(define_expand "sge" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + " +{ + if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT) + { + if (TARGET_IEEE) + { + rtx t_reg = gen_rtx (REG, SImode, T_REG); + rtx lab = gen_label_rtx (); + prepare_scc_operands (EQ); + emit_jump_insn (gen_branch_true (lab)); + prepare_scc_operands (GT); + emit_label (lab); + emit_insn (gen_movt (operands[0])); + } + else + emit_insn (gen_movnegt (operands[0], prepare_scc_operands (LT))); + DONE; + } + operands[1] = prepare_scc_operands (GE); +}") + +(define_expand "sgtu" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (GTU);") + +(define_expand "sltu" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (LTU);") + +(define_expand "sleu" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (LEU);") + +(define_expand "sgeu" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (GEU);") + +;; sne moves the complement of the T reg to DEST like this: +;; cmp/eq ... +;; mov #-1,temp +;; negc temp,dest +;; This is better than xoring compare result with 1 because it does +;; not require r0 and further, the -1 may be CSE-ed or lifted out of a +;; loop. 
+ +(define_expand "sne" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (match_operand:SI 0 "arith_reg_operand" "") + (neg:SI (plus:SI (match_dup 1) + (match_dup 2)))) + (set (reg:SI 18) + (ne:SI (ior:SI (match_dup 1) (match_dup 2)) + (const_int 0)))])] + "" + " +{ + operands[1] = prepare_scc_operands (EQ); + operands[2] = gen_reg_rtx (SImode); +}") + +;; Use the same trick for FP sle / sge +(define_expand "movnegt" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (match_operand 0 "" "") + (neg:SI (plus:SI (match_dup 1) + (match_dup 2)))) + (set (reg:SI 18) + (ne:SI (ior:SI (match_operand 1 "" "") (match_dup 2)) + (const_int 0)))])] + "" + "operands[2] = gen_reg_rtx (SImode);") + +;; Recognize mov #-1/negc/neg sequence, and change it to movt/add #-1. +;; This prevents a regression that occurred when we switched from xor to +;; mov/neg for sne. + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "") + (plus:SI (reg:SI 18) + (const_int -1)))] + "" + [(set (match_dup 0) (eq:SI (reg:SI 18) (const_int 1))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))] + "") + +;; ------------------------------------------------------------------------- +;; Instructions to cope with inline literal tables +;; ------------------------------------------------------------------------- + +; 2 byte integer in line + +(define_insn "consttable_2" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")] 2)] + "" + "* +{ + assemble_integer (operands[0], 2, 1); + return \"\"; +}" + [(set_attr "length" "2") + (set_attr "in_delay_slot" "no")]) + +; 4 byte integer in line + +(define_insn "consttable_4" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")] 4)] + "" + "* +{ + assemble_integer (operands[0], 4, 1); + return \"\"; +}" + [(set_attr "length" "4") + (set_attr "in_delay_slot" "no")]) + +; 8 byte integer in line + +(define_insn "consttable_8" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")] 6)] + "" + "* +{ + assemble_integer (operands[0], 8, 1); + return \"\"; +}" + [(set_attr "length" "8") + (set_attr "in_delay_slot" "no")]) + +; 4 byte floating point + +(define_insn "consttable_sf" + [(unspec_volatile [(match_operand:SF 0 "general_operand" "=g")] 4)] + "" + "* +{ + union real_extract u; + bcopy ((char *) &CONST_DOUBLE_LOW (operands[0]), (char *) &u, sizeof u); + assemble_real (u.d, SFmode); + return \"\"; +}" + [(set_attr "length" "4") + (set_attr "in_delay_slot" "no")]) + +; 8 byte floating point + +(define_insn "consttable_df" + [(unspec_volatile [(match_operand:DF 0 "general_operand" "=g")] 6)] + "" + "* +{ + union real_extract u; + bcopy ((char *) &CONST_DOUBLE_LOW (operands[0]), (char *) &u, sizeof u); + assemble_real (u.d, DFmode); + return \"\"; +}" + [(set_attr "length" "8") + (set_attr "in_delay_slot" "no")]) + +;; Alignment is needed for some constant tables; it may also be added for +;; Instructions at the start of loops, or after unconditional branches. +;; ??? We would get more accurate lengths if we did instruction +;; alignment based on the value of INSN_CURRENT_ADDRESS; the approach used +;; here is too conservative. + +; align to a two byte boundary + +(define_expand "align_2" + [(unspec_volatile [(const_int 1)] 1)] + "" + "") + +; align to a four byte boundary +;; align_4 and align_log are instructions for the starts of loops, or +;; after unconditional branches, which may take up extra room. 
+ +(define_expand "align_4" + [(unspec_volatile [(const_int 2)] 1)] + "" + "") + +; align to a cache line boundary + +(define_insn "align_log" + [(unspec_volatile [(match_operand 0 "const_int_operand" "")] 1)] + "" + "" + [(set_attr "length" "0") + (set_attr "in_delay_slot" "no")]) + +; emitted at the end of the literal table, used to emit the +; 32bit branch labels if needed. + +(define_insn "consttable_end" + [(unspec_volatile [(const_int 0)] 11)] + "" + "* return output_jump_label_table ();" + [(set_attr "in_delay_slot" "no")]) + +;; ------------------------------------------------------------------------- +;; Misc +;; ------------------------------------------------------------------------- + +;; String/block move insn. + +(define_expand "movstrsi" + [(parallel [(set (mem:BLK (match_operand:BLK 0 "" "")) + (mem:BLK (match_operand:BLK 1 "" ""))) + (use (match_operand:SI 2 "nonmemory_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (clobber (reg:SI 0))])] + "" + " +{ + if(expand_block_move (operands)) + DONE; + else FAIL; +}") + +(define_insn "block_move_real" + [(parallel [(set (mem:BLK (reg:SI 4)) + (mem:BLK (reg:SI 5))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (clobber (reg:SI 17)) + (clobber (reg:SI 0))])] + "! TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_lump_real" + [(parallel [(set (mem:BLK (reg:SI 4)) + (mem:BLK (reg:SI 5))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (use (reg:SI 6)) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (clobber (reg:SI 6)) + (clobber (reg:SI 0))])] + "! TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_move_real_i4" + [(parallel [(set (mem:BLK (reg:SI 4)) + (mem:BLK (reg:SI 5))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (clobber (reg:SI 17)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2))])] + "TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_lump_real_i4" + [(parallel [(set (mem:BLK (reg:SI 4)) + (mem:BLK (reg:SI 5))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (use (reg:SI 6)) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (clobber (reg:SI 6)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI 3))])] + "TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +;; ------------------------------------------------------------------------- +;; Floating point instructions. +;; ------------------------------------------------------------------------- + +;; ??? All patterns should have a type attribute. 
+ +(define_expand "fpu_switch0" + [(set (match_operand:SI 0 "" "") (symbol_ref "__fpscr_values")) + (set (match_dup 2) (match_dup 1))] + "" + " +{ + operands[1] = gen_rtx (MEM, PSImode, operands[0]); + RTX_UNCHANGING_P (operands[1]) = 1; + operands[2] = get_fpscr_rtx (); +}") + +(define_expand "fpu_switch1" + [(set (match_operand:SI 0 "" "") (symbol_ref "__fpscr_values")) + (set (match_dup 1) (plus:SI (match_dup 0) (const_int 4))) + (set (match_dup 3) (match_dup 2))] + "" + " +{ + operands[1] = gen_reg_rtx (SImode); + operands[2] = gen_rtx (MEM, PSImode, operands[1]); + RTX_UNCHANGING_P (operands[2]) = 1; + operands[3] = get_fpscr_rtx (); +}") + +(define_expand "movpsi" + [(set (match_operand:PSI 0 "register_operand" "") + (match_operand:PSI 1 "general_movsrc_operand" ""))] + "" + "") + +;; The c / m alternative is a fake to guide reload to load directly into +;; fpscr, since reload doesn't know how to use post-increment. +;; GO_IF_LEGITIMATE_ADDRESS guards about bogus addresses before reload, +;; SECONDARY_INPUT_RELOAD_CLASS does this during reload, and the insn's +;; predicate after reload. +;; The gp_fpul type for r/!c might look a bit odd, but it actually schedules +;; like a gpr <-> fpul move. +(define_insn "fpu_switch" + [(set (match_operand:PSI 0 "register_operand" "c,c,r,c,c,r,m,r") + (match_operand:PSI 1 "general_movsrc_operand" "c,>,m,m,r,r,r,!c"))] + "! reload_completed + || true_regnum (operands[0]) != FPSCR_REG || GET_CODE (operands[1]) != MEM + || GET_CODE (XEXP (operands[1], 0)) != PLUS" + "@ + ! precision stays the same + lds.l %1,fpscr + mov.l %1,%0 + # + lds %1,fpscr + mov %1,%0 + mov.l %1,%0 + sts fpscr,%0" + [(set_attr "length" "0,2,2,4,2,2,2,2") + (set_attr "type" "dfp_conv,dfp_conv,load,dfp_conv,dfp_conv,move,store,gp_fpul")]) + +(define_split + [(set (reg:PSI 48) (mem:PSI (match_operand:SI 0 "register_operand" "r")))] + "find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" + [(set (match_dup 0) (match_dup 0))] + " +{ + rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (), + gen_rtx (MEM, PSImode, + gen_rtx (POST_INC, Pmode, + operands[0])))); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[0], NULL_RTX); +}") + +(define_split + [(set (reg:PSI 48) (mem:PSI (match_operand:SI 0 "register_operand" "r")))] + "" + [(set (match_dup 0) (plus:SI (match_dup 0) (const_int -4)))] + " +{ + rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (), + gen_rtx (MEM, PSImode, + gen_rtx (POST_INC, Pmode, + operands[0])))); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[0], NULL_RTX); +}") + +;; ??? This uses the fp unit, but has no type indicating that. +;; If we did that, this would either give a bogus latency or introduce +;; a bogus FIFO constraint. +;; Since this insn is currently only used for prologues/epilogues, +;; it is probably best to claim no function unit, which matches the +;; current setting. 
+(define_insn "toggle_sz" + [(set (reg:PSI 48) (xor:PSI (reg:PSI 48) (const_int 1048576)))] + "TARGET_SH4" + "fschg") + +(define_expand "addsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_binop (&gen_addsf3_i, operands); DONE; }") + +(define_insn "addsf3_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (plus:SF (match_operand:SF 1 "arith_reg_operand" "%0") + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fadd %2,%0" + [(set_attr "type" "fp")]) + +(define_expand "subsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_binop (&gen_subsf3_i, operands); DONE; }") + +(define_insn "subsf3_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (minus:SF (match_operand:SF 1 "arith_reg_operand" "0") + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fsub %2,%0" + [(set_attr "type" "fp")]) + +;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR +;; register in feeding fp instructions. Thus, we cannot generate fmac for +;; mixed-precision SH4 targets. To allow it to be still generated for the +;; SH3E, we use a separate insn for SH3E mulsf3. + +(define_expand "mulsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + " +{ + if (TARGET_SH4) + expand_sf_binop (&gen_mulsf3_i4, operands); + else + emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_insn "mulsf3_i4" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (mult:SF (match_operand:SF 1 "arith_reg_operand" "%0") + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fmul %2,%0" + [(set_attr "type" "fp")]) + +(define_insn "mulsf3_ie" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (mult:SF (match_operand:SF 1 "arith_reg_operand" "%0") + (match_operand:SF 2 "arith_reg_operand" "f")))] + "TARGET_SH3E && ! TARGET_SH4" + "fmul %2,%0" + [(set_attr "type" "fp")]) + +(define_insn "*macsf3" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "arith_reg_operand" "%w") + (match_operand:SF 2 "arith_reg_operand" "f")) + (match_operand:SF 3 "arith_reg_operand" "0"))) + (use (match_operand:PSI 4 "fpscr_operand" "c"))] + "TARGET_SH3E && ! 
TARGET_SH4" + "fmac fr0,%2,%0" + [(set_attr "type" "fp")]) + +(define_expand "divsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_binop (&gen_divsf3_i, operands); DONE; }") + +(define_insn "divsf3_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (div:SF (match_operand:SF 1 "arith_reg_operand" "0") + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fdiv %2,%0" + [(set_attr "type" "fdiv")]) + +(define_expand "floatsisf2" + [(set (reg:SI 22) + (match_operand:SI 1 "arith_reg_operand" "")) + (parallel [(set (match_operand:SF 0 "arith_reg_operand" "") + (float:SF (reg:SI 22))) + (use (match_dup 2))])] + "TARGET_SH3E" + " +{ + if (TARGET_SH4) + { + emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 22), + operands[1])); + emit_sf_insn (gen_floatsisf2_i4 (operands[0], get_fpscr_rtx ())); + DONE; + } + operands[2] = get_fpscr_rtx (); +}") + +(define_insn "floatsisf2_i4" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (float:SF (reg:SI 22))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH3E" + "float fpul,%0" + [(set_attr "type" "fp")]) + +(define_insn "*floatsisf2_ie" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (float:SF (reg:SI 22)))] + "TARGET_SH3E && ! TARGET_SH4" + "float fpul,%0" + [(set_attr "type" "fp")]) + +(define_expand "fix_truncsfsi2" + [(set (reg:SI 22) + (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) + (set (match_operand:SI 0 "arith_reg_operand" "=r") + (reg:SI 22))] + "TARGET_SH3E" + " +{ + if (TARGET_SH4) + { + emit_sf_insn (gen_fix_truncsfsi2_i4 (operands[1], get_fpscr_rtx ())); + emit_insn (gen_rtx (SET, VOIDmode, operands[0], + gen_rtx (REG, SImode, 22))); + DONE; + } +}") + +(define_insn "fix_truncsfsi2_i4" + [(set (reg:SI 22) + (fix:SI (match_operand:SF 0 "arith_reg_operand" "f"))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "ftrc %0,fpul" + [(set_attr "type" "fp")]) + +(define_insn "fix_truncsfsi2_i4_2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (reg:SI 48)) + (clobber (reg:SI 22))] + "TARGET_SH4" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "TARGET_SH4" + [(parallel [(set (reg:SI 22) (fix:SI (match_dup 1))) + (use (match_dup 2))]) + (set (match_dup 0) (reg:SI 22))]) + +(define_insn "*fixsfsi" + [(set (reg:SI 22) + (fix:SI (match_operand:SF 0 "arith_reg_operand" "f")))] + "TARGET_SH3E && ! TARGET_SH4" + "ftrc %0,fpul" + [(set_attr "type" "fp")]) + +(define_insn "cmpgtsf_t" + [(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f")))] + "TARGET_SH3E && ! TARGET_SH4" + "fcmp/gt %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "cmpeqsf_t" + [(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f")))] + "TARGET_SH3E && ! TARGET_SH4" + "fcmp/eq %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "ieee_ccmpeqsf_t" + [(set (reg:SI 18) (ior:SI (reg:SI 18) + (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f"))))] + "TARGET_SH3E && TARGET_IEEE && ! 
TARGET_SH4" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4")]) + + +(define_insn "cmpgtsf_t_i4" + [(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/gt %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "cmpeqsf_t_i4" + [(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/eq %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "*ieee_ccmpeqsf_t_4" + [(set (reg:SI 18) (ior:SI (reg:SI 18) + (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f")))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_IEEE && TARGET_SH4" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4")]) + +(define_expand "cmpsf" + [(set (reg:SI 18) (compare (match_operand:SF 0 "arith_operand" "") + (match_operand:SF 1 "arith_operand" "")))] + "TARGET_SH3E" + " +{ + sh_compare_op0 = operands[0]; + sh_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "negsf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_unop (&gen_negsf2_i, operands); DONE; }") + +(define_insn "negsf2_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (neg:SF (match_operand:SF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fneg %0" + [(set_attr "type" "fmove")]) + +(define_expand "sqrtsf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_unop (&gen_sqrtsf2_i, operands); DONE; }") + +(define_insn "sqrtsf2_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (sqrt:SF (match_operand:SF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fsqrt %0" + [(set_attr "type" "fdiv")]) + +(define_expand "abssf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_unop (&gen_abssf2_i, operands); DONE; }") + +(define_insn "abssf2_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (abs:SF (match_operand:SF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fabs %0" + [(set_attr "type" "fmove")]) + +(define_expand "adddf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_adddf3_i, operands); DONE; }") + +(define_insn "adddf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (plus:DF (match_operand:DF 1 "arith_reg_operand" "%0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4" + "fadd %2,%0" + [(set_attr "type" "dfp_arith")]) + +(define_expand "subdf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_subdf3_i, operands); DONE; }") + +(define_insn "subdf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (minus:DF (match_operand:DF 1 "arith_reg_operand" "0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + 
"TARGET_SH4" + "fsub %2,%0" + [(set_attr "type" "dfp_arith")]) + +(define_expand "muldf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_muldf3_i, operands); DONE; }") + +(define_insn "muldf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (mult:DF (match_operand:DF 1 "arith_reg_operand" "%0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4" + "fmul %2,%0" + [(set_attr "type" "dfp_arith")]) + +(define_expand "divdf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_divdf3_i, operands); DONE; }") + +(define_insn "divdf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (div:DF (match_operand:DF 1 "arith_reg_operand" "0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4" + "fdiv %2,%0" + [(set_attr "type" "dfdiv")]) + +(define_expand "floatsidf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "")] + "TARGET_SH4" + " +{ + emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 22), operands[1])); + emit_df_insn (gen_floatsidf2_i (operands[0], get_fpscr_rtx ())); + DONE; +}") + +(define_insn "floatsidf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (float:DF (reg:SI 22))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "float fpul,%0" + [(set_attr "type" "dfp_conv")]) + +(define_expand "fix_truncdfsi2" + [(match_operand:SI 0 "arith_reg_operand" "=r") + (match_operand:DF 1 "arith_reg_operand" "f")] + "TARGET_SH4" + " +{ + emit_df_insn (gen_fix_truncdfsi2_i (operands[1], get_fpscr_rtx ())); + emit_insn (gen_rtx (SET, VOIDmode, operands[0], gen_rtx (REG, SImode, 22))); + DONE; +}") + +(define_insn "fix_truncdfsi2_i" + [(set (reg:SI 22) + (fix:SI (match_operand:DF 0 "arith_reg_operand" "f"))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "ftrc %0,fpul" + [(set_attr "type" "dfp_conv")]) + +(define_insn "fix_truncdfsi2_i4" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "TARGET_SH4" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "TARGET_SH4" + [(parallel [(set (reg:SI 22) (fix:SI (match_dup 1))) + (use (match_dup 2))]) + (set (match_dup 0) (reg:SI 22))]) + +(define_insn "cmpgtdf_t" + [(set (reg:SI 18) (gt:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/gt %1,%0" + [(set_attr "type" "dfp_cmp")]) + +(define_insn "cmpeqdf_t" + [(set (reg:SI 18) (eq:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/eq %1,%0" + [(set_attr "type" "dfp_cmp")]) + +(define_insn "*ieee_ccmpeqdf_t" + [(set (reg:SI 18) (ior:SI (reg:SI 18) + (eq:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f")))) + (use 
(match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_IEEE && TARGET_SH4" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4")]) + +(define_expand "cmpdf" + [(set (reg:SI 18) (compare (match_operand:DF 0 "arith_operand" "") + (match_operand:DF 1 "arith_operand" "")))] + "TARGET_SH4" + " +{ + sh_compare_op0 = operands[0]; + sh_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "negdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_unop (&gen_negdf2_i, operands); DONE; }") + +(define_insn "negdf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (neg:DF (match_operand:DF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fneg %0" + [(set_attr "type" "fmove")]) + +(define_expand "sqrtdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_unop (&gen_sqrtdf2_i, operands); DONE; }") + +(define_insn "sqrtdf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (sqrt:DF (match_operand:DF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fsqrt %0" + [(set_attr "type" "dfdiv")]) + +(define_expand "absdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_unop (&gen_absdf2_i, operands); DONE; }") + +(define_insn "absdf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (abs:DF (match_operand:DF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fabs %0" + [(set_attr "type" "fmove")]) + +(define_expand "extendsfdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH4" + " +{ + emit_sf_insn (gen_movsf_ie (gen_rtx (REG, SFmode, 22), operands[1], + get_fpscr_rtx ())); + emit_df_insn (gen_extendsfdf2_i4 (operands[0], get_fpscr_rtx ())); + DONE; +}") + +(define_insn "extendsfdf2_i4" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (float_extend:DF (reg:SF 22))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcnvsd fpul,%0" + [(set_attr "type" "fp")]) + +(define_expand "truncdfsf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + " +{ + emit_df_insn (gen_truncdfsf2_i4 (operands[1], get_fpscr_rtx ())); + emit_sf_insn (gen_movsf_ie (operands[0], gen_rtx (REG, SFmode, 22), + get_fpscr_rtx ())); + DONE; +}") + +(define_insn "truncdfsf2_i4" + [(set (reg:SF 22) + (float_truncate:SF (match_operand:DF 0 "arith_reg_operand" "f"))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcnvds %0,fpul" + [(set_attr "type" "fp")]) + +;; Bit field extract patterns. These give better code for packed bitfields, +;; because they allow auto-increment addresses to be generated. + +(define_expand "insv" + [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "") + (match_operand:SI 1 "immediate_operand" "") + (match_operand:SI 2 "immediate_operand" "")) + (match_operand:SI 3 "general_operand" ""))] + "! TARGET_LITTLE_ENDIAN" + " +{ + rtx addr_target, orig_address, shift_reg; + HOST_WIDE_INT size; + + /* ??? expmed doesn't care for non-register predicates. */ + if (! memory_operand (operands[0], VOIDmode) + || ! immediate_operand (operands[1], VOIDmode) + || ! immediate_operand (operands[2], VOIDmode) + || ! 
general_operand (operands[3], VOIDmode)) + FAIL; + /* If this isn't a 16 / 24 / 32 bit field, or if + it doesn't start on a byte boundary, then fail. */ + size = INTVAL (operands[1]); + if (size < 16 || size > 32 || size % 8 != 0 + || (INTVAL (operands[2]) % 8) != 0) + FAIL; + + size /= 8; + orig_address = XEXP (operands[0], 0); + addr_target = gen_reg_rtx (SImode); + shift_reg = gen_reg_rtx (SImode); + emit_insn (gen_movsi (shift_reg, operands[3])); + emit_insn (gen_addsi3 (addr_target, orig_address, GEN_INT (size - 1))); + + operands[0] = change_address (operands[0], QImode, addr_target); + emit_insn (gen_movqi (operands[0], gen_rtx (SUBREG, QImode, shift_reg, 0))); + + while (size -= 1) + { + emit_insn (gen_lshrsi3_k (shift_reg, shift_reg, GEN_INT (8))); + emit_insn (gen_addsi3 (addr_target, addr_target, GEN_INT (-1))); + emit_insn (gen_movqi (operands[0], + gen_rtx (SUBREG, QImode, shift_reg, 0))); + } + + DONE; +}") + +;; ------------------------------------------------------------------------- +;; Peepholes +;; ------------------------------------------------------------------------- + +;; This matches cases where a stack pointer increment at the start of the +;; epilogue combines with a stack slot read loading the return value. + +(define_peephole + [(set (match_operand:SI 0 "arith_reg_operand" "") + (mem:SI (match_operand:SI 1 "arith_reg_operand" ""))) + (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))] + "REGNO (operands[1]) != REGNO (operands[0])" + "mov.l @%1+,%0") + +;; See the comment on the dt combiner pattern above. + +(define_peephole + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (plus:SI (match_dup 0) + (const_int -1))) + (set (reg:SI 18) + (eq:SI (match_dup 0) + (const_int 0)))] + "TARGET_SH2" + "dt %0") + +;; These convert sequences such as `mov #k,r0; add r15,r0; mov.l @r0,rn' +;; to `mov #k,r0; mov.l @(r0,r15),rn'. These sequences are generated by +;; reload when the constant is too large for a reg+offset address. + +;; ??? We would get much better code if this was done in reload. This would +;; require modifying find_reloads_address to recognize that if the constant +;; is out-of-range for an immediate add, then we get better code by reloading +;; the constant into a register than by reloading the sum into a register, +;; since the former is one instruction shorter if the address does not need +;; to be offsettable. Unfortunately this does not work, because there is +;; only one register, r0, that can be used as an index register. This register +;; is also the function return value register. So, if we try to force reload +;; to use double-reg addresses, then we end up with some instructions that +;; need to use r0 twice. The only way to fix this is to change the calling +;; convention so that r0 is not used to return values. 
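A rough example of source code that produces such sequences (deep_frame is an illustrative name; the exact frame layout is up to the compiler): mov.l @(disp,Rn) only encodes a 4-bit displacement scaled by 4, i.e. offsets 0..60 bytes, so a stack slot beyond that window makes reload materialize the offset in r0 first, yielding exactly the mov #k,r0 / add r15,r0 sequences these peepholes clean up.

/* With enough other locals, "slot" can end up more than 60 bytes from
   r15, so its address no longer fits the @(disp,Rn) form and may be
   accessed via an r0 + r15 indexed address instead.  */
int
deep_frame (void)
{
  volatile int pad[32];    /* roughly 128 bytes of other locals */
  volatile int slot = 42;
  pad[0] = 0;
  return slot;
}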
+ +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 2 "general_movsrc_operand" ""))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.l %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 2 "general_movdst_operand" "") + (mem:SI (match_dup 0)))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.l @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:HI (match_dup 0)) + (match_operand:HI 2 "general_movsrc_operand" ""))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.w %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:HI 2 "general_movdst_operand" "") + (mem:HI (match_dup 0)))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.w @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:QI (match_dup 0)) + (match_operand:QI 2 "general_movsrc_operand" ""))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.b %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:QI 2 "general_movdst_operand" "") + (mem:QI (match_dup 0)))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.b @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 2 "general_movsrc_operand" ""))] + "REGNO (operands[0]) == 0 + && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) < 16) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) < 16)) + && reg_unused_after (operands[0], insn)" + "mov.l %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SF 2 "general_movdst_operand" "") + + (mem:SF (match_dup 0)))] + "REGNO (operands[0]) == 0 + && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) < 16) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) < 16)) + && reg_unused_after (operands[0], insn)" + "mov.l @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 2 "general_movsrc_operand" ""))] + "REGNO (operands[0]) == 0 + && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) >= FIRST_FP_REG) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) >= FIRST_FP_REG)) + && reg_unused_after (operands[0], insn)" + "fmov{.s|} %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SF 2 "general_movdst_operand" "") 
+ + (mem:SF (match_dup 0)))] + "REGNO (operands[0]) == 0 + && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) >= FIRST_FP_REG) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) >= FIRST_FP_REG)) + && reg_unused_after (operands[0], insn)" + "fmov{.s|} @(%0,%1),%2") + +;; Switch to a new stack with its address in sp_switch (a SYMBOL_REF). */ +(define_insn "sp_switch_1" + [(const_int 1)] + "" + "* +{ + rtx xoperands[1]; + + xoperands[0] = sp_switch; + output_asm_insn (\"mov.l r0,@-r15\;mov.l %0,r0\", xoperands); + output_asm_insn (\"mov.l @r0,r0\;mov.l r15,@-r0\", xoperands); + return \"mov r0,r15\"; +}" + [(set_attr "length" "10")]) + +;; Switch back to the original stack for interrupt functions with the +;; sp_switch attribute. */ +(define_insn "sp_switch_2" + [(const_int 2)] + "" + "mov.l @r15+,r15\;mov.l @r15+,r0" + [(set_attr "length" "4")]) diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh new file mode 100755 index 0000000..bfbf45e --- /dev/null +++ b/gcc/config/sh/t-sh @@ -0,0 +1,29 @@ +CROSS_LIBGCC1 = libgcc1-asm.a +LIB1ASMSRC = sh/lib1funcs.asm +LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movstr \ + _movstr_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr + +# These are really part of libgcc1, but this will cause them to be +# built correctly, so... + +LIB2FUNCS_EXTRA = fp-bit.c dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >>dp-bit.c + echo '#endif' >> dp-bit.c + cat $(srcdir)/config/fp-bit.c >> dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >>fp-bit.c + echo '#endif' >> fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +MULTILIB_OPTIONS= ml m2/m3e/m4-single-only/m4-single/m4 +MULTILIB_DIRNAMES= +MULTILIB_MATCHES = m2=m3 + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/sh/xm-sh.h b/gcc/config/sh/xm-sh.h new file mode 100755 index 0000000..f51b787 --- /dev/null +++ b/gcc/config/sh/xm-sh.h @@ -0,0 +1,42 @@ +/* Configuration for GNU C-compiler for Hitachi SH. + Copyright (C) 1993, 1997 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* #defines that need visibility everywhere. */ +#define FALSE 0 +#define TRUE 1 + +/* This describes the machine the compiler is hosted on. */ +#define HOST_BITS_PER_CHAR 8 +#define HOST_BITS_PER_SHORT 16 +#define HOST_BITS_PER_INT 32 +#define HOST_BITS_PER_LONG 32 + +/* If compiled with GNU C, use the built-in alloca. */ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#endif + +/* target machine dependencies. + tm.h is a symbolic link to the actual target specific file. */ +#include "tm.h" + +/* Arguments to use with `exit'. 
*/ +#define SUCCESS_EXIT_CODE 0 +#define FATAL_EXIT_CODE 33