Diffstat (limited to 'gcc/config/sh')
-rwxr-xr-x  gcc/config/sh/elf.h           123
-rwxr-xr-x  gcc/config/sh/lib1funcs.asm  1206
-rwxr-xr-x  gcc/config/sh/rtems.h          35
-rwxr-xr-x  gcc/config/sh/rtemself.h       33
-rwxr-xr-x  gcc/config/sh/sh.c           4786
-rwxr-xr-x  gcc/config/sh/sh.h           2232
-rwxr-xr-x  gcc/config/sh/sh.md          4654
-rwxr-xr-x  gcc/config/sh/t-sh             29
-rwxr-xr-x  gcc/config/sh/xm-sh.h          42
9 files changed, 13140 insertions, 0 deletions
diff --git a/gcc/config/sh/elf.h b/gcc/config/sh/elf.h
new file mode 100755
index 0000000..68cc691
--- /dev/null
+++ b/gcc/config/sh/elf.h
@@ -0,0 +1,123 @@
+/* Definitions of target machine for gcc for Hitachi Super-H using ELF.
+ Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+ Contributed by Ian Lance Taylor <ian@cygnus.com>.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* Mostly like the regular SH configuration. */
+#include "sh/sh.h"
+
+/* No SDB debugging info. */
+#undef SDB_DEBUGGING_INFO
+
+/* Undefine some macros defined in both sh.h and svr4.h. */
+#undef IDENT_ASM_OP
+#undef ASM_FILE_END
+#undef ASM_OUTPUT_SOURCE_LINE
+#undef DBX_OUTPUT_MAIN_SOURCE_FILE_END
+#undef CTORS_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
+#undef ASM_OUTPUT_SECTION_NAME
+#undef ASM_OUTPUT_CONSTRUCTOR
+#undef ASM_OUTPUT_DESTRUCTOR
+#undef ASM_DECLARE_FUNCTION_NAME
+#undef PREFERRED_DEBUGGING_TYPE
+#undef MAX_OFILE_ALIGNMENT
+
+/* Be ELF-like. */
+#include "svr4.h"
+
+/* The prefix to add to user-visible assembler symbols.
+   Note that svr4.h redefines it, overriding the value from sh.h
+   (which is the one we want).  */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+#undef ASM_FILE_START
+#define ASM_FILE_START(FILE) do { \
+ output_file_directive ((FILE), main_input_filename); \
+ if (TARGET_LITTLE_ENDIAN) \
+ fprintf ((FILE), "\t.little\n"); \
+} while (0)
+
+
+
+/* Let code know that this is ELF. */
+#define CPP_PREDEFINES "-D__sh__ -D__ELF__ -Acpu(sh) -Amachine(sh)"
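+
+/* As an illustration (not something this header requires), code built
+   for this target can then key off those predefines:
+
+	#if defined (__sh__) && defined (__ELF__)
+	  ... SH ELF specific declarations ...
+	#endif
+*/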
+
+/* Pass -ml and -mrelax to the assembler and linker. */
+#undef ASM_SPEC
+#define ASM_SPEC "%{ml:-little} %{mrelax:-relax}"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{ml:-m shlelf} %{mrelax:-relax}"
+
+/* svr4.h undefines DBX_REGISTER_NUMBER, so we need to define it
+   again.  */
+#define DBX_REGISTER_NUMBER(REGNO) \
+ (((REGNO) >= 22 && (REGNO) <= 39) ? ((REGNO) + 1) : (REGNO))
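+
+/* Spot checks of the mapping above (illustration only): registers
+   outside the 22..39 window pass through unchanged, while those
+   inside it are shifted up by one:
+
+	DBX_REGISTER_NUMBER (4)  == 4
+	DBX_REGISTER_NUMBER (22) == 23
+	DBX_REGISTER_NUMBER (39) == 40
+	DBX_REGISTER_NUMBER (40) == 40
+*/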
+
+/* SH ELF, unlike most ELF implementations, uses underscores before
+ symbol names. */
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(STREAM,NAME) \
+ asm_fprintf (STREAM, "%U%s", NAME)
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \
+ sprintf ((STRING), "*%s%s%d", LOCAL_LABEL_PREFIX, (PREFIX), (NUM))
+
+#undef ASM_OUTPUT_INTERNAL_LABEL
+#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \
+ asm_fprintf ((FILE), "%L%s%d:\n", (PREFIX), (NUM))
+
+#undef ASM_OUTPUT_SOURCE_LINE
+#define ASM_OUTPUT_SOURCE_LINE(file, line) \
+do \
+ { \
+ static int sym_lineno = 1; \
+ asm_fprintf ((file), ".stabn 68,0,%d,%LLM%d-", \
+ (line), sym_lineno); \
+ assemble_name ((file), \
+ XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));\
+ asm_fprintf ((file), "\n%LLM%d:\n", sym_lineno); \
+ sym_lineno += 1; \
+ } \
+while (0)
+
+#undef DBX_OUTPUT_MAIN_SOURCE_FILE_END
+#define DBX_OUTPUT_MAIN_SOURCE_FILE_END(FILE, FILENAME) \
+do { \
+ text_section (); \
+ fprintf ((FILE), "\t.stabs \"\",%d,0,0,Letext\nLetext:\n", N_SO); \
+} while (0)
+
+/* Arrange to call __main, rather than using crtbegin.o and crtend.o
+ and relying on .init and .fini being executed at appropriate times. */
+#undef INIT_SECTION_ASM_OP
+#undef FINI_SECTION_ASM_OP
+#undef STARTFILE_SPEC
+#undef ENDFILE_SPEC
+
+/* HANDLE_SYSV_PRAGMA (defined by svr4.h) takes precedence over HANDLE_PRAGMA.
+ We want to use the HANDLE_PRAGMA from sh.h. */
+#undef HANDLE_SYSV_PRAGMA
diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm
new file mode 100755
index 0000000..bf9ea9a
--- /dev/null
+++ b/gcc/config/sh/lib1funcs.asm
@@ -0,0 +1,1206 @@
+/* Copyright (C) 1994, 1995, 1997, 1998 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file. (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* As a special exception, if you link this library with other files,
+ some of which are compiled with GCC, to produce an executable,
+ this library does not by itself cause the resulting executable
+ to be covered by the GNU General Public License.
+ This exception does not however invalidate any other reasons why
+ the executable file might be covered by the GNU General Public License. */
+
+
+!! libgcc1 routines for the Hitachi SH cpu.
+!! Contributed by Steve Chamberlain.
+!! sac@cygnus.com
+
+!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
+!! recoded in assembly by Toshiyasu Morita
+!! tm@netcom.com
+
+/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
+ ELF local label prefixes by Jörn Rennecke
+ amylaar@cygnus.com */
+
+#ifdef __ELF__
+#define LOCAL(X) .L_##X
+#else
+#define LOCAL(X) L_##X
+#endif
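+
+/* Expansion check for the macro above (illustration only):
+
+	LOCAL(ashrsi3_table) -> .L_ashrsi3_table under __ELF__, which
+	the assembler treats as a local label per the "." prefix set
+	in elf.h;
+	LOCAL(ashrsi3_table) -> L_ashrsi3_table otherwise, an ordinary
+	non-global label for COFF.
+*/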
+
+#ifdef L_ashiftrt
+ .global ___ashiftrt_r4_0
+ .global ___ashiftrt_r4_1
+ .global ___ashiftrt_r4_2
+ .global ___ashiftrt_r4_3
+ .global ___ashiftrt_r4_4
+ .global ___ashiftrt_r4_5
+ .global ___ashiftrt_r4_6
+ .global ___ashiftrt_r4_7
+ .global ___ashiftrt_r4_8
+ .global ___ashiftrt_r4_9
+ .global ___ashiftrt_r4_10
+ .global ___ashiftrt_r4_11
+ .global ___ashiftrt_r4_12
+ .global ___ashiftrt_r4_13
+ .global ___ashiftrt_r4_14
+ .global ___ashiftrt_r4_15
+ .global ___ashiftrt_r4_16
+ .global ___ashiftrt_r4_17
+ .global ___ashiftrt_r4_18
+ .global ___ashiftrt_r4_19
+ .global ___ashiftrt_r4_20
+ .global ___ashiftrt_r4_21
+ .global ___ashiftrt_r4_22
+ .global ___ashiftrt_r4_23
+ .global ___ashiftrt_r4_24
+ .global ___ashiftrt_r4_25
+ .global ___ashiftrt_r4_26
+ .global ___ashiftrt_r4_27
+ .global ___ashiftrt_r4_28
+ .global ___ashiftrt_r4_29
+ .global ___ashiftrt_r4_30
+ .global ___ashiftrt_r4_31
+ .global ___ashiftrt_r4_32
+
+ .align 1
+___ashiftrt_r4_32:
+___ashiftrt_r4_31:
+ rotcl r4
+ rts
+ subc r4,r4
+
+___ashiftrt_r4_30:
+ shar r4
+___ashiftrt_r4_29:
+ shar r4
+___ashiftrt_r4_28:
+ shar r4
+___ashiftrt_r4_27:
+ shar r4
+___ashiftrt_r4_26:
+ shar r4
+___ashiftrt_r4_25:
+ shar r4
+___ashiftrt_r4_24:
+ shlr16 r4
+ shlr8 r4
+ rts
+ exts.b r4,r4
+
+___ashiftrt_r4_23:
+ shar r4
+___ashiftrt_r4_22:
+ shar r4
+___ashiftrt_r4_21:
+ shar r4
+___ashiftrt_r4_20:
+ shar r4
+___ashiftrt_r4_19:
+ shar r4
+___ashiftrt_r4_18:
+ shar r4
+___ashiftrt_r4_17:
+ shar r4
+___ashiftrt_r4_16:
+ shlr16 r4
+ rts
+ exts.w r4,r4
+
+___ashiftrt_r4_15:
+ shar r4
+___ashiftrt_r4_14:
+ shar r4
+___ashiftrt_r4_13:
+ shar r4
+___ashiftrt_r4_12:
+ shar r4
+___ashiftrt_r4_11:
+ shar r4
+___ashiftrt_r4_10:
+ shar r4
+___ashiftrt_r4_9:
+ shar r4
+___ashiftrt_r4_8:
+ shar r4
+___ashiftrt_r4_7:
+ shar r4
+___ashiftrt_r4_6:
+ shar r4
+___ashiftrt_r4_5:
+ shar r4
+___ashiftrt_r4_4:
+ shar r4
+___ashiftrt_r4_3:
+ shar r4
+___ashiftrt_r4_2:
+ shar r4
+___ashiftrt_r4_1:
+ rts
+ shar r4
+
+___ashiftrt_r4_0:
+ rts
+ nop
+#endif
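+
+/* A minimal C model of the entry points above (assuming >> on int is
+   an arithmetic shift): each ___ashiftrt_r4_N shifts r4 right by
+   exactly N.  The N=31 and N=32 entries share code because rotcl
+   copies the sign bit into T and subc r4,r4 then smears it across
+   the register, giving 0 or -1.
+
+	int ashiftrt_r4_model (int x, int n)
+	{
+	  if (n >= 31)
+	    return x < 0 ? -1 : 0;
+	  return x >> n;
+	}
+*/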
+
+#ifdef L_ashiftrt_n
+
+!
+! ___ashrsi3
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! (none)
+!
+
+ .global ___ashrsi3
+ .align 2
+___ashrsi3:
+ mov #31,r0
+ and r0,r5
+ mova LOCAL(ashrsi3_table),r0
+ mov.b @(r0,r5),r5
+#ifdef __sh1__
+ add r5,r0
+ jmp @r0
+#else
+ braf r5
+#endif
+ mov r4,r0
+
+ .align 2
+LOCAL(ashrsi3_table):
+ .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
+
+LOCAL(ashrsi3_31):
+ rotcl r0
+ rts
+ subc r0,r0
+
+LOCAL(ashrsi3_30):
+ shar r0
+LOCAL(ashrsi3_29):
+ shar r0
+LOCAL(ashrsi3_28):
+ shar r0
+LOCAL(ashrsi3_27):
+ shar r0
+LOCAL(ashrsi3_26):
+ shar r0
+LOCAL(ashrsi3_25):
+ shar r0
+LOCAL(ashrsi3_24):
+ shlr16 r0
+ shlr8 r0
+ rts
+ exts.b r0,r0
+
+LOCAL(ashrsi3_23):
+ shar r0
+LOCAL(ashrsi3_22):
+ shar r0
+LOCAL(ashrsi3_21):
+ shar r0
+LOCAL(ashrsi3_20):
+ shar r0
+LOCAL(ashrsi3_19):
+ shar r0
+LOCAL(ashrsi3_18):
+ shar r0
+LOCAL(ashrsi3_17):
+ shar r0
+LOCAL(ashrsi3_16):
+ shlr16 r0
+ rts
+ exts.w r0,r0
+
+LOCAL(ashrsi3_15):
+ shar r0
+LOCAL(ashrsi3_14):
+ shar r0
+LOCAL(ashrsi3_13):
+ shar r0
+LOCAL(ashrsi3_12):
+ shar r0
+LOCAL(ashrsi3_11):
+ shar r0
+LOCAL(ashrsi3_10):
+ shar r0
+LOCAL(ashrsi3_9):
+ shar r0
+LOCAL(ashrsi3_8):
+ shar r0
+LOCAL(ashrsi3_7):
+ shar r0
+LOCAL(ashrsi3_6):
+ shar r0
+LOCAL(ashrsi3_5):
+ shar r0
+LOCAL(ashrsi3_4):
+ shar r0
+LOCAL(ashrsi3_3):
+ shar r0
+LOCAL(ashrsi3_2):
+ shar r0
+LOCAL(ashrsi3_1):
+ rts
+ shar r0
+
+LOCAL(ashrsi3_0):
+ rts
+ nop
+
+#endif
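+
+/* The dispatch above masks the shift count to five bits, loads a
+   signed byte offset from the table, and branches that far into the
+   shar ladder, so the net effect is simply (again assuming >> on int
+   is an arithmetic shift):
+
+	int ashrsi3_model (int x, int n)
+	{
+	  return x >> (n & 31);
+	}
+*/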
+
+#ifdef L_ashiftlt
+
+!
+! ___ashlsi3
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! (none)
+!
+ .global ___ashlsi3
+ .align 2
+___ashlsi3:
+ mov #31,r0
+ and r0,r5
+ mova LOCAL(ashlsi3_table),r0
+ mov.b @(r0,r5),r5
+#ifdef __sh1__
+ add r5,r0
+ jmp @r0
+#else
+ braf r5
+#endif
+ mov r4,r0
+
+ .align 2
+LOCAL(ashlsi3_table):
+ .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
+
+LOCAL(ashlsi3_6):
+ shll2 r0
+LOCAL(ashlsi3_4):
+ shll2 r0
+LOCAL(ashlsi3_2):
+ rts
+ shll2 r0
+
+LOCAL(ashlsi3_7):
+ shll2 r0
+LOCAL(ashlsi3_5):
+ shll2 r0
+LOCAL(ashlsi3_3):
+ shll2 r0
+LOCAL(ashlsi3_1):
+ rts
+ shll r0
+
+LOCAL(ashlsi3_14):
+ shll2 r0
+LOCAL(ashlsi3_12):
+ shll2 r0
+LOCAL(ashlsi3_10):
+ shll2 r0
+LOCAL(ashlsi3_8):
+ rts
+ shll8 r0
+
+LOCAL(ashlsi3_15):
+ shll2 r0
+LOCAL(ashlsi3_13):
+ shll2 r0
+LOCAL(ashlsi3_11):
+ shll2 r0
+LOCAL(ashlsi3_9):
+ shll8 r0
+ rts
+ shll r0
+
+LOCAL(ashlsi3_22):
+ shll2 r0
+LOCAL(ashlsi3_20):
+ shll2 r0
+LOCAL(ashlsi3_18):
+ shll2 r0
+LOCAL(ashlsi3_16):
+ rts
+ shll16 r0
+
+LOCAL(ashlsi3_23):
+ shll2 r0
+LOCAL(ashlsi3_21):
+ shll2 r0
+LOCAL(ashlsi3_19):
+ shll2 r0
+LOCAL(ashlsi3_17):
+ shll16 r0
+ rts
+ shll r0
+
+LOCAL(ashlsi3_30):
+ shll2 r0
+LOCAL(ashlsi3_28):
+ shll2 r0
+LOCAL(ashlsi3_26):
+ shll2 r0
+LOCAL(ashlsi3_24):
+ shll16 r0
+ rts
+ shll8 r0
+
+LOCAL(ashlsi3_31):
+ shll2 r0
+LOCAL(ashlsi3_29):
+ shll2 r0
+LOCAL(ashlsi3_27):
+ shll2 r0
+LOCAL(ashlsi3_25):
+ shll16 r0
+ shll8 r0
+ rts
+ shll r0
+
+LOCAL(ashlsi3_0):
+ rts
+ nop
+
+#endif
+
+#ifdef L_lshiftrt
+
+!
+! ___lshrsi3
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! (none)
+!
+ .global ___lshrsi3
+ .align 2
+___lshrsi3:
+ mov #31,r0
+ and r0,r5
+ mova LOCAL(lshrsi3_table),r0
+ mov.b @(r0,r5),r5
+#ifdef __sh1__
+ add r5,r0
+ jmp @r0
+#else
+ braf r5
+#endif
+ mov r4,r0
+
+ .align 2
+LOCAL(lshrsi3_table):
+ .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
+
+LOCAL(lshrsi3_6):
+ shlr2 r0
+LOCAL(lshrsi3_4):
+ shlr2 r0
+LOCAL(lshrsi3_2):
+ rts
+ shlr2 r0
+
+LOCAL(lshrsi3_7):
+ shlr2 r0
+LOCAL(lshrsi3_5):
+ shlr2 r0
+LOCAL(lshrsi3_3):
+ shlr2 r0
+LOCAL(lshrsi3_1):
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_14):
+ shlr2 r0
+LOCAL(lshrsi3_12):
+ shlr2 r0
+LOCAL(lshrsi3_10):
+ shlr2 r0
+LOCAL(lshrsi3_8):
+ rts
+ shlr8 r0
+
+LOCAL(lshrsi3_15):
+ shlr2 r0
+LOCAL(lshrsi3_13):
+ shlr2 r0
+LOCAL(lshrsi3_11):
+ shlr2 r0
+LOCAL(lshrsi3_9):
+ shlr8 r0
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_22):
+ shlr2 r0
+LOCAL(lshrsi3_20):
+ shlr2 r0
+LOCAL(lshrsi3_18):
+ shlr2 r0
+LOCAL(lshrsi3_16):
+ rts
+ shlr16 r0
+
+LOCAL(lshrsi3_23):
+ shlr2 r0
+LOCAL(lshrsi3_21):
+ shlr2 r0
+LOCAL(lshrsi3_19):
+ shlr2 r0
+LOCAL(lshrsi3_17):
+ shlr16 r0
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_30):
+ shlr2 r0
+LOCAL(lshrsi3_28):
+ shlr2 r0
+LOCAL(lshrsi3_26):
+ shlr2 r0
+LOCAL(lshrsi3_24):
+ shlr16 r0
+ rts
+ shlr8 r0
+
+LOCAL(lshrsi3_31):
+ shlr2 r0
+LOCAL(lshrsi3_29):
+ shlr2 r0
+LOCAL(lshrsi3_27):
+ shlr2 r0
+LOCAL(lshrsi3_25):
+ shlr16 r0
+ shlr8 r0
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_0):
+ rts
+ nop
+
+#endif
+
+#ifdef L_movstr
+ .text
+! done all the large groups, do the remainder
+
+! jump to movstr+
+done:
+ add #64,r5
+ mova ___movstrSI0,r0
+ shll2 r6
+ add r6,r0
+ jmp @r0
+ add #64,r4
+ .align 4
+ .global ___movstrSI64
+___movstrSI64:
+ mov.l @(60,r5),r0
+ mov.l r0,@(60,r4)
+ .global ___movstrSI60
+___movstrSI60:
+ mov.l @(56,r5),r0
+ mov.l r0,@(56,r4)
+ .global ___movstrSI56
+___movstrSI56:
+ mov.l @(52,r5),r0
+ mov.l r0,@(52,r4)
+ .global ___movstrSI52
+___movstrSI52:
+ mov.l @(48,r5),r0
+ mov.l r0,@(48,r4)
+ .global ___movstrSI48
+___movstrSI48:
+ mov.l @(44,r5),r0
+ mov.l r0,@(44,r4)
+ .global ___movstrSI44
+___movstrSI44:
+ mov.l @(40,r5),r0
+ mov.l r0,@(40,r4)
+ .global ___movstrSI40
+___movstrSI40:
+ mov.l @(36,r5),r0
+ mov.l r0,@(36,r4)
+ .global ___movstrSI36
+___movstrSI36:
+ mov.l @(32,r5),r0
+ mov.l r0,@(32,r4)
+ .global ___movstrSI32
+___movstrSI32:
+ mov.l @(28,r5),r0
+ mov.l r0,@(28,r4)
+ .global ___movstrSI28
+___movstrSI28:
+ mov.l @(24,r5),r0
+ mov.l r0,@(24,r4)
+ .global ___movstrSI24
+___movstrSI24:
+ mov.l @(20,r5),r0
+ mov.l r0,@(20,r4)
+ .global ___movstrSI20
+___movstrSI20:
+ mov.l @(16,r5),r0
+ mov.l r0,@(16,r4)
+ .global ___movstrSI16
+___movstrSI16:
+ mov.l @(12,r5),r0
+ mov.l r0,@(12,r4)
+ .global ___movstrSI12
+___movstrSI12:
+ mov.l @(8,r5),r0
+ mov.l r0,@(8,r4)
+ .global ___movstrSI8
+___movstrSI8:
+ mov.l @(4,r5),r0
+ mov.l r0,@(4,r4)
+ .global ___movstrSI4
+___movstrSI4:
+ mov.l @(0,r5),r0
+ mov.l r0,@(0,r4)
+___movstrSI0:
+ rts
+ or r0,r0,r0
+
+ .align 4
+
+ .global ___movstr
+___movstr:
+ mov.l @(60,r5),r0
+ mov.l r0,@(60,r4)
+
+ mov.l @(56,r5),r0
+ mov.l r0,@(56,r4)
+
+ mov.l @(52,r5),r0
+ mov.l r0,@(52,r4)
+
+ mov.l @(48,r5),r0
+ mov.l r0,@(48,r4)
+
+ mov.l @(44,r5),r0
+ mov.l r0,@(44,r4)
+
+ mov.l @(40,r5),r0
+ mov.l r0,@(40,r4)
+
+ mov.l @(36,r5),r0
+ mov.l r0,@(36,r4)
+
+ mov.l @(32,r5),r0
+ mov.l r0,@(32,r4)
+
+ mov.l @(28,r5),r0
+ mov.l r0,@(28,r4)
+
+ mov.l @(24,r5),r0
+ mov.l r0,@(24,r4)
+
+ mov.l @(20,r5),r0
+ mov.l r0,@(20,r4)
+
+ mov.l @(16,r5),r0
+ mov.l r0,@(16,r4)
+
+ mov.l @(12,r5),r0
+ mov.l r0,@(12,r4)
+
+ mov.l @(8,r5),r0
+ mov.l r0,@(8,r4)
+
+ mov.l @(4,r5),r0
+ mov.l r0,@(4,r4)
+
+ mov.l @(0,r5),r0
+ mov.l r0,@(0,r4)
+
+ add #-16,r6
+ cmp/pl r6
+ bf done
+
+ add #64,r5
+ bra ___movstr
+ add #64,r4
+#endif
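+
+/* A C sketch of the __movstr protocol (dest in r4, source in r5, count
+   in r6 as encoded by expand_block_move in sh.c): each pass copies 16
+   words and subtracts 16 from r6; once r6 goes non-positive, "done"
+   scales the remainder by 4 and jumps that many bytes before
+   ___movstrSI0, landing on the ladder entry that copies the leftover
+   words.  Assuming 4-byte alignment and a multiple-of-4 length:
+
+	void movstr_model (int *dst, const int *src, int r6)
+	{
+	  int i;
+	  for (;;)
+	    {
+	      for (i = 0; i < 16; i++)
+		dst[i] = src[i];
+	      dst += 16, src += 16;
+	      r6 -= 16;
+	      if (r6 <= 0)
+		break;
+	    }
+	  for (; r6 < 0; r6++)
+	    *dst++ = *src++;
+	}
+*/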
+
+#ifdef L_movstr_i4
+#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+ .text
+ .global ___movstr_i4_even
+ .global ___movstr_i4_odd
+ .global ___movstrSI12_i4
+
+ .p2align 5
+L_movstr_2mod4_end:
+ mov.l r0,@(16,r4)
+ rts
+ mov.l r1,@(20,r4)
+
+ .p2align 2
+
+___movstr_i4_odd:
+ mov.l @r5+,r1
+ add #-4,r4
+ mov.l @r5+,r2
+ mov.l @r5+,r3
+ mov.l r1,@(4,r4)
+ mov.l r2,@(8,r4)
+
+L_movstr_loop:
+ mov.l r3,@(12,r4)
+ dt r6
+ mov.l @r5+,r0
+ bt/s L_movstr_2mod4_end
+ mov.l @r5+,r1
+ add #16,r4
+L_movstr_start_even:
+ mov.l @r5+,r2
+ mov.l @r5+,r3
+ mov.l r0,@r4
+ dt r6
+ mov.l r1,@(4,r4)
+ bf/s L_movstr_loop
+ mov.l r2,@(8,r4)
+ rts
+ mov.l r3,@(12,r4)
+
+___movstr_i4_even:
+ mov.l @r5+,r0
+ bra L_movstr_start_even
+ mov.l @r5+,r1
+
+ .p2align 4
+___movstrSI12_i4:
+ mov.l @r5,r0
+ mov.l @(4,r5),r1
+ mov.l @(8,r5),r2
+ mov.l r0,@r4
+ mov.l r1,@(4,r4)
+ rts
+ mov.l r2,@(8,r4)
+#endif /* __SH4__ / __SH4_SINGLE__ / __SH4_SINGLE_ONLY__ */
+#endif
+
+#ifdef L_mulsi3
+
+
+ .global ___mulsi3
+
+! r4 = aabb
+! r5 = ccdd
+! r0 = aabb*ccdd via partial products
+!
+! if aa == 0 and cc == 0
+! r0 = bb*dd
+!
+! else
+! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
+!
+
+___mulsi3:
+ mulu r4,r5 ! multiply the lsws macl=bb*dd
+ mov r5,r3 ! r3 = ccdd
+ swap.w r4,r2 ! r2 = bbaa
+ xtrct r2,r3 ! r3 = aacc
+ tst r3,r3 ! msws zero ?
+ bf hiset
+ rts ! yes - then we have the answer
+ sts macl,r0
+
+hiset: sts macl,r0 ! r0 = bb*dd
+ mulu r2,r5 ! brewing macl = aa*dd
+ sts macl,r1
+ mulu r3,r4 ! brewing macl = cc*bb
+ sts macl,r2
+ add r1,r2
+ shll16 r2
+ rts
+ add r2,r0
+
+
+#endif
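+
+/* C model of the partial-product scheme above: with r4 = aabb and
+   r5 = ccdd split into 16-bit halves, bb*dd always contributes, the
+   cross terms aa*dd and cc*bb are shifted up 16 bits, and the aa*cc
+   term would land entirely above bit 31, so it is dropped, which is
+   correct for multiplication modulo 2^32:
+
+	unsigned mulsi3_model (unsigned x, unsigned y)
+	{
+	  unsigned aa = x >> 16, bb = x & 0xffff;
+	  unsigned cc = y >> 16, dd = y & 0xffff;
+	  return bb * dd + ((aa * dd + cc * bb) << 16);
+	}
+*/
+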
+#ifdef L_sdivsi3_i4
+ .title "SH DIVIDE"
+!! 4 byte integer Divide code for the Hitachi SH
+#ifdef __SH4__
+!! args in r4 and r5, result in fpul, clobber dr0, dr2
+
+ .global ___sdivsi3_i4
+___sdivsi3_i4:
+ lds r4,fpul
+ float fpul,dr0
+ lds r5,fpul
+ float fpul,dr2
+ fdiv dr2,dr0
+ rts
+ ftrc dr0,fpul
+
+#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
+
+ .global ___sdivsi3_i4
+___sdivsi3_i4:
+ sts.l fpscr,@-r15
+ mov #8,r2
+ swap.w r2,r2
+ lds r2,fpscr
+ lds r4,fpul
+ float fpul,dr0
+ lds r5,fpul
+ float fpul,dr2
+ fdiv dr2,dr0
+ ftrc dr0,fpul
+ rts
+ lds.l @r15+,fpscr
+
+#endif /* __SH4__ / __SH4_SINGLE__ / __SH4_SINGLE_ONLY__ */
+#endif
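+
+/* What the FPU sequences above compute, as a C sketch: the operands
+   are converted to double, divided, and truncated back through fpul.
+   The single-precision variant first sets the PR bit of fpscr
+   (0x00080000) to get double-precision arithmetic:
+
+	int sdivsi3_i4_model (int a, int b)
+	{
+	  return (int) ((double) a / (double) b);
+	}
+*/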
+
+#ifdef L_sdivsi3
+/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
+ sh3e code. */
+#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__)
+!!
+!! Steve Chamberlain
+!! sac@cygnus.com
+!!
+!!
+
+!! args in r4 and r5, result in r0 clobber r1,r2,r3
+
+ .global ___sdivsi3
+___sdivsi3:
+ mov r4,r1
+ mov r5,r0
+
+ tst r0,r0
+ bt div0
+ mov #0,r2
+ div0s r2,r1
+ subc r3,r3
+ subc r2,r1
+ div0s r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ addc r2,r1
+ rts
+ mov r1,r0
+
+
+div0: rts
+ mov #0,r0
+
+#endif /* ! __SH4__ */
+#endif
+#ifdef L_udivsi3_i4
+
+ .title "SH DIVIDE"
+!! 4 byte integer Divide code for the Hitachi SH
+#ifdef __SH4__
+!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
+
+ .global ___udivsi3_i4
+___udivsi3_i4:
+ mov #1,r1
+ cmp/hi r1,r5
+ bf trivial
+ rotr r1
+ xor r1,r4
+ lds r4,fpul
+ mova L1,r0
+#ifdef FMOVD_WORKS
+ fmov.d @r0+,dr4
+#else
+#ifdef __LITTLE_ENDIAN__
+ fmov.s @r0+,fr5
+ fmov.s @r0,fr4
+#else
+ fmov.s @r0+,fr4
+ fmov.s @r0,fr5
+#endif
+#endif
+ float fpul,dr0
+ xor r1,r5
+ lds r5,fpul
+ float fpul,dr2
+ fadd dr4,dr0
+ fadd dr4,dr2
+ fdiv dr2,dr0
+ rts
+ ftrc dr0,fpul
+
+trivial:
+ rts
+ lds r4,fpul
+
+ .align 2
+L1:
+ .double 2147483648
+
+#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
+
+ .global ___udivsi3_i4
+___udivsi3_i4:
+ mov #1,r1
+ cmp/hi r1,r5
+ bf trivial
+ sts.l fpscr,@-r15
+ mova L1,r0
+ lds.l @r0+,fpscr
+ rotr r1
+ xor r1,r4
+ lds r4,fpul
+#ifdef FMOVD_WORKS
+ fmov.d @r0+,dr4
+#else
+#ifdef __LITTLE_ENDIAN__
+ fmov.s @r0+,fr5
+ fmov.s @r0,fr4
+#else
+ fmov.s @r0+,fr4
+ fmov.s @r0,fr5
+#endif
+#endif
+ float fpul,dr0
+ xor r1,r5
+ lds r5,fpul
+ float fpul,dr2
+ fadd dr4,dr0
+ fadd dr4,dr2
+ fdiv dr2,dr0
+ ftrc dr0,fpul
+ rts
+ lds.l @r15+,fpscr
+
+trivial:
+ rts
+ lds r4,fpul
+
+ .align 2
+L1:
+#if defined (__LITTLE_ENDIAN__) || ! defined (FMOVD_WORKS)
+ .long 0x80000
+#else
+ .long 0x180000
+#endif
+ .double 2147483648
+
+#endif /* __SH4__ / __SH4_SINGLE__ / __SH4_SINGLE_ONLY__ */
+#endif
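+
+/* Sketch of the bias trick above: r1 becomes 0x80000000 (mov #1, then
+   rotr), the xor flips each operand's top bit, and adding the
+   2147483648.0 constant at L1 undoes the bias in the double domain,
+   representing a full 32-bit unsigned value exactly without a 64-bit
+   integer path.  The early exit covers divisors 0 and 1.  In C terms:
+
+	unsigned udivsi3_i4_model (unsigned a, unsigned b)
+	{
+	  double da, db;
+	  if (b <= 1)
+	    return a;
+	  da = (double) (int) (a ^ 0x80000000u) + 2147483648.0;
+	  db = (double) (int) (b ^ 0x80000000u) + 2147483648.0;
+	  return (unsigned) (da / db);
+	}
+*/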
+
+#ifdef L_udivsi3
+/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
+ sh3e code. */
+#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__)
+!!
+!! Steve Chamberlain
+!! sac@cygnus.com
+!!
+!!
+
+!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
+ .global ___udivsi3
+
+___udivsi3:
+longway:
+ mov #0,r0
+ div0u
+ ! get one bit from the msb of the numerator into the T
+ ! bit and divide it by what's in r5. Put the answer bit
+ ! into the T bit so it can come out again at the bottom
+
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+shortway:
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+
+vshortway:
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4 ; div1 r5,r0
+ rotcl r4
+ret: rts
+ mov r4,r0
+
+#endif /* ! __SH4__ */
+#endif
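+
+/* The rotcl/div1 ladder above performs one divide step per instruction
+   pair: rotcl feeds the next dividend bit in and collects a quotient
+   bit through T, while div1 updates the partial remainder against r5.
+   A C model of the net effect, written as simple restoring division:
+
+	unsigned udivsi3_model (unsigned num, unsigned den)
+	{
+	  unsigned quot = 0, rem = 0;
+	  int i;
+	  for (i = 31; i >= 0; i--)
+	    {
+	      rem = (rem << 1) | ((num >> i) & 1);
+	      quot <<= 1;
+	      if (rem >= den)
+		rem -= den, quot |= 1;
+	    }
+	  return quot;
+	}
+*/
+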
+#ifdef L_set_fpscr
+#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
+ .global ___set_fpscr
+___set_fpscr:
+ lds r4,fpscr
+ mov.l ___set_fpscr_L1,r1
+ swap.w r4,r0
+ or #24,r0
+#ifndef FMOVD_WORKS
+ xor #16,r0
+#endif
+#if defined(__SH4__)
+ swap.w r0,r3
+ mov.l r3,@(4,r1)
+#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
+ swap.w r0,r2
+ mov.l r2,@r1
+#endif
+#ifndef FMOVD_WORKS
+ xor #8,r0
+#else
+ xor #24,r0
+#endif
+#if defined(__SH4__)
+ swap.w r0,r2
+ rts
+ mov.l r2,@r1
+#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
+ swap.w r0,r3
+ rts
+ mov.l r3,@(4,r1)
+#endif
+ .align 2
+___set_fpscr_L1:
+ .long ___fpscr_values
+#ifdef __ELF__
+ .comm ___fpscr_values,8,4
+#else
+ .comm ___fpscr_values,8
+#endif /* ELF */
+#endif /* SH3E / SH4 */
+#endif /* L_set_fpscr */
diff --git a/gcc/config/sh/rtems.h b/gcc/config/sh/rtems.h
new file mode 100755
index 0000000..3e3fc7b
--- /dev/null
+++ b/gcc/config/sh/rtems.h
@@ -0,0 +1,35 @@
+/* Definitions for rtems targeting a SH using COFF.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include "sh/sh.h"
+
+/* Specify predefined symbols in preprocessor. */
+
+#undef CPP_PREDEFINES
+#define CPP_PREDEFINES "-D__sh__ -Drtems -D__rtems__ \
+ -Asystem(rtems) -Acpu(sh) -Amachine(sh)"
+
+/* Generate calls to memcpy, memcmp and memset. */
+#ifndef TARGET_MEM_FUNCTIONS
+#define TARGET_MEM_FUNCTIONS
+#endif
+
+/* end of sh/rtems.h */
diff --git a/gcc/config/sh/rtemself.h b/gcc/config/sh/rtemself.h
new file mode 100755
index 0000000..8000a3a
--- /dev/null
+++ b/gcc/config/sh/rtemself.h
@@ -0,0 +1,33 @@
+/* Definitions for rtems targeting a SH using elf.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include "sh/elf.h"
+
+/* Specify predefined symbols in preprocessor. */
+
+#undef CPP_PREDEFINES
+#define CPP_PREDEFINES "-D__sh__ -D__ELF__ -Drtems -D__rtems__ \
+ -Asystem(rtems) -Acpu(sh) -Amachine(sh)"
+
+/* Generate calls to memcpy, memcmp and memset. */
+#ifndef TARGET_MEM_FUNCTIONS
+#define TARGET_MEM_FUNCTIONS
+#endif
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
new file mode 100755
index 0000000..4d4b5cd
--- /dev/null
+++ b/gcc/config/sh/sh.c
@@ -0,0 +1,4786 @@
+/* Output routines for GCC for Hitachi Super-H.
+ Copyright (C) 1993-1998 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* Contributed by Steve Chamberlain (sac@cygnus.com).
+ Improved by Jim Wilson (wilson@cygnus.com). */
+
+#include "config.h"
+
+#include <stdio.h>
+
+#include "rtl.h"
+#include "tree.h"
+#include "flags.h"
+#include "insn-flags.h"
+#include "expr.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "insn-attr.h"
+
+int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
+
+#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
+#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
+
+/* ??? The pragma interrupt support will not work for SH3. */
+/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
+ output code for the next function appropriate for an interrupt handler. */
+int pragma_interrupt;
+
+/* This is set by the trap_exit attribute for functions. It specifies
+ a trap number to be used in a trapa instruction at function exit
+ (instead of an rte instruction). */
+int trap_exit;
+
+/* This is used by the sp_switch attribute for functions. It specifies
+ a variable holding the address of the stack the interrupt function
+ should switch to/from at entry/exit. */
+rtx sp_switch;
+
+/* This is set by #pragma trapa, and is similar to the above, except that
+ the compiler doesn't emit code to preserve all registers. */
+static int pragma_trapa;
+
+/* This is set by #pragma nosave_low_regs. This is useful on the SH3,
+ which has a separate set of low regs for User and Supervisor modes.
+ This should only be used for the lowest level of interrupts. Higher levels
+ of interrupts must save the registers in case they themselves are
+ interrupted. */
+int pragma_nosave_low_regs;
+
+/* This is used for communication between SETUP_INCOMING_VARARGS and
+ sh_expand_prologue. */
+int current_function_anonymous_args;
+
+/* Global variables from toplev.c and final.c that are used within, but
+ not declared in any header file. */
+extern char *version_string;
+extern int *insn_addresses;
+
+/* Global variables for machine-dependent things. */
+
+/* Which cpu are we scheduling for. */
+enum processor_type sh_cpu;
+
+/* Saved operands from the last compare to use when we generate an scc
+ or bcc insn. */
+
+rtx sh_compare_op0;
+rtx sh_compare_op1;
+
+enum machine_mode sh_addr_diff_vec_mode;
+
+/* Provides the class number of the smallest class containing
+ reg number. */
+
+int regno_reg_class[FIRST_PSEUDO_REGISTER] =
+{
+ R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, PR_REGS, T_REGS, NO_REGS,
+ MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS,
+ FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ DF_REGS, DF_REGS, DF_REGS, DF_REGS,
+ DF_REGS, DF_REGS, DF_REGS, DF_REGS,
+ FPSCR_REGS,
+};
+
+char fp_reg_names[][5] =
+{
+ "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
+ "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
+ "fpul",
+ "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14",
+};
+
+/* Provide reg_class from a letter such as appears in the machine
+ description. */
+
+enum reg_class reg_class_from_letter[] =
+{
+ /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
+ /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
+ /* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS,
+ /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
+ /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS,
+ /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
+ /* y */ FPUL_REGS, /* z */ R0_REGS
+};
+
+int assembler_dialect;
+
+rtx get_fpscr_rtx ();
+void emit_sf_insn ();
+void emit_df_insn ();
+
+static void split_branches PROTO ((rtx));
+
+/* Print the operand address in x to the stream. */
+
+void
+print_operand_address (stream, x)
+ FILE *stream;
+ rtx x;
+{
+ switch (GET_CODE (x))
+ {
+ case REG:
+ case SUBREG:
+ fprintf (stream, "@%s", reg_names[true_regnum (x)]);
+ break;
+
+ case PLUS:
+ {
+ rtx base = XEXP (x, 0);
+ rtx index = XEXP (x, 1);
+
+ switch (GET_CODE (index))
+ {
+ case CONST_INT:
+ fprintf (stream, "@(%d,%s)", INTVAL (index),
+ reg_names[true_regnum (base)]);
+ break;
+
+ case REG:
+ case SUBREG:
+ {
+ int base_num = true_regnum (base);
+ int index_num = true_regnum (index);
+
+ fprintf (stream, "@(r0,%s)",
+ reg_names[MAX (base_num, index_num)]);
+ break;
+ }
+
+ default:
+ debug_rtx (x);
+ abort ();
+ }
+ }
+ break;
+
+ case PRE_DEC:
+ fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
+ break;
+
+ case POST_INC:
+ fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
+ break;
+
+ default:
+ output_addr_const (stream, x);
+ break;
+ }
+}
+
+/* Print operand x (an rtx) in assembler syntax to file stream
+ according to modifier code.
+
+ '.' print a .s if insn needs delay slot
+ ',' print LOCAL_LABEL_PREFIX
+ '@' print trap, rte or rts depending upon pragma interruptness
+ '#' output a nop if there is nothing to put in the delay slot
+ 'O' print a constant without the #
+ 'R' print the LSW of a dp value - changes if in little endian
+ 'S' print the MSW of a dp value - changes if in little endian
+ 'T' print the next word of a dp value - same as 'R' in big endian mode.
+ 'o' output an operator. */
+
+void
+print_operand (stream, x, code)
+ FILE *stream;
+ rtx x;
+ int code;
+{
+ switch (code)
+ {
+ case '.':
+ if (final_sequence
+ && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
+ fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
+ break;
+ case ',':
+ fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
+ break;
+ case '@':
+ {
+ int interrupt_handler;
+
+ if ((lookup_attribute
+ ("interrupt_handler",
+ DECL_MACHINE_ATTRIBUTES (current_function_decl)))
+ != NULL_TREE)
+ interrupt_handler = 1;
+ else
+ interrupt_handler = 0;
+
+ if (trap_exit)
+ fprintf (stream, "trapa #%d", trap_exit);
+ else if (interrupt_handler)
+ fprintf (stream, "rte");
+ else
+ fprintf (stream, "rts");
+ break;
+ }
+ case '#':
+ /* Output a nop if there's nothing in the delay slot. */
+ if (dbr_sequence_length () == 0)
+ fprintf (stream, "\n\tnop");
+ break;
+ case 'O':
+ output_addr_const (stream, x);
+ break;
+ case 'R':
+ fputs (reg_names[REGNO (x) + LSW], (stream));
+ break;
+ case 'S':
+ fputs (reg_names[REGNO (x) + MSW], (stream));
+ break;
+ case 'T':
+ /* Next word of a double. */
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fputs (reg_names[REGNO (x) + 1], (stream));
+ break;
+ case MEM:
+ if (GET_CODE (XEXP (x, 0)) != PRE_DEC
+ && GET_CODE (XEXP (x, 0)) != POST_INC)
+ x = adj_offsettable_operand (x, 4);
+ print_operand_address (stream, XEXP (x, 0));
+ break;
+ }
+ break;
+ case 'o':
+ switch (GET_CODE (x))
+ {
+ case PLUS: fputs ("add", stream); break;
+ case MINUS: fputs ("sub", stream); break;
+ case MULT: fputs ("mul", stream); break;
+ case DIV: fputs ("div", stream); break;
+ }
+ break;
+ default:
+ switch (GET_CODE (x))
+ {
+ case REG:
+ if (REGNO (x) >= FIRST_FP_REG && REGNO (x) <= LAST_FP_REG
+ && GET_MODE_SIZE (GET_MODE (x)) > 4)
+ fprintf ((stream), "d%s", reg_names[REGNO (x)]+1);
+ else
+ fputs (reg_names[REGNO (x)], (stream));
+ break;
+ case MEM:
+ output_address (XEXP (x, 0));
+ break;
+ default:
+ fputc ('#', stream);
+ output_addr_const (stream, x);
+ break;
+ }
+ break;
+ }
+}
+
+/* Emit code to perform a block move. Choose the best method.
+
+ OPERANDS[0] is the destination.
+ OPERANDS[1] is the source.
+ OPERANDS[2] is the size.
+ OPERANDS[3] is the alignment safe to use. */
+
+int
+expand_block_move (operands)
+ rtx *operands;
+{
+ int align = INTVAL (operands[3]);
+ int constp = (GET_CODE (operands[2]) == CONST_INT);
+ int bytes = (constp ? INTVAL (operands[2]) : 0);
+
+ /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
+ alignment, or if it isn't a multiple of 4 bytes, then fail. */
+ if (! constp || align < 4 || (bytes % 4 != 0))
+ return 0;
+
+ if (TARGET_HARD_SH4)
+ {
+ if (bytes < 12)
+ return 0;
+ else if (bytes == 12)
+ {
+ tree entry_name;
+ rtx func_addr_rtx;
+ rtx r4 = gen_rtx (REG, SImode, 4);
+ rtx r5 = gen_rtx (REG, SImode, 5);
+
+ entry_name = get_identifier ("__movstrSI12_i4");
+
+ func_addr_rtx
+ = copy_to_mode_reg (Pmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ IDENTIFIER_POINTER (entry_name)));
+ emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
+ emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
+ emit_insn (gen_block_move_real_i4 (func_addr_rtx));
+ return 1;
+ }
+ else if (! TARGET_SMALLCODE)
+ {
+ tree entry_name;
+ rtx func_addr_rtx;
+ int dwords;
+ rtx r4 = gen_rtx (REG, SImode, 4);
+ rtx r5 = gen_rtx (REG, SImode, 5);
+ rtx r6 = gen_rtx (REG, SImode, 6);
+
+ entry_name = get_identifier (bytes & 4
+ ? "__movstr_i4_odd"
+ : "__movstr_i4_even");
+ func_addr_rtx
+ = copy_to_mode_reg (Pmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ IDENTIFIER_POINTER (entry_name)));
+ emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
+ emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
+
+ dwords = bytes >> 3;
+ emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
+ emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
+ return 1;
+ }
+ else
+ return 0;
+ }
+ if (bytes < 64)
+ {
+ char entry[30];
+ tree entry_name;
+ rtx func_addr_rtx;
+ rtx r4 = gen_rtx (REG, SImode, 4);
+ rtx r5 = gen_rtx (REG, SImode, 5);
+
+ sprintf (entry, "__movstrSI%d", bytes);
+ entry_name = get_identifier (entry);
+
+ func_addr_rtx
+ = copy_to_mode_reg (Pmode,
+ gen_rtx (SYMBOL_REF, Pmode,
+ IDENTIFIER_POINTER (entry_name)));
+ emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
+ emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
+ emit_insn (gen_block_move_real (func_addr_rtx));
+ return 1;
+ }
+
+ /* This is the same number of bytes as a memcpy call, but to a different,
+ less common function name, so this will occasionally use more space. */
+ if (! TARGET_SMALLCODE)
+ {
+ tree entry_name;
+ rtx func_addr_rtx;
+ int final_switch, while_loop;
+ rtx r4 = gen_rtx (REG, SImode, 4);
+ rtx r5 = gen_rtx (REG, SImode, 5);
+ rtx r6 = gen_rtx (REG, SImode, 6);
+
+ entry_name = get_identifier ("__movstr");
+ func_addr_rtx
+ = copy_to_mode_reg (Pmode,
+ gen_rtx (SYMBOL_REF, Pmode,
+ IDENTIFIER_POINTER (entry_name)));
+ emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
+ emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
+
+ /* r6 controls the size of the move. It is decremented by 16 for
+ each 64 bytes moved. Then the negative remainder left over is used
+ as an index into a list of move instructions. e.g., a 72 byte move
+ would be set up with size(r6) = 14, for one iteration through the
+ big while loop, and a switch of -2 for the last part. */
+
+ final_switch = 16 - ((bytes / 4) % 16);
+ while_loop = ((bytes / 4) / 16 - 1) * 16;
+ emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
+ emit_insn (gen_block_lump_real (func_addr_rtx));
+ return 1;
+ }
+
+ return 0;
+}
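+
+/* A worked instance of the encoding above, for a 72 byte move:
+   bytes / 4 = 18 words, so
+
+	final_switch = 16 - (18 % 16)           = 14
+	while_loop   = (18 / 16 - 1) * 16       = 0
+	r6           = while_loop + final_switch = 14
+
+   One pass of the 64 byte loop leaves r6 = 14 - 16 = -2, and that
+   negative remainder indexes the __movstrSIxx ladder to copy the
+   final 8 bytes.  */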
+
+/* Prepare operands for a move define_expand; specifically, one of the
+ operands must be in a register. */
+
+int
+prepare_move_operands (operands, mode)
+ rtx operands[];
+ enum machine_mode mode;
+{
+ if (! reload_in_progress && ! reload_completed)
+ {
+ /* Copy the source to a register if both operands aren't registers. */
+ if (! register_operand (operands[0], mode)
+ && ! register_operand (operands[1], mode))
+ operands[1] = copy_to_mode_reg (mode, operands[1]);
+
+ /* This case can happen while generating code to move the result
+ of a library call to the target. Reject `st r0,@(rX,rY)' because
+ reload will fail to find a spill register for rX, since r0 is already
+ being used for the source. */
+ else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
+ && GET_CODE (operands[0]) == MEM
+ && GET_CODE (XEXP (operands[0], 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
+ operands[1] = copy_to_mode_reg (mode, operands[1]);
+ }
+
+ return 0;
+}
+
+/* Prepare the operands for an scc instruction; make sure that the
+ compare has been done. */
+rtx
+prepare_scc_operands (code)
+ enum rtx_code code;
+{
+ rtx t_reg = gen_rtx (REG, SImode, T_REG);
+ enum rtx_code oldcode = code;
+ enum machine_mode mode;
+
+ /* First need a compare insn. */
+ switch (code)
+ {
+ case NE:
+ /* It isn't possible to handle this case. */
+ abort ();
+ case LT:
+ code = GT;
+ break;
+ case LE:
+ code = GE;
+ break;
+ case LTU:
+ code = GTU;
+ break;
+ case LEU:
+ code = GEU;
+ break;
+ }
+ if (code != oldcode)
+ {
+ rtx tmp = sh_compare_op0;
+ sh_compare_op0 = sh_compare_op1;
+ sh_compare_op1 = tmp;
+ }
+
+ mode = GET_MODE (sh_compare_op0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (sh_compare_op1);
+
+ sh_compare_op0 = force_reg (mode, sh_compare_op0);
+ if ((code != EQ && code != NE
+ && (sh_compare_op1 != const0_rtx
+ || code == GTU || code == GEU || code == LTU || code == LEU))
+ || TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ sh_compare_op1 = force_reg (mode, sh_compare_op1);
+
+ if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ (mode == SFmode ? emit_sf_insn : emit_df_insn)
+ (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
+ gen_rtx (SET, VOIDmode, t_reg,
+ gen_rtx (code, SImode,
+ sh_compare_op0, sh_compare_op1)),
+ gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
+ else
+ emit_insn (gen_rtx (SET, VOIDmode, t_reg,
+ gen_rtx (code, SImode, sh_compare_op0,
+ sh_compare_op1)));
+
+ return t_reg;
+}
+
+/* Called from the md file, set up the operands of a compare instruction. */
+
+void
+from_compare (operands, code)
+ rtx *operands;
+ int code;
+{
+ enum machine_mode mode = GET_MODE (sh_compare_op0);
+ rtx insn;
+ if (mode == VOIDmode)
+ mode = GET_MODE (sh_compare_op1);
+ if (code != EQ
+ || mode == DImode
+ || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
+ {
+ /* Force args into regs, since we can't use constants here. */
+ sh_compare_op0 = force_reg (mode, sh_compare_op0);
+ if (sh_compare_op1 != const0_rtx
+ || code == GTU || code == GEU
+ || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
+ sh_compare_op1 = force_reg (mode, sh_compare_op1);
+ }
+ if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
+ {
+ from_compare (operands, GT);
+ insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
+ }
+ else
+ insn = gen_rtx (SET, VOIDmode,
+ gen_rtx (REG, SImode, 18),
+ gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1));
+ if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ insn = gen_rtx (PARALLEL, VOIDmode,
+ gen_rtvec (2, insn,
+ gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
+ (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
+ }
+ else
+ emit_insn (insn);
+}
+
+/* Functions to output assembly code. */
+
+/* Return a sequence of instructions to perform DI or DF move.
+
+ Since the SH cannot move a DI or DF in one instruction, we have
+ to take care when we see overlapping source and dest registers. */
+
+char *
+output_movedouble (insn, operands, mode)
+ rtx insn;
+ rtx operands[];
+ enum machine_mode mode;
+{
+ rtx dst = operands[0];
+ rtx src = operands[1];
+
+ if (GET_CODE (dst) == MEM
+ && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
+ return "mov.l %T1,%0\n\tmov.l %1,%0";
+
+ if (register_operand (dst, mode)
+ && register_operand (src, mode))
+ {
+ if (REGNO (src) == MACH_REG)
+ return "sts mach,%S0\n\tsts macl,%R0";
+
+ /* When mov.d r1,r2 do r2->r3 then r1->r2;
+ when mov.d r1,r0 do r1->r0 then r2->r1. */
+
+ if (REGNO (src) + 1 == REGNO (dst))
+ return "mov %T1,%T0\n\tmov %1,%0";
+ else
+ return "mov %1,%0\n\tmov %T1,%T0";
+ }
+ else if (GET_CODE (src) == CONST_INT)
+ {
+ if (INTVAL (src) < 0)
+ output_asm_insn ("mov #-1,%S0", operands);
+ else
+ output_asm_insn ("mov #0,%S0", operands);
+
+ return "mov %1,%R0";
+ }
+ else if (GET_CODE (src) == MEM)
+ {
+ int ptrreg = -1;
+ int dreg = REGNO (dst);
+ rtx inside = XEXP (src, 0);
+
+ if (GET_CODE (inside) == REG)
+ ptrreg = REGNO (inside);
+ else if (GET_CODE (inside) == SUBREG)
+ ptrreg = REGNO (SUBREG_REG (inside)) + SUBREG_WORD (inside);
+ else if (GET_CODE (inside) == PLUS)
+ {
+ ptrreg = REGNO (XEXP (inside, 0));
+ /* ??? A r0+REG address shouldn't be possible here, because it isn't
+ an offsettable address. Unfortunately, offsettable addresses use
+ QImode to check the offset, and a QImode offsettable address
+ requires r0 for the other operand, which is not currently
+ supported, so we can't use the 'o' constraint.
+ Thus we must check for and handle r0+REG addresses here.
+ We punt for now, since this is likely very rare. */
+ if (GET_CODE (XEXP (inside, 1)) == REG)
+ abort ();
+ }
+ else if (GET_CODE (inside) == LABEL_REF)
+ return "mov.l %1,%0\n\tmov.l %1+4,%T0";
+ else if (GET_CODE (inside) == POST_INC)
+ return "mov.l %1,%0\n\tmov.l %1,%T0";
+ else
+ abort ();
+
+ /* Work out the safe way to copy. Copy into the second half first. */
+ if (dreg == ptrreg)
+ return "mov.l %T1,%T0\n\tmov.l %1,%0";
+ }
+
+ return "mov.l %1,%0\n\tmov.l %T1,%T0";
+}
+
+/* Print an instruction which would have gone into a delay slot after
+ another instruction, but couldn't because the other instruction expanded
+ into a sequence where putting the slot insn at the end wouldn't work. */
+
+static void
+print_slot (insn)
+ rtx insn;
+{
+ final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
+
+ INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
+}
+
+char *
+output_far_jump (insn, op)
+ rtx insn;
+ rtx op;
+{
+ struct { rtx lab, reg, op; } this;
+ char *jump;
+ int far;
+ int offset = branch_dest (insn) - insn_addresses[INSN_UID (insn)];
+
+ this.lab = gen_label_rtx ();
+
+ if (TARGET_SH2
+ && offset >= -32764
+ && offset - get_attr_length (insn) <= 32766)
+ {
+ far = 0;
+ jump = "mov.w %O0,%1;braf %1";
+ }
+ else
+ {
+ far = 1;
+ jump = "mov.l %O0,%1;jmp @%1";
+ }
+ /* If we have a scratch register available, use it. */
+ if (GET_CODE (PREV_INSN (insn)) == INSN
+ && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
+ {
+ this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
+ output_asm_insn (jump, &this.lab);
+ if (dbr_sequence_length ())
+ print_slot (final_sequence);
+ else
+ output_asm_insn ("nop", 0);
+ }
+ else
+ {
+ /* Output the delay slot insn first if any. */
+ if (dbr_sequence_length ())
+ print_slot (final_sequence);
+
+ this.reg = gen_rtx (REG, SImode, 13);
+ output_asm_insn ("mov.l r13,@-r15", 0);
+ output_asm_insn (jump, &this.lab);
+ output_asm_insn ("mov.l @r15+,r13", 0);
+ }
+ if (far)
+ output_asm_insn (".align 2", 0);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
+ this.op = op;
+ output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
+ return "";
+}
+
+/* Local label counter, used for constants in the pool and inside
+ pattern branches. */
+
+static int lf = 100;
+
+/* Output code for ordinary branches. */
+
+char *
+output_branch (logic, insn, operands)
+ int logic;
+ rtx insn;
+ rtx *operands;
+{
+ switch (get_attr_length (insn))
+ {
+ case 6:
+ /* This can happen if filling the delay slot has caused a forward
+ branch to exceed its range (we could reverse it, but only
+ when we know we won't overextend other branches; this should
+ best be handled by relaxation).
+ It can also happen when other condbranches hoist delay slot insn
+ from their destination, thus leading to code size increase.
+ But the branch will still be in the range -4092..+4098 bytes. */
+
+ if (! TARGET_RELAX)
+ {
+ int label = lf++;
+ /* The call to print_slot will clobber the operands. */
+ rtx op0 = operands[0];
+
+ /* If the instruction in the delay slot is annulled (true), then
+ there is no delay slot where we can put it now. The only safe
+ place for it is after the label. final will do that by default. */
+
+ if (final_sequence
+ && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
+ {
+ asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
+ ASSEMBLER_DIALECT ? "/" : ".", label);
+ print_slot (final_sequence);
+ }
+ else
+ asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
+
+ output_asm_insn ("bra\t%l0", &op0);
+ fprintf (asm_out_file, "\tnop\n");
+ ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label);
+
+ return "";
+ }
+ /* When relaxing, handle this like a short branch. The linker
+ will fix it up if it still doesn't fit after relaxation. */
+ case 2:
+ return logic ? "bt%.\t%l0" : "bf%.\t%l0";
+ default:
+ abort ();
+ }
+}
+
+char *
+output_branchy_insn (code, template, insn, operands)
+ enum rtx_code code;
+ char *template;
+ rtx insn;
+ rtx *operands;
+{
+ rtx next_insn = NEXT_INSN (insn);
+ int label_nr;
+
+ if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
+ {
+ rtx src = SET_SRC (PATTERN (next_insn));
+ if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
+ {
+ /* Following branch not taken */
+ operands[9] = gen_label_rtx ();
+ emit_label_after (operands[9], next_insn);
+ return template;
+ }
+ else
+ {
+ int offset = (branch_dest (next_insn)
+ - insn_addresses[INSN_UID (next_insn)] + 4);
+ if (offset >= -252 && offset <= 258)
+ {
+ if (GET_CODE (src) == IF_THEN_ELSE)
+ /* branch_true */
+ src = XEXP (src, 1);
+ operands[9] = src;
+ return template;
+ }
+ }
+ }
+ operands[9] = gen_label_rtx ();
+ emit_label_after (operands[9], insn);
+ return template;
+}
+
+char *
+output_ieee_ccmpeq (insn, operands)
+ rtx insn, *operands;
+{
+ return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
+}
+
+/* Output to FILE the start of the assembler file. */
+
+void
+output_file_start (file)
+ FILE *file;
+{
+ register int pos;
+
+ output_file_directive (file, main_input_filename);
+
+ /* Switch to the data section so that the coffsem symbol and the
+ gcc2_compiled. symbol aren't in the text section. */
+ data_section ();
+
+ if (TARGET_LITTLE_ENDIAN)
+ fprintf (file, "\t.little\n");
+}
+
+/* Actual number of instructions used to make a shift by N. */
+static char ashiftrt_insns[] =
+ { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
+
+/* Left shift and logical right shift are the same. */
+static char shift_insns[] =
+ { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
+
+/* Individual shift amounts needed to get the above length sequences.
+ One bit right shifts clobber the T bit, so when possible, put one bit
+ shifts in the middle of the sequence, so the ends are eligible for
+ branch delay slots. */
+static short shift_amounts[32][5] = {
+ {0}, {1}, {2}, {2, 1},
+ {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
+ {8}, {8, 1}, {8, 2}, {8, 1, 2},
+ {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
+ {16}, {16, 1}, {16, 2}, {16, 1, 2},
+ {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
+ {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
+ {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
+
+/* Likewise, but for shift amounts < 16, up to three highmost bits
+ might be clobbered. This is typically used when combined with some
+ kind of sign or zero extension. */
+
+static char ext_shift_insns[] =
+ { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
+
+static short ext_shift_amounts[32][4] = {
+ {0}, {1}, {2}, {2, 1},
+ {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
+ {8}, {8, 1}, {8, 2}, {8, 1, 2},
+ {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
+ {16}, {16, 1}, {16, 2}, {16, 1, 2},
+ {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
+ {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
+ {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
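+
+/* In each table the sub-shifts sum to the total shift count; negative
+   entries shift the opposite way and subtract.  For example,
+   shift_amounts[13] is {8, 2, 1, 2} with 8 + 2 + 1 + 2 == 13, and
+   shift_amounts[14] is {8, -2, 8} with 8 - 2 + 8 == 14.  A sketch of
+   a consistency check, assuming the tables above are in scope:
+
+	static void check_shift_tables (void)
+	{
+	  int v, i, sum;
+	  for (v = 0; v < 32; v++)
+	    {
+	      for (i = sum = 0; i < shift_insns[v]; i++)
+		sum += shift_amounts[v][i];
+	      if (sum != v)
+		abort ();
+	    }
+	}
+*/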
+
+/* Assuming we have a value that has been sign-extended by at least
+   one bit, can we use the ext_shift_amounts with the last shift turned
+   to an arithmetic shift to shift it by N without data loss, and
+   quicker than by other means?  */
+
+/* This is used in length attributes in sh.md to help compute the length
+ of arbitrary constant shift instructions. */
+
+int
+shift_insns_rtx (insn)
+ rtx insn;
+{
+ rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ int shift_count = INTVAL (XEXP (set_src, 1));
+ enum rtx_code shift_code = GET_CODE (set_src);
+
+ switch (shift_code)
+ {
+ case ASHIFTRT:
+ return ashiftrt_insns[shift_count];
+ case LSHIFTRT:
+ case ASHIFT:
+ return shift_insns[shift_count];
+ default:
+ abort();
+ }
+}
+
+/* Return the cost of a shift. */
+
+int
+shiftcosts (x)
+ rtx x;
+{
+ int value;
+
+ /* If shift by a non constant, then this will be expensive. */
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ return SH_DYNAMIC_SHIFT_COST;
+
+ value = INTVAL (XEXP (x, 1));
+
+ /* Otherwise, return the true cost in instructions. */
+ if (GET_CODE (x) == ASHIFTRT)
+ {
+ int cost = ashiftrt_insns[value];
+ /* If SH3, then we put the constant in a reg and use shad. */
+ if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
+ cost = 1 + SH_DYNAMIC_SHIFT_COST;
+ return cost;
+ }
+ else
+ return shift_insns[value];
+}
+
+/* Return the cost of an AND operation. */
+
+int
+andcosts (x)
+ rtx x;
+{
+ int i;
+
+ /* Anding with a register is a single cycle and instruction. */
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ return 1;
+
+ i = INTVAL (XEXP (x, 1));
+ /* These constants are single cycle extu.[bw] instructions. */
+ if (i == 0xff || i == 0xffff)
+ return 1;
+ /* Constants that can be used in an and immediate instruction is a single
+ cycle, but this requires r0, so make it a little more expensive. */
+ if (CONST_OK_FOR_L (i))
+ return 2;
+ /* Constants that can be loaded with a mov immediate and an and.
+ This case is probably unnecessary. */
+ if (CONST_OK_FOR_I (i))
+ return 2;
+ /* Any other constants requires a 2 cycle pc-relative load plus an and.
+ This case is probably unnecessary. */
+ return 3;
+}
+
+/* Return the cost of a multiply. */
+int
+multcosts (x)
+ rtx x;
+{
+ if (TARGET_SH2)
+ {
+      /* We have a mul insn, so we never need more than the mul itself plus
+	 a read of the MAC register; count it as more anyway, because of the
+	 latency and the extra register usage.  */
+ if (TARGET_SMALLCODE)
+ return 2;
+ return 3;
+ }
+
+ /* If we're aiming at small code, then just count the number of
+ insns in a multiply call sequence. */
+ if (TARGET_SMALLCODE)
+ return 5;
+
+ /* Otherwise count all the insns in the routine we'd be calling too. */
+ return 20;
+}
+
+/* Code to expand a shift. */
+
+void
+gen_ashift (type, n, reg)
+ int type;
+ int n;
+ rtx reg;
+{
+ /* Negative values here come from the shift_amounts array. */
+ if (n < 0)
+ {
+ if (type == ASHIFT)
+ type = LSHIFTRT;
+ else
+ type = ASHIFT;
+ n = -n;
+ }
+
+ switch (type)
+ {
+ case ASHIFTRT:
+ emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
+ break;
+ case LSHIFTRT:
+ if (n == 1)
+ emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
+ else
+ emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
+ break;
+ case ASHIFT:
+ emit_insn (gen_ashlsi3_k (reg, reg, GEN_INT (n)));
+ break;
+ }
+}
+
+/* Same for HImode.  */
+
+void
+gen_ashift_hi (type, n, reg)
+ int type;
+ int n;
+ rtx reg;
+{
+ /* Negative values here come from the shift_amounts array. */
+ if (n < 0)
+ {
+ if (type == ASHIFT)
+ type = LSHIFTRT;
+ else
+ type = ASHIFT;
+ n = -n;
+ }
+
+ switch (type)
+ {
+ case ASHIFTRT:
+ case LSHIFTRT:
+ /* We don't have HImode right shift operations because using the
+ ordinary 32 bit shift instructions for that doesn't generate proper
+ zero/sign extension.
+ gen_ashift_hi is only called in contexts where we know that the
+ sign extension works out correctly. */
+ {
+ int word = 0;
+ if (GET_CODE (reg) == SUBREG)
+ {
+ word = SUBREG_WORD (reg);
+ reg = SUBREG_REG (reg);
+ }
+ gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word));
+ break;
+ }
+ case ASHIFT:
+ emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
+ break;
+ }
+}
+
+/* Output RTL to split a constant shift into its component SH constant
+ shift instructions. */
+
+void
+gen_shifty_op (code, operands)
+ int code;
+ rtx *operands;
+{
+ int value = INTVAL (operands[2]);
+ int max, i;
+
+ /* Truncate the shift count in case it is out of bounds. */
+ value = value & 0x1f;
+
+ if (value == 31)
+ {
+ if (code == LSHIFTRT)
+ {
+ emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
+ emit_insn (gen_movt (operands[0]));
+ return;
+ }
+ else if (code == ASHIFT)
+ {
+ /* There is a two instruction sequence for 31 bit left shifts,
+ but it requires r0. */
+ if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
+ {
+ emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
+ emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
+ return;
+ }
+ }
+ }
+ else if (value == 0)
+ {
+ /* This can happen when not optimizing. We must output something here
+ to prevent the compiler from aborting in final.c after the try_split
+ call. */
+ emit_insn (gen_nop ());
+ return;
+ }
+
+ max = shift_insns[value];
+ for (i = 0; i < max; i++)
+ gen_ashift (code, shift_amounts[value][i], operands[0]);
+}
+
+/* Same as above, but optimized for values where the topmost bits don't
+ matter. */
+
+void
+gen_shifty_hi_op (code, operands)
+ int code;
+ rtx *operands;
+{
+ int value = INTVAL (operands[2]);
+ int max, i;
+ void (*gen_fun)();
+
+ /* This operation is used by and_shl for SImode values with a few
+ high bits known to be cleared. */
+ value &= 31;
+ if (value == 0)
+ {
+ emit_insn (gen_nop ());
+ return;
+ }
+
+ gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
+ if (code == ASHIFT)
+ {
+ max = ext_shift_insns[value];
+ for (i = 0; i < max; i++)
+ gen_fun (code, ext_shift_amounts[value][i], operands[0]);
+ }
+ else
+ /* When shifting right, emit the shifts in reverse order, so that
+ solitary negative values come first. */
+ for (i = ext_shift_insns[value] - 1; i >= 0; i--)
+ gen_fun (code, ext_shift_amounts[value][i], operands[0]);
+}
+
+/* Output RTL for an arithmetic right shift. */
+
+/* ??? Rewrite to use super-optimizer sequences. */
+
+int
+expand_ashiftrt (operands)
+ rtx *operands;
+{
+ rtx wrk;
+ char func[18];
+ tree func_name;
+ int value;
+
+ if (TARGET_SH3)
+ {
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx count = copy_to_mode_reg (SImode, operands[2]);
+ emit_insn (gen_negsi2 (count, count));
+ emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
+ return 1;
+ }
+ else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
+ > 1 + SH_DYNAMIC_SHIFT_COST)
+ {
+ rtx count
+ = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
+ emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
+ return 1;
+ }
+ }
+ if (GET_CODE (operands[2]) != CONST_INT)
+ return 0;
+
+ value = INTVAL (operands[2]) & 31;
+
+ if (value == 31)
+ {
+ emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
+ return 1;
+ }
+ else if (value >= 16 && value <= 19)
+ {
+ wrk = gen_reg_rtx (SImode);
+ emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
+ value -= 16;
+ while (value--)
+ gen_ashift (ASHIFTRT, 1, wrk);
+ emit_move_insn (operands[0], wrk);
+ return 1;
+ }
+  /* Expand a short sequence inline; for a longer one, call a magic routine.  */
+ else if (value <= 5)
+ {
+ wrk = gen_reg_rtx (SImode);
+ emit_move_insn (wrk, operands[1]);
+ while (value--)
+ gen_ashift (ASHIFTRT, 1, wrk);
+ emit_move_insn (operands[0], wrk);
+ return 1;
+ }
+
+ wrk = gen_reg_rtx (Pmode);
+
+ /* Load the value into an arg reg and call a helper. */
+ emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]);
+ sprintf (func, "__ashiftrt_r4_%d", value);
+ func_name = get_identifier (func);
+ emit_move_insn (wrk, gen_rtx (SYMBOL_REF, Pmode,
+ IDENTIFIER_POINTER (func_name)));
+ emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
+ emit_move_insn (operands[0], gen_rtx (REG, SImode, 4));
+ return 1;
+}
+
+int
+sh_dynamicalize_shift_p (count)
+     rtx count;
+{
+ return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
+}
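+
+/* I.e. return nonzero when loading the shift count into a register and
+   doing the shift dynamically is cheaper than the constant shift
+   sequence.  */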
+
+/* Try to find a good way to implement the combiner pattern
+  [(set (match_operand:SI 0 "register_operand" "r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+			   (match_operand:SI 2 "const_int_operand" "n"))
+		(match_operand:SI 3 "const_int_operand" "n")))]
+   LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
+   Return 0 for a simple right / left or left / right shift combination.
+   Return 1 for a combination of shifts with zero_extend.
+   Return 2 for a combination of shifts with an AND that needs r0.
+   Return 3 for a combination of shifts with an AND that needs an extra
+     scratch register, when the three highmost bits of the AND mask are
+     clear.
+   Return 4 for a combination of shifts with an AND that needs an extra
+     scratch register, when any of the three highmost bits of the AND mask
+     is set.
+   If ATTRP is set, store an initial right shift width in ATTRP[0],
+   and the instruction length in ATTRP[1].  These values are not valid
+   when returning 0.
+   When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
+   shift_amounts for the last shift value that is to be used before the
+   sign extend.  */
+int
+shl_and_kind (left_rtx, mask_rtx, attrp)
+ rtx left_rtx, mask_rtx;
+ int *attrp;
+{
+ unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
+ int left = INTVAL (left_rtx), right;
+ int best = 0;
+ int cost, best_cost = 10000;
+ int best_right = 0, best_len = 0;
+ int i;
+ int can_ext;
+
+ if (left < 0 || left > 31)
+ return 0;
+ if (GET_CODE (mask_rtx) == CONST_INT)
+ mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
+ else
+ mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
+  /* Can this be expressed as a right shift / left shift pair?  */
+ lsb = ((mask ^ (mask - 1)) >> 1) + 1;
+ right = exact_log2 (lsb);
+ mask2 = ~(mask + lsb - 1);
+ lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
+  /* mask has no zeroes except trailing zeroes <==> ! mask2.  */
+ if (! mask2)
+ best_cost = shift_insns[right] + shift_insns[right + left];
+ /* mask has no trailing zeroes <==> ! right */
+ else if (! right && mask2 == ~(lsb2 - 1))
+ {
+ int late_right = exact_log2 (lsb2);
+ best_cost = shift_insns[left + late_right] + shift_insns[late_right];
+ }
+ /* Try to use zero extend */
+ if (mask2 == ~(lsb2 - 1))
+ {
+ int width, first;
+
+ for (width = 8; width <= 16; width += 8)
+ {
+ /* Can we zero-extend right away? */
+ if (lsb2 == (HOST_WIDE_INT)1 << width)
+ {
+ cost
+ = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
+ if (cost < best_cost)
+ {
+ best = 1;
+ best_cost = cost;
+ best_right = right;
+ best_len = cost;
+ if (attrp)
+ attrp[2] = -1;
+ }
+ continue;
+ }
+ /* ??? Could try to put zero extend into initial right shift,
+ or even shift a bit left before the right shift. */
+ /* Determine value of first part of left shift, to get to the
+ zero extend cut-off point. */
+ first = width - exact_log2 (lsb2) + right;
+ if (first >= 0 && right + left - first >= 0)
+ {
+ cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
+ + ext_shift_insns[right + left - first];
+ if (cost < best_cost)
+ {
+ best = 1;
+ best_cost = cost;
+ best_right = right;
+ best_len = cost;
+ if (attrp)
+ attrp[2] = first;
+ }
+ }
+ }
+ }
+ /* Try to use r0 AND pattern */
+ for (i = 0; i <= 2; i++)
+ {
+ if (i > right)
+ break;
+ if (! CONST_OK_FOR_L (mask >> i))
+ continue;
+ cost = (i != 0) + 2 + ext_shift_insns[left + i];
+ if (cost < best_cost)
+ {
+ best = 2;
+ best_cost = cost;
+ best_right = i;
+ best_len = cost - 1;
+ }
+ }
+ /* Try to use a scratch register to hold the AND operand. */
+ can_ext = ((mask << left) & 0xe0000000) == 0;
+ for (i = 0; i <= 2; i++)
+ {
+ if (i > right)
+ break;
+ cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
+ + (can_ext ? ext_shift_insns : shift_insns)[left + i];
+ if (cost < best_cost)
+ {
+ best = 4 - can_ext;
+ best_cost = cost;
+ best_right = i;
+ best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
+ }
+ }
+
+ if (attrp)
+ {
+ attrp[0] = best_right;
+ attrp[1] = best_len;
+ }
+ return best;
+}
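+
+/* Example: for (source << 2) & 0x3fc, the mask shifted right by LEFT is
+   0xff, so the zero extension case applies: the kind is 1 and gen_shl_and
+   below emits extu.b followed by shll2 - two instructions, beating the
+   three-instruction shift-only estimate this function starts from.  */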
+
+/* This is used in length attributes of the unnamed instructions
+ corresponding to shl_and_kind return values of 1 and 2. */
+int
+shl_and_length (insn)
+ rtx insn;
+{
+ rtx set_src, left_rtx, mask_rtx;
+ int attributes[3];
+
+ set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ left_rtx = XEXP (XEXP (set_src, 0), 1);
+ mask_rtx = XEXP (set_src, 1);
+ shl_and_kind (left_rtx, mask_rtx, attributes);
+ return attributes[1];
+}
+
+/* This is used in length attribute of the and_shl_scratch instruction. */
+
+int
+shl_and_scr_length (insn)
+ rtx insn;
+{
+ rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ int len = shift_insns[INTVAL (XEXP (set_src, 1))];
+ rtx op = XEXP (set_src, 0);
+ len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
+ op = XEXP (XEXP (op, 0), 0);
+ return len + shift_insns[INTVAL (XEXP (op, 1))];
+}
+
+/* Generating rtl? */
+extern int rtx_equal_function_value_matters;
+
+/* Generate rtl for instructions for which shl_and_kind advised a particular
+   method of generating them, i.e. returned nonzero.  */
+
+int
+gen_shl_and (dest, left_rtx, mask_rtx, source)
+ rtx dest, left_rtx, mask_rtx, source;
+{
+ int attributes[3];
+ unsigned HOST_WIDE_INT mask;
+ int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
+ int right, total_shift;
+  void (*shift_gen_fun) PROTO((int, rtx *)) = gen_shifty_hi_op;
+
+ right = attributes[0];
+ total_shift = INTVAL (left_rtx) + right;
+ mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
+ switch (kind)
+ {
+ default:
+ return -1;
+ case 1:
+ {
+ int first = attributes[2];
+ rtx operands[3];
+
+ if (first < 0)
+ {
+ emit_insn ((mask << right) <= 0xff
+ ? gen_zero_extendqisi2(dest,
+ gen_lowpart (QImode, source))
+ : gen_zero_extendhisi2(dest,
+ gen_lowpart (HImode, source)));
+ source = dest;
+ }
+ if (source != dest)
+ emit_insn (gen_movsi (dest, source));
+ operands[0] = dest;
+ if (right)
+ {
+ operands[2] = GEN_INT (right);
+ gen_shifty_hi_op (LSHIFTRT, operands);
+ }
+ if (first > 0)
+ {
+ operands[2] = GEN_INT (first);
+ gen_shifty_hi_op (ASHIFT, operands);
+ total_shift -= first;
+ mask <<= first;
+ }
+ if (first >= 0)
+ emit_insn (mask <= 0xff
+ ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
+ : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
+ if (total_shift > 0)
+ {
+ operands[2] = GEN_INT (total_shift);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ break;
+ }
+ case 4:
+ shift_gen_fun = gen_shifty_op;
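+      /* Fall through.  */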
+ case 3:
+ /* If the topmost bit that matters is set, set the topmost bits
+ that don't matter. This way, we might be able to get a shorter
+ signed constant. */
+      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
+	mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
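+      /* Fall through.  */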
+ case 2:
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (rtx_equal_function_value_matters
+ || reload_in_progress || reload_completed)
+ {
+ rtx operands[3];
+
+ /* Cases 3 and 4 should be handled by this split
+ only while combining */
+ if (kind > 2)
+ abort ();
+ if (right)
+ {
+ emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
+ source = dest;
+ }
+ emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
+ if (total_shift)
+ {
+ operands[0] = dest;
+ operands[1] = dest;
+ operands[2] = GEN_INT (total_shift);
+ shift_gen_fun (ASHIFT, operands);
+ }
+ break;
+ }
+ else
+ {
+ int neg = 0;
+ if (kind != 4 && total_shift < 16)
+ {
+ neg = -ext_shift_amounts[total_shift][1];
+ if (neg > 0)
+ neg -= ext_shift_amounts[total_shift][2];
+ else
+ neg = 0;
+ }
+ emit_insn (gen_and_shl_scratch (dest, source,
+ GEN_INT (right),
+ GEN_INT (mask),
+ GEN_INT (total_shift + neg),
+ GEN_INT (neg)));
+ emit_insn (gen_movsi (dest, dest));
+ break;
+ }
+ }
+ return 0;
+}
+
+/* Try to find a good way to implement the combiner pattern
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+				    (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI 18))]
+   LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
+   Return 0 for a simple left / right shift combination.
+   Return 1 for a left shift / 8 bit sign extend / left shift.
+   Return 2 for a left shift / 16 bit sign extend / left shift.
+   Return 3 for a left shift / 8 bit sign extend / shift / sign extend.
+   Return 4 for a left shift / 16 bit sign extend / shift / sign extend.
+   Return 5 for a left shift / 16 bit sign extend / right shift.
+   Return 6 for a < 8 bit sign extend / left shift.
+   Return 7 for a < 8 bit sign extend / left shift / single right shift.
+ If COSTP is nonzero, assign the calculated cost to *COSTP. */
+
+int
+shl_sext_kind (left_rtx, size_rtx, costp)
+ rtx left_rtx, size_rtx;
+ int *costp;
+{
+ int left, size, insize, ext;
+ int cost, best_cost;
+ int kind;
+
+ left = INTVAL (left_rtx);
+ size = INTVAL (size_rtx);
+ insize = size - left;
+ if (insize <= 0)
+ abort ();
+ /* Default to left / right shift. */
+ kind = 0;
+ best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
+ if (size <= 16)
+ {
+ /* 16 bit shift / sign extend / 16 bit shift */
+ cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
+ /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
+ below, by alternative 3 or something even better. */
+ if (cost < best_cost)
+ {
+ kind = 5;
+ best_cost = cost;
+ }
+ }
+ /* Try a plain sign extend between two shifts. */
+ for (ext = 16; ext >= insize; ext -= 8)
+ {
+ if (ext <= size)
+ {
+ cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
+ if (cost < best_cost)
+ {
+ kind = ext / 8U;
+ best_cost = cost;
+ }
+ }
+ /* Check if we can do a sloppy shift with a final signed shift
+ restoring the sign. */
+ if (EXT_SHIFT_SIGNED (size - ext))
+ cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
+ /* If not, maybe it's still cheaper to do the second shift sloppy,
+ and do a final sign extend? */
+ else if (size <= 16)
+ cost = ext_shift_insns[ext - insize] + 1
+ + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
+ else
+ continue;
+ if (cost < best_cost)
+ {
+ kind = ext / 8U + 2;
+ best_cost = cost;
+ }
+ }
+ /* Check if we can sign extend in r0 */
+ if (insize < 8)
+ {
+ cost = 3 + shift_insns[left];
+ if (cost < best_cost)
+ {
+ kind = 6;
+ best_cost = cost;
+ }
+ /* Try the same with a final signed shift. */
+ if (left < 31)
+ {
+ cost = 3 + ext_shift_insns[left + 1] + 1;
+ if (cost < best_cost)
+ {
+ kind = 7;
+ best_cost = cost;
+ }
+ }
+ }
+ if (TARGET_SH3)
+ {
+ /* Try to use a dynamic shift. */
+ cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
+ if (cost < best_cost)
+ {
+ kind = 0;
+ best_cost = cost;
+ }
+ }
+ if (costp)
+    *costp = best_cost;
+ return kind;
+}
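+
+/* For example, with LEFT == 8 and SIZE == 16 (so INSIZE == 8), the 16 bit
+   shift / sign extend / 16 bit shift case wins with a cost of 2, giving
+   kind 5: shll8 followed by exts.w, with no trailing right shift.  */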
+
+/* Function to be used in the length attribute of the instructions
+ implementing this pattern. */
+
+int
+shl_sext_length (insn)
+ rtx insn;
+{
+ rtx set_src, left_rtx, size_rtx;
+ int cost;
+
+ set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ left_rtx = XEXP (XEXP (set_src, 0), 1);
+ size_rtx = XEXP (set_src, 1);
+ shl_sext_kind (left_rtx, size_rtx, &cost);
+ return cost;
+}
+
+/* Generate rtl for this pattern.  */
+
+int
+gen_shl_sext (dest, left_rtx, size_rtx, source)
+ rtx dest, left_rtx, size_rtx, source;
+{
+ int kind;
+ int left, size, insize, cost;
+ rtx operands[3];
+
+ kind = shl_sext_kind (left_rtx, size_rtx, &cost);
+ left = INTVAL (left_rtx);
+ size = INTVAL (size_rtx);
+ insize = size - left;
+ switch (kind)
+ {
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ {
+ int ext = kind & 1 ? 8 : 16;
+ int shift2 = size - ext;
+
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (! rtx_equal_function_value_matters
+ && ! reload_in_progress && ! reload_completed)
+ {
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ emit_insn (gen_movsi (dest, source));
+ break;
+ }
+ if (dest != source)
+ emit_insn (gen_movsi (dest, source));
+ operands[0] = dest;
+ if (ext - insize)
+ {
+ operands[2] = GEN_INT (ext - insize);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ emit_insn (kind & 1
+ ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
+ : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
+ if (kind <= 2)
+ {
+ if (shift2)
+ {
+ operands[2] = GEN_INT (shift2);
+ gen_shifty_op (ASHIFT, operands);
+ }
+ }
+ else
+ {
+ if (shift2 > 0)
+ {
+ if (EXT_SHIFT_SIGNED (shift2))
+ {
+ operands[2] = GEN_INT (shift2 + 1);
+ gen_shifty_op (ASHIFT, operands);
+ operands[2] = GEN_INT (1);
+ gen_shifty_op (ASHIFTRT, operands);
+ break;
+ }
+ operands[2] = GEN_INT (shift2);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ else if (shift2)
+ {
+ operands[2] = GEN_INT (-shift2);
+ gen_shifty_hi_op (LSHIFTRT, operands);
+ }
+ emit_insn (size <= 8
+ ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
+ : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ }
+ break;
+ }
+ case 5:
+ {
+ int i = 16 - size;
+ if (! rtx_equal_function_value_matters
+ && ! reload_in_progress && ! reload_completed)
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ else
+ {
+ operands[0] = dest;
+ operands[2] = GEN_INT (16 - insize);
+ gen_shifty_hi_op (ASHIFT, operands);
+ emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ }
+ /* Don't use gen_ashrsi3 because it generates new pseudos. */
+ while (--i >= 0)
+ gen_ashift (ASHIFTRT, 1, dest);
+ break;
+ }
+ case 6:
+ case 7:
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (! rtx_equal_function_value_matters
+ && ! reload_in_progress && ! reload_completed)
+ {
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ emit_insn (gen_movsi (dest, source));
+ break;
+ }
+ emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
+ emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
+ emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
+ operands[0] = dest;
+ operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
+ gen_shifty_op (ASHIFT, operands);
+ if (kind == 7)
+ emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+/* The SH cannot load a large constant into a register; constants have to
+   come from a pc-relative load.  The reference of a pc-relative load
+   instruction must be less than 1k in front of the instruction.  This
+   means that we often have to dump a constant inside a function, and
+   generate code to branch around it.
+
+ It is important to minimize this, since the branches will slow things
+ down and make things bigger.
+
+ Worst case code looks like:
+
+ mov.l L1,rn
+ bra L2
+ nop
+ align
+ L1: .long value
+ L2:
+ ..
+
+ mov.l L3,rn
+ bra L4
+ nop
+ align
+ L3: .long value
+ L4:
+ ..
+
+ We fix this by performing a scan before scheduling, which notices which
+ instructions need to have their operands fetched from the constant table
+ and builds the table.
+
+ The algorithm is:
+
+   Scan forward; on finding an instruction which needs a pcrel move, look
+   for the last barrier which is within MAX_COUNT bytes of the requirement.
+   If there isn't one, make one.  Then process all the instructions between
+   the found instruction and the barrier.
+
+ In the above example, we can tell that L3 is within 1k of L1, so
+ the first move can be shrunk from the 3 insn+constant sequence into
+ just 1 insn, and the constant moved to L3 to make:
+
+ mov.l L1,rn
+ ..
+ mov.l L3,rn
+ bra L4
+ nop
+ align
+ L3:.long value
+ L4:.long value
+
+ Then the second move becomes the target for the shortening process. */
+
+typedef struct
+{
+ rtx value; /* Value in table. */
+ rtx label; /* Label of value. */
+ enum machine_mode mode; /* Mode of value. */
+} pool_node;
+
+/* The maximum number of constants that can fit into one pool, since
+ the pc relative range is 0...1020 bytes and constants are at least 4
+ bytes long. */
+
+#define MAX_POOL_SIZE (1020/4)
+static pool_node pool_vector[MAX_POOL_SIZE];
+static int pool_size;
+
+/* ??? If we need a constant in HImode which is the truncated value of a
+ constant we need in SImode, we could combine the two entries thus saving
+ two bytes. Is this common enough to be worth the effort of implementing
+ it? */
+
+/* ??? This stuff should be done at the same time that we shorten branches.
+ As it is now, we must assume that all branches are the maximum size, and
+ this causes us to almost always output constant pools sooner than
+ necessary. */
+
+/* Add a constant to the pool and return its label. */
+
+static rtx
+add_constant (x, mode, last_value)
+     rtx x;
+     enum machine_mode mode;
+     rtx last_value;
+{
+ int i;
+ rtx lab;
+
+ /* First see if we've already got it. */
+ for (i = 0; i < pool_size; i++)
+ {
+      if (GET_CODE (x) == GET_CODE (pool_vector[i].value)
+	  && mode == pool_vector[i].mode)
+	{
+	  if (GET_CODE (x) == CODE_LABEL)
+ {
+ if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
+ continue;
+ }
+ if (rtx_equal_p (x, pool_vector[i].value))
+ {
+ lab = 0;
+ if (! last_value
+ || ! i
+ || ! rtx_equal_p (last_value, pool_vector[i-1].value))
+ {
+ lab = pool_vector[i].label;
+ if (! lab)
+ pool_vector[i].label = lab = gen_label_rtx ();
+ }
+ return lab;
+ }
+ }
+ }
+
+  /* Need a new one.  */
+  pool_vector[pool_size].value = x;
+  if (pool_size
+      && last_value
+      && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
+ lab = 0;
+ else
+ lab = gen_label_rtx ();
+ pool_vector[pool_size].mode = mode;
+ pool_vector[pool_size].label = lab;
+ pool_size++;
+ return lab;
+}
+
+/* Output the literal table. */
+
+static void
+dump_table (scan)
+ rtx scan;
+{
+ int i;
+ int need_align = 1;
+
+  /* Do two passes; on the first, dump out the HI sized constants.  */
+
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ if (p->mode == HImode)
+ {
+ if (need_align)
+ {
+ scan = emit_insn_after (gen_align_2 (), scan);
+ need_align = 0;
+ }
+ scan = emit_label_after (p->label, scan);
+ scan = emit_insn_after (gen_consttable_2 (p->value), scan);
+ }
+ }
+
+ need_align = 1;
+
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ switch (p->mode)
+ {
+ case HImode:
+ break;
+ case SImode:
+ case SFmode:
+ if (need_align)
+ {
+ need_align = 0;
+ scan = emit_label_after (gen_label_rtx (), scan);
+ scan = emit_insn_after (gen_align_4 (), scan);
+ }
+ if (p->label)
+ scan = emit_label_after (p->label, scan);
+ scan = emit_insn_after (gen_consttable_4 (p->value), scan);
+ break;
+ case DFmode:
+ case DImode:
+ if (need_align)
+ {
+ need_align = 0;
+ scan = emit_label_after (gen_label_rtx (), scan);
+ scan = emit_insn_after (gen_align_4 (), scan);
+ }
+ if (p->label)
+ scan = emit_label_after (p->label, scan);
+ scan = emit_insn_after (gen_consttable_8 (p->value), scan);
+ break;
+ default:
+ abort ();
+ break;
+ }
+ }
+
+ scan = emit_insn_after (gen_consttable_end (), scan);
+ scan = emit_barrier_after (scan);
+ pool_size = 0;
+}
+
+/* Return nonzero if the constant would be an ok source for a
+   mov.w instead of a mov.l.  */
+
+static int
+hi_const (src)
+ rtx src;
+{
+ return (GET_CODE (src) == CONST_INT
+ && INTVAL (src) >= -32768
+ && INTVAL (src) <= 32767);
+}
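+
+/* I.e. the constant fits into the sign-extended 16 bits that a mov.w
+   pc-relative load can provide.  */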
+
+/* Return nonzero if the insn is a move instruction which needs to be fixed.  */
+
+/* ??? For DImode/DFmode moves, we don't need to fix the move if each half of
+   the CONST_DOUBLE input value is CONST_OK_FOR_I.  For an SFmode move, we
+   don't need to fix it if the input value is CONST_OK_FOR_I.  */
+
+static int
+broken_move (insn)
+ rtx insn;
+{
+ if (GET_CODE (insn) == INSN)
+ {
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ if (GET_CODE (pat) == SET
+ /* We can load any 8 bit value if we don't care what the high
+ order bits end up as. */
+ && GET_MODE (SET_DEST (pat)) != QImode
+ && CONSTANT_P (SET_SRC (pat))
+ && ! (TARGET_SH3E
+ && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
+ && (fp_zero_operand (SET_SRC (pat))
+ || fp_one_operand (SET_SRC (pat)))
+ && GET_CODE (SET_DEST (pat)) == REG
+ && REGNO (SET_DEST (pat)) >= FIRST_FP_REG
+ && REGNO (SET_DEST (pat)) <= LAST_FP_REG)
+ && (GET_CODE (SET_SRC (pat)) != CONST_INT
+ || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+mova_p (insn)
+ rtx insn;
+{
+ return (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
+ && XINT (SET_SRC (PATTERN (insn)), 1) == 1);
+}
+
+/* Find the last barrier from insn FROM which is close enough to hold the
+ constant pool. If we can't find one, then create one near the end of
+ the range. */
+
+static rtx
+find_barrier (num_mova, mova, from)
+ int num_mova;
+ rtx mova, from;
+{
+ int count_si = 0;
+ int count_hi = 0;
+ int found_hi = 0;
+ int found_si = 0;
+ int hi_align = 2;
+ int si_align = 2;
+ int leading_mova = num_mova;
+  rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
+ int si_limit;
+ int hi_limit;
+
+ /* For HImode: range is 510, add 4 because pc counts from address of
+ second instruction after this one, subtract 2 for the jump instruction
+ that we may need to emit before the table, subtract 2 for the instruction
+ that fills the jump delay slot (in very rare cases, reorg will take an
+ instruction from after the constant pool or will leave the delay slot
+ empty). This gives 510.
+ For SImode: range is 1020, add 4 because pc counts from address of
+ second instruction after this one, subtract 2 in case pc is 2 byte
+ aligned, subtract 2 for the jump instruction that we may need to emit
+ before the table, subtract 2 for the instruction that fills the jump
+ delay slot. This gives 1018. */
+
+ /* The branch will always be shortened now that the reference address for
+ forward branches is the successor address, thus we need no longer make
+ adjustments to the [sh]i_limit for -O0. */
+
+ si_limit = 1018;
+ hi_limit = 510;
+
+ while (from && count_si < si_limit && count_hi < hi_limit)
+ {
+ int inc = get_attr_length (from);
+ int new_align = 1;
+
+ if (GET_CODE (from) == CODE_LABEL)
+ {
+ if (optimize)
+ new_align = 1 << label_to_alignment (from);
+ else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
+ new_align = 1 << barrier_align (from);
+ else
+ new_align = 1;
+ inc = 0;
+ }
+
+      if (GET_CODE (from) == BARRIER)
+	{
+	  found_barrier = from;
+
+ /* If we are at the end of the function, or in front of an alignment
+ instruction, we need not insert an extra alignment. We prefer
+ this kind of barrier. */
+ if (barrier_align (from) > 2)
+ good_barrier = from;
+ }
+
+ if (broken_move (from))
+ {
+ rtx pat, src, dst;
+ enum machine_mode mode;
+
+ pat = PATTERN (from);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ src = SET_SRC (pat);
+ dst = SET_DEST (pat);
+ mode = GET_MODE (dst);
+
+ /* We must explicitly check the mode, because sometimes the
+ front end will generate code to load unsigned constants into
+ HImode targets without properly sign extending them. */
+ if (mode == HImode
+ || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
+ {
+ found_hi += 2;
+ /* We put the short constants before the long constants, so
+ we must count the length of short constants in the range
+ for the long constants. */
+ /* ??? This isn't optimal, but is easy to do. */
+ si_limit -= 2;
+ }
+ else
+ {
+ while (si_align > 2 && found_si + si_align - 2 > count_si)
+ si_align >>= 1;
+ if (found_si > count_si)
+ count_si = found_si;
+ found_si += GET_MODE_SIZE (mode);
+ if (num_mova)
+ si_limit -= GET_MODE_SIZE (mode);
+ }
+ }
+
+ if (mova_p (from))
+ {
+ if (! num_mova++)
+ {
+ leading_mova = 0;
+ mova = from;
+ barrier_before_mova = good_barrier ? good_barrier : found_barrier;
+ }
+ if (found_si > count_si)
+ count_si = found_si;
+ }
+ else if (GET_CODE (from) == JUMP_INSN
+ && (GET_CODE (PATTERN (from)) == ADDR_VEC
+ || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
+ {
+ if (num_mova)
+ num_mova--;
+ if (barrier_align (next_real_insn (from)) == CACHE_LOG)
+ {
+ /* We have just passed the barrier in front of the
+ ADDR_DIFF_VEC, which is stored in found_barrier. Since
+ the ADDR_DIFF_VEC is accessed as data, just like our pool
+ constants, this is a good opportunity to accommodate what
+ we have gathered so far.
+ If we waited any longer, we could end up at a barrier in
+ front of code, which gives worse cache usage for separated
+ instruction / data caches. */
+ good_barrier = found_barrier;
+ break;
+ }
+ else
+ {
+ rtx body = PATTERN (from);
+ inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
+ }
+ }
+
+ if (found_si)
+ {
+ if (new_align > si_align)
+ {
+	      si_limit -= (count_si - 1) & (new_align - si_align);
+	      si_align = new_align;
+	    }
+	  count_si = (count_si + new_align - 1) & -new_align;
+ count_si += inc;
+ }
+ if (found_hi)
+ {
+ if (new_align > hi_align)
+ {
+	      hi_limit -= (count_hi - 1) & (new_align - hi_align);
+	      hi_align = new_align;
+	    }
+	  count_hi = (count_hi + new_align - 1) & -new_align;
+ count_hi += inc;
+ }
+ from = NEXT_INSN (from);
+ }
+
+  if (num_mova)
+    {
+      if (leading_mova)
+	{
+	  /* Try as we might, the leading mova is out of range.  Change
+	     it into a load (which will become a pcload) and retry.  */
+	  SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
+	  INSN_CODE (mova) = -1;
+	  return find_barrier (0, 0, mova);
+	}
+      else
+	{
+	  /* Insert the constant pool table before the mova instruction,
+	     to prevent the mova label reference from going out of range.  */
+	  from = mova;
+	  good_barrier = found_barrier = barrier_before_mova;
+	}
+    }
+
+ if (found_barrier)
+ {
+ if (good_barrier && next_real_insn (found_barrier))
+ found_barrier = good_barrier;
+ }
+ else
+ {
+ /* We didn't find a barrier in time to dump our stuff,
+ so we'll make one. */
+ rtx label = gen_label_rtx ();
+
+ /* If we exceeded the range, then we must back up over the last
+ instruction we looked at. Otherwise, we just need to undo the
+ NEXT_INSN at the end of the loop. */
+ if (count_hi > hi_limit || count_si > si_limit)
+ from = PREV_INSN (PREV_INSN (from));
+ else
+ from = PREV_INSN (from);
+
+ /* Walk back to be just before any jump or label.
+ Putting it before a label reduces the number of times the branch
+ around the constant pool table will be hit. Putting it before
+ a jump makes it more likely that the bra delay slot will be
+ filled. */
+ while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
+ || GET_CODE (from) == CODE_LABEL)
+ from = PREV_INSN (from);
+
+ from = emit_jump_insn_after (gen_jump (label), from);
+ JUMP_LABEL (from) = label;
+ LABEL_NUSES (label) = 1;
+ found_barrier = emit_barrier_after (from);
+ emit_label_after (label, found_barrier);
+ }
+
+ return found_barrier;
+}
+
+/* If the instruction INSN is implemented by a special function, and we can
+   positively find the register that is used to call the sfunc, and this
+   register is not used anywhere else in this instruction - except as the
+   destination of a set - return this register; else, return 0.  */
+rtx
+sfunc_uses_reg (insn)
+ rtx insn;
+{
+ int i;
+ rtx pattern, part, reg_part, reg;
+
+ if (GET_CODE (insn) != INSN)
+ return 0;
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
+ return 0;
+
+ for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
+ {
+ part = XVECEXP (pattern, 0, i);
+ if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
+ reg_part = part;
+ }
+ if (! reg_part)
+ return 0;
+ reg = XEXP (reg_part, 0);
+ for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
+ {
+ part = XVECEXP (pattern, 0, i);
+ if (part == reg_part || GET_CODE (part) == CLOBBER)
+ continue;
+ if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
+ && GET_CODE (SET_DEST (part)) == REG)
+ ? SET_SRC (part) : part)))
+ return 0;
+ }
+ return reg;
+}
+
+/* See if the only way in which INSN uses REG is by calling it, or by
+ setting it while calling it. Set *SET to a SET rtx if the register
+ is set by INSN. */
+
+static int
+noncall_uses_reg (reg, insn, set)
+ rtx reg;
+ rtx insn;
+ rtx *set;
+{
+ rtx pattern, reg2;
+
+ *set = NULL_RTX;
+
+ reg2 = sfunc_uses_reg (insn);
+ if (reg2 && REGNO (reg2) == REGNO (reg))
+ {
+ pattern = single_set (insn);
+ if (pattern
+ && GET_CODE (SET_DEST (pattern)) == REG
+ && REGNO (reg) == REGNO (SET_DEST (pattern)))
+ *set = pattern;
+ return 0;
+ }
+ if (GET_CODE (insn) != CALL_INSN)
+ {
+ /* We don't use rtx_equal_p because we don't care if the mode is
+ different. */
+ pattern = single_set (insn);
+ if (pattern
+ && GET_CODE (SET_DEST (pattern)) == REG
+ && REGNO (reg) == REGNO (SET_DEST (pattern)))
+ {
+ rtx par, part;
+ int i;
+
+ *set = pattern;
+ par = PATTERN (insn);
+ if (GET_CODE (par) == PARALLEL)
+ for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
+ {
+ part = XVECEXP (par, 0, i);
+ if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
+ return 1;
+ }
+ return reg_mentioned_p (reg, SET_SRC (pattern));
+ }
+
+ return 1;
+ }
+
+ pattern = PATTERN (insn);
+
+ if (GET_CODE (pattern) == PARALLEL)
+ {
+ int i;
+
+ for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
+ if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
+ return 1;
+ pattern = XVECEXP (pattern, 0, 0);
+ }
+
+ if (GET_CODE (pattern) == SET)
+ {
+ if (reg_mentioned_p (reg, SET_DEST (pattern)))
+ {
+ /* We don't use rtx_equal_p, because we don't care if the
+ mode is different. */
+ if (GET_CODE (SET_DEST (pattern)) != REG
+ || REGNO (reg) != REGNO (SET_DEST (pattern)))
+ return 1;
+
+ *set = pattern;
+ }
+
+ pattern = SET_SRC (pattern);
+ }
+
+ if (GET_CODE (pattern) != CALL
+ || GET_CODE (XEXP (pattern, 0)) != MEM
+ || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
+ return 1;
+
+ return 0;
+}
+
+/* Given X, a pattern of an insn or a part of it, return a mask of used
+ general registers. Bits 0..15 mean that the respective registers
+ are used as inputs in the instruction. Bits 16..31 mean that the
+ registers 0..15, respectively, are used as outputs, or are clobbered.
+ IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
+int
+regs_used (x, is_dest)
+ rtx x; int is_dest;
+{
+ enum rtx_code code;
+ char *fmt;
+ int i, used = 0;
+
+ if (! x)
+ return used;
+ code = GET_CODE (x);
+ switch (code)
+ {
+ case REG:
+ if (REGNO (x) < 16)
+ return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
+ << (REGNO (x) + is_dest));
+ return 0;
+ case SUBREG:
+ {
+ rtx y = SUBREG_REG (x);
+
+ if (GET_CODE (y) != REG)
+ break;
+ if (REGNO (y) < 16)
+ return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
+ << (REGNO (y) + SUBREG_WORD (x) + is_dest));
+ return 0;
+ }
+ case SET:
+ return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
+ case RETURN:
+ /* If there was a return value, it must have been indicated with USE. */
+ return 0x00ffff00;
+ case CLOBBER:
+ is_dest = 1;
+ break;
+ case MEM:
+ is_dest = 0;
+ break;
+ case CALL:
+ used |= 0x00ff00f0;
+ break;
+ }
+
+ fmt = GET_RTX_FORMAT (code);
+
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ register int j;
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ used |= regs_used (XVECEXP (x, i, j), is_dest);
+ }
+ else if (fmt[i] == 'e')
+ used |= regs_used (XEXP (x, i), is_dest);
+ }
+ return used;
+}
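+
+/* For example, assuming SImode occupies a single hard register, the pattern
+   (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))) yields
+   1 << (1 + 16) | 1 << 2 | 1 << 3 == 0x2000c: r1 written, r2 and r3 read.  */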
+
+/* Create an instruction that prevents redirection of a conditional branch
+ to the destination of the JUMP with address ADDR.
+ If the branch needs to be implemented as an indirect jump, try to find
+ a scratch register for it.
+ If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
+ If any preceding insn that doesn't fit into a delay slot is good enough,
+ pass 1. Pass 2 if a definite blocking insn is needed.
+ -1 is used internally to avoid deep recursion.
+ If a blocking instruction is made or recognized, return it. */
+
+static rtx
+gen_block_redirect (jump, addr, need_block)
+ rtx jump;
+ int addr, need_block;
+{
+ int dead = 0;
+ rtx prev = prev_nonnote_insn (jump);
+ rtx dest;
+
+ /* First, check if we already have an instruction that satisfies our need. */
+ if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
+ {
+ if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
+ return prev;
+ if (GET_CODE (PATTERN (prev)) == USE
+ || GET_CODE (PATTERN (prev)) == CLOBBER
+ || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
+ prev = jump;
+ else if ((need_block &= ~1) < 0)
+ return prev;
+ else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
+ need_block = 0;
+ }
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ dest = XEXP (SET_SRC (PATTERN (jump)), 0);
+ /* If the branch is out of range, try to find a scratch register for it. */
+ if (optimize
+ && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098))
+ {
+ rtx scan;
+      /* Don't look for the stack pointer as a scratch register;
+	 it would cause trouble if an interrupt occurred.  */
+ unsigned try = 0x7fff, used;
+ int jump_left = flag_expensive_optimizations + 1;
+
+ /* It is likely that the most recent eligible instruction is wanted for
+ the delay slot. Therefore, find out which registers it uses, and
+ try to avoid using them. */
+
+      for (scan = jump; (scan = PREV_INSN (scan)); )
+ {
+ enum rtx_code code;
+
+ if (INSN_DELETED_P (scan))
+ continue;
+ code = GET_CODE (scan);
+ if (code == CODE_LABEL || code == JUMP_INSN)
+ break;
+ if (code == INSN
+ && GET_CODE (PATTERN (scan)) != USE
+ && GET_CODE (PATTERN (scan)) != CLOBBER
+ && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
+ {
+ try &= ~regs_used (PATTERN (scan), 0);
+ break;
+ }
+ }
+      for (used = dead = 0, scan = JUMP_LABEL (jump); (scan = NEXT_INSN (scan)); )
+ {
+ enum rtx_code code;
+
+ if (INSN_DELETED_P (scan))
+ continue;
+ code = GET_CODE (scan);
+ if (GET_RTX_CLASS (code) == 'i')
+ {
+ used |= regs_used (PATTERN (scan), 0);
+ if (code == CALL_INSN)
+ used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
+ dead |= (used >> 16) & ~used;
+ if (dead & try)
+ {
+ dead &= try;
+ break;
+ }
+	      if (code == JUMP_INSN)
+		{
+		  if (jump_left-- && simplejump_p (scan))
+		    scan = JUMP_LABEL (scan);
+		  else
+		    break;
+		}
+ }
+ }
+ /* Mask out the stack pointer again, in case it was
+ the only 'free' register we have found. */
+ dead &= 0x7fff;
+ }
+ /* If the immediate destination is still in range, check for possible
+ threading with a jump beyond the delay slot insn.
+ Don't check if we are called recursively; the jump has been or will be
+ checked in a different invocation then. */
+
+ else if (optimize && need_block >= 0)
+ {
+ rtx next = next_active_insn (next_active_insn (dest));
+ if (next && GET_CODE (next) == JUMP_INSN
+ && GET_CODE (PATTERN (next)) == SET
+ && recog_memoized (next) == CODE_FOR_jump)
+ {
+ dest = JUMP_LABEL (next);
+ if (dest
+ && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)
+ gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1);
+ }
+ }
+
+ if (dead)
+ {
+ rtx reg = gen_rtx (REG, SImode, exact_log2 (dead & -dead));
+
+ /* It would be nice if we could convert the jump into an indirect
+ jump / far branch right now, and thus exposing all constituent
+ instructions to further optimization. However, reorg uses
+ simplejump_p to determine if there is an unconditional jump where
+ it should try to schedule instructions from the target of the
+ branch; simplejump_p fails for indirect jumps even if they have
+ a JUMP_LABEL. */
+ rtx insn = emit_insn_before (gen_indirect_jump_scratch
+ (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
+ , jump);
+ INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
+ return insn;
+ }
+ else if (need_block)
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ return emit_insn_before (gen_block_branch_redirect
+ (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
+ , jump);
+ return prev;
+}
+
+#define CONDJUMP_MIN -252
+#define CONDJUMP_MAX 262
+struct far_branch
+{
+ /* A label (to be placed) in front of the jump
+ that jumps to our ultimate destination. */
+ rtx near_label;
+ /* Where we are going to insert it if we cannot move the jump any farther,
+ or the jump itself if we have picked up an existing jump. */
+ rtx insert_place;
+ /* The ultimate destination. */
+ rtx far_label;
+ struct far_branch *prev;
+ /* If the branch has already been created, its address;
+ else the address of its first prospective user. */
+ int address;
+};
+
+enum mdep_reorg_phase_e mdep_reorg_phase;
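+
+/* Expand the far branch described by BP: emit BP->near_label on a new
+   unconditional jump to BP->far_label (or a return, if there is no far
+   label), invert the conditional branch at BP->insert_place so that it
+   jumps around this new jump, and keep reorg from undoing the split.  */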
+void
+gen_far_branch (bp)
+ struct far_branch *bp;
+{
+ rtx insn = bp->insert_place;
+ rtx jump;
+ rtx label = gen_label_rtx ();
+
+ emit_label_after (label, insn);
+ if (bp->far_label)
+ {
+ jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
+ LABEL_NUSES (bp->far_label)++;
+ }
+ else
+ jump = emit_jump_insn_after (gen_return (), insn);
+  /* Emit a barrier so that reorg knows that any following instructions
+     are not reachable via a fall-through path.
+     But don't do this when not optimizing, since we wouldn't suppress the
+     alignment for the barrier then, and could end up with out-of-range
+     pc-relative loads.  */
+ if (optimize)
+ emit_barrier_after (jump);
+ emit_label_after (bp->near_label, insn);
+ JUMP_LABEL (jump) = bp->far_label;
+ if (! invert_jump (insn, label))
+ abort ();
+ /* Prevent reorg from undoing our splits. */
+ gen_block_redirect (jump, bp->address += 2, 2);
+}
+
+/* Fix up ADDR_DIFF_VECs. */
+void
+fixup_addr_diff_vecs (first)
+ rtx first;
+{
+ rtx insn;
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx vec_lab, pat, prev, prevpat, x, braf_label;
+
+ if (GET_CODE (insn) != JUMP_INSN
+ || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+ continue;
+ pat = PATTERN (insn);
+ vec_lab = XEXP (XEXP (pat, 0), 0);
+
+ /* Search the matching casesi_jump_2. */
+ for (prev = vec_lab; ; prev = PREV_INSN (prev))
+ {
+ if (GET_CODE (prev) != JUMP_INSN)
+ continue;
+ prevpat = PATTERN (prev);
+ if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
+ continue;
+ x = XVECEXP (prevpat, 0, 1);
+ if (GET_CODE (x) != USE)
+ continue;
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
+ break;
+ }
+
+ /* Emit the reference label of the braf where it belongs, right after
+ the casesi_jump_2 (i.e. braf). */
+ braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
+ emit_label_after (braf_label, prev);
+
+      /* Fix up the ADDR_DIFF_VEC to be relative
+	 to the reference address of the braf.  */
+ XEXP (XEXP (pat, 0), 0) = braf_label;
+ }
+}
+
+/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
+ a barrier. Return the base 2 logarithm of the desired alignment. */
+int
+barrier_align (barrier_or_label)
+ rtx barrier_or_label;
+{
+ rtx next = next_real_insn (barrier_or_label), pat, prev;
+ int slot, credit;
+
+ if (! next)
+ return 0;
+
+ pat = PATTERN (next);
+
+ if (GET_CODE (pat) == ADDR_DIFF_VEC)
+ return 2;
+
+ if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1)
+ /* This is a barrier in front of a constant table. */
+ return 0;
+
+ prev = prev_real_insn (barrier_or_label);
+ if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
+ {
+ pat = PATTERN (prev);
+ /* If this is a very small table, we want to keep the alignment after
+ the table to the minimum for proper code alignment. */
+ return ((TARGET_SMALLCODE
+ || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
+ <= 1 << (CACHE_LOG - 2)))
+ ? 1 : CACHE_LOG);
+ }
+
+ if (TARGET_SMALLCODE)
+ return 0;
+
+ if (! TARGET_SH3 || ! optimize)
+ return CACHE_LOG;
+
+ /* When fixing up pcloads, a constant table might be inserted just before
+ the basic block that ends with the barrier. Thus, we can't trust the
+ instruction lengths before that. */
+ if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
+ {
+      /* Check if there is an immediately preceding branch to the insn beyond
+	 the barrier.  We must weigh the cost of discarding useful information
+	 from the current cache line when executing this branch and there is
+	 an alignment, against that of fetching unneeded insns in front of the
+	 branch target when there is no alignment.  */
+
+ /* PREV is presumed to be the JUMP_INSN for the barrier under
+ investigation. Skip to the insn before it. */
+ prev = prev_real_insn (prev);
+
+      for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
+ credit >= 0 && prev && GET_CODE (prev) == INSN;
+ prev = prev_real_insn (prev))
+ {
+ if (GET_CODE (PATTERN (prev)) == USE
+ || GET_CODE (PATTERN (prev)) == CLOBBER)
+ continue;
+ if (GET_CODE (PATTERN (prev)) == SEQUENCE)
+ prev = XVECEXP (PATTERN (prev), 0, 1);
+	  if (slot
+	      && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
+ slot = 0;
+ credit -= get_attr_length (prev);
+ }
+ if (prev
+ && GET_CODE (prev) == JUMP_INSN
+ && JUMP_LABEL (prev)
+ && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label)
+ && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0)))
+ return 0;
+ }
+
+ return CACHE_LOG;
+}
+
+/* If we are inside a phony loop, almost any kind of label can turn up as the
+ first one in the loop. Aligning a braf label causes incorrect switch
+ destination addresses; we can detect braf labels because they are
+ followed by a BARRIER.
+ Applying loop alignment to small constant or switch tables is a waste
+ of space, so we suppress this too. */
+int
+sh_loop_align (label)
+ rtx label;
+{
+ rtx next = label;
+
+ do
+ next = next_nonnote_insn (next);
+ while (next && GET_CODE (next) == CODE_LABEL);
+
+ if (! next
+ || GET_RTX_CLASS (GET_CODE (next)) != 'i'
+ || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
+ || recog_memoized (next) == CODE_FOR_consttable_2)
+ return 0;
+ return 2;
+}
+
+/* Exported to toplev.c.
+
+ Do a final pass over the function, just before delayed branch
+ scheduling. */
+
+void
+machine_dependent_reorg (first)
+ rtx first;
+{
+ rtx insn, mova;
+ int num_mova;
+ rtx r0_rtx = gen_rtx (REG, Pmode, 0);
+ rtx r0_inc_rtx = gen_rtx (POST_INC, Pmode, r0_rtx);
+
+  /* If relaxing, generate pseudo-ops to associate function calls with
+     the symbols they call.  It does no harm to not generate these
+     pseudo-ops.  However, when we can generate them, it enables the
+     linker to potentially relax the jsr to a bsr, and eliminate the
+     register load and, possibly, the constant pool entry.  */
+
+ mdep_reorg_phase = SH_INSERT_USES_LABELS;
+ if (TARGET_RELAX)
+ {
+ /* Remove all REG_LABEL notes. We want to use them for our own
+ purposes. This works because none of the remaining passes
+ need to look at them.
+
+ ??? But it may break in the future. We should use a machine
+ dependent REG_NOTE, or some other approach entirely. */
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
+ {
+ rtx note;
+
+ while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
+ remove_note (insn, note);
+ }
+ }
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx pattern, reg, link, set, scan, dies, label;
+ int rescan = 0, foundinsn = 0;
+
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ pattern = PATTERN (insn);
+
+ if (GET_CODE (pattern) == PARALLEL)
+ pattern = XVECEXP (pattern, 0, 0);
+ if (GET_CODE (pattern) == SET)
+ pattern = SET_SRC (pattern);
+
+ if (GET_CODE (pattern) != CALL
+ || GET_CODE (XEXP (pattern, 0)) != MEM)
+ continue;
+
+ reg = XEXP (XEXP (pattern, 0), 0);
+ }
+ else
+ {
+ reg = sfunc_uses_reg (insn);
+ if (! reg)
+ continue;
+ }
+
+ if (GET_CODE (reg) != REG)
+ continue;
+
+ /* This is a function call via REG. If the only uses of REG
+ between the time that it is set and the time that it dies
+ are in function calls, then we can associate all the
+ function calls with the setting of REG. */
+
+ for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
+ {
+ if (REG_NOTE_KIND (link) != 0)
+ continue;
+ set = single_set (XEXP (link, 0));
+ if (set && rtx_equal_p (reg, SET_DEST (set)))
+ {
+ link = XEXP (link, 0);
+ break;
+ }
+ }
+
+ if (! link)
+ {
+ /* ??? Sometimes global register allocation will have
+ deleted the insn pointed to by LOG_LINKS. Try
+ scanning backward to find where the register is set. */
+ for (scan = PREV_INSN (insn);
+ scan && GET_CODE (scan) != CODE_LABEL;
+ scan = PREV_INSN (scan))
+ {
+ if (GET_RTX_CLASS (GET_CODE (scan)) != 'i')
+ continue;
+
+ if (! reg_mentioned_p (reg, scan))
+ continue;
+
+ if (noncall_uses_reg (reg, scan, &set))
+ break;
+
+ if (set)
+ {
+ link = scan;
+ break;
+ }
+ }
+ }
+
+ if (! link)
+ continue;
+
+ /* The register is set at LINK. */
+
+ /* We can only optimize the function call if the register is
+ being set to a symbol. In theory, we could sometimes
+ optimize calls to a constant location, but the assembler
+ and linker do not support that at present. */
+ if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
+ && GET_CODE (SET_SRC (set)) != LABEL_REF)
+ continue;
+
+ /* Scan forward from LINK to the place where REG dies, and
+ make sure that the only insns which use REG are
+ themselves function calls. */
+
+ /* ??? This doesn't work for call targets that were allocated
+ by reload, since there may not be a REG_DEAD note for the
+ register. */
+
+ dies = NULL_RTX;
+ for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
+ {
+ rtx scanset;
+
+ /* Don't try to trace forward past a CODE_LABEL if we haven't
+ seen INSN yet. Ordinarily, we will only find the setting insn
+ in LOG_LINKS if it is in the same basic block. However,
+ cross-jumping can insert code labels in between the load and
+ the call, and can result in situations where a single call
+ insn may have two targets depending on where we came from. */
+
+ if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
+ break;
+
+ if (GET_RTX_CLASS (GET_CODE (scan)) != 'i')
+ continue;
+
+ /* Don't try to trace forward past a JUMP. To optimize
+ safely, we would have to check that all the
+ instructions at the jump destination did not use REG. */
+
+ if (GET_CODE (scan) == JUMP_INSN)
+ break;
+
+ if (! reg_mentioned_p (reg, scan))
+ continue;
+
+ if (noncall_uses_reg (reg, scan, &scanset))
+ break;
+
+ if (scan == insn)
+ foundinsn = 1;
+
+ if (scan != insn
+ && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
+ {
+ /* There is a function call to this register other
+ than the one we are checking. If we optimize
+ this call, we need to rescan again below. */
+ rescan = 1;
+ }
+
+ /* ??? We shouldn't have to worry about SCANSET here.
+ We should just be able to check for a REG_DEAD note
+ on a function call. However, the REG_DEAD notes are
+ apparently not dependable around libcalls; c-torture
+ execute/920501-2 is a test case. If SCANSET is set,
+ then this insn sets the register, so it must have
+ died earlier. Unfortunately, this will only handle
+ the cases in which the register is, in fact, set in a
+ later insn. */
+
+ /* ??? We shouldn't have to use FOUNDINSN here.
+ However, the LOG_LINKS fields are apparently not
+ entirely reliable around libcalls;
+ newlib/libm/math/e_pow.c is a test case. Sometimes
+ an insn will appear in LOG_LINKS even though it is
+ not the most recent insn which sets the register. */
+
+ if (foundinsn
+ && (scanset
+ || find_reg_note (scan, REG_DEAD, reg)))
+ {
+ dies = scan;
+ break;
+ }
+ }
+
+ if (! dies)
+ {
+ /* Either there was a branch, or some insn used REG
+ other than as a function call address. */
+ continue;
+ }
+
+ /* Create a code label, and put it in a REG_LABEL note on
+ the insn which sets the register, and on each call insn
+ which uses the register. In final_prescan_insn we look
+ for the REG_LABEL notes, and output the appropriate label
+ or pseudo-op. */
+
+ label = gen_label_rtx ();
+ REG_NOTES (link) = gen_rtx (EXPR_LIST, REG_LABEL, label,
+ REG_NOTES (link));
+ REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, label,
+ REG_NOTES (insn));
+ if (rescan)
+ {
+ scan = link;
+ do
+ {
+ rtx reg2;
+
+ scan = NEXT_INSN (scan);
+ if (scan != insn
+ && ((GET_CODE (scan) == CALL_INSN
+ && reg_mentioned_p (reg, scan))
+ || ((reg2 = sfunc_uses_reg (scan))
+ && REGNO (reg2) == REGNO (reg))))
+ REG_NOTES (scan) = gen_rtx (EXPR_LIST, REG_LABEL,
+ label, REG_NOTES (scan));
+ }
+ while (scan != dies);
+ }
+ }
+ }
+
+ if (TARGET_SH2)
+ fixup_addr_diff_vecs (first);
+
+ if (optimize)
+ {
+ mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
+ shorten_branches (first);
+ }
+ /* Scan the function looking for move instructions which have to be
+ changed to pc-relative loads and insert the literal tables. */
+
+ mdep_reorg_phase = SH_FIXUP_PCLOAD;
+ for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
+ {
+ if (mova_p (insn))
+ {
+ if (! num_mova++)
+ mova = insn;
+ }
+ else if (GET_CODE (insn) == JUMP_INSN
+ && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
+ && num_mova)
+ {
+ rtx scan;
+ int total;
+
+ num_mova--;
+
+ /* Some code might have been inserted between the mova and
+ its ADDR_DIFF_VEC. Check if the mova is still in range. */
+ for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
+ total += get_attr_length (scan);
+
+	  /* The range of the mova is 1020; add 4 because pc counts from the
+	     address of the second instruction after this one, subtract 2 in
+	     case pc is 2 byte aligned.  Possible alignment needed for the
+	     ADDR_DIFF_VEC cancels out with alignment effects of the mova
+	     itself.  */
+ if (total > 1022)
+ {
+ /* Change the mova into a load, and restart scanning
+ there. broken_move will then return true for mova. */
+ SET_SRC (PATTERN (mova))
+ = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
+ INSN_CODE (mova) = -1;
+ insn = mova;
+ }
+ }
+ if (broken_move (insn))
+ {
+ rtx scan;
+ /* Scan ahead looking for a barrier to stick the constant table
+ behind. */
+ rtx barrier = find_barrier (num_mova, mova, insn);
+ rtx last_float_move, last_float = 0, *last_float_addr;
+
+ if (num_mova && ! mova_p (mova))
+ {
+ /* find_barrier had to change the first mova into a
+ pcload; thus, we have to start with this new pcload. */
+ insn = mova;
+ num_mova = 0;
+ }
+ /* Now find all the moves between the points and modify them. */
+ for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
+ {
+ if (GET_CODE (scan) == CODE_LABEL)
+ last_float = 0;
+ if (broken_move (scan))
+ {
+ rtx *patp = &PATTERN (scan), pat = *patp;
+ rtx src, dst;
+ rtx lab;
+ rtx newinsn;
+ rtx newsrc;
+ enum machine_mode mode;
+
+ if (GET_CODE (pat) == PARALLEL)
+ patp = &XVECEXP (pat, 0, 0), pat = *patp;
+ src = SET_SRC (pat);
+ dst = SET_DEST (pat);
+ mode = GET_MODE (dst);
+
+ if (mode == SImode && hi_const (src)
+ && REGNO (dst) != FPUL_REG)
+ {
+ int offset = 0;
+
+ mode = HImode;
+ while (GET_CODE (dst) == SUBREG)
+ {
+ offset += SUBREG_WORD (dst);
+ dst = SUBREG_REG (dst);
+ }
+ dst = gen_rtx (REG, HImode, REGNO (dst) + offset);
+ }
+
+ if (GET_CODE (dst) == REG
+ && ((REGNO (dst) >= FIRST_FP_REG
+ && REGNO (dst) <= LAST_XD_REG)
+ || REGNO (dst) == FPUL_REG))
+ {
+ if (last_float
+ && reg_set_between_p (r0_rtx, last_float_move, scan))
+ last_float = 0;
+ lab = add_constant (src, mode, last_float);
+ if (lab)
+ emit_insn_before (gen_mova (lab), scan);
+ else
+ *last_float_addr = r0_inc_rtx;
+ last_float_move = scan;
+ last_float = src;
+		      newsrc = gen_rtx (MEM, mode,
+					(((TARGET_SH4 && ! TARGET_FMOVD)
+					  || REGNO (dst) == FPUL_REG)
+					 ? r0_inc_rtx
+					 : r0_rtx));
+ last_float_addr = &XEXP (newsrc, 0);
+ }
+ else
+ {
+ lab = add_constant (src, mode, 0);
+ newsrc = gen_rtx (MEM, mode,
+ gen_rtx (LABEL_REF, VOIDmode, lab));
+ }
+ RTX_UNCHANGING_P (newsrc) = 1;
+ *patp = gen_rtx (SET, VOIDmode, dst, newsrc);
+ INSN_CODE (scan) = -1;
+ }
+ }
+ dump_table (barrier);
+ insn = barrier;
+ }
+ }
+
+ mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
+ insn_addresses = 0;
+ split_branches (first);
+
+ /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
+ also has an effect on the register that holds the address of the sfunc.
+ Insert an extra dummy insn in front of each sfunc that pretends to
+ use this register. */
+ if (flag_delayed_branch)
+ {
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx reg = sfunc_uses_reg (insn);
+
+ if (! reg)
+ continue;
+ emit_insn_before (gen_use_sfunc_addr (reg), insn);
+ }
+ }
+#if 0
+ /* fpscr is not actually a user variable, but we pretend it is for the
+ sake of the previous optimization passes, since we want it handled like
+ one. However, we don't have any debugging information for it, so turn
+ it into a non-user variable now. */
+ if (TARGET_SH4)
+ REG_USERVAR_P (get_fpscr_rtx ()) = 0;
+#endif
+ if (optimize)
+ sh_flag_remove_dead_before_cse = 1;
+ mdep_reorg_phase = SH_AFTER_MDEP_REORG;
+}
+
+int
+get_dest_uid (label, max_uid)
+ rtx label;
+ int max_uid;
+{
+ rtx dest = next_real_insn (label);
+ int dest_uid;
+ if (! dest)
+ /* This can happen for an undefined label. */
+ return 0;
+ dest_uid = INSN_UID (dest);
+ /* If this is a newly created branch redirection blocking instruction,
+ we cannot index the branch_uid or insn_addresses arrays with its
+ uid. But then, we won't need to, because the actual destination is
+ the following branch. */
+ while (dest_uid >= max_uid)
+ {
+ dest = NEXT_INSN (dest);
+ dest_uid = INSN_UID (dest);
+ }
+ if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
+ return 0;
+ return dest_uid;
+}
+
+/* Split condbranches that are out of range. Also add clobbers for
+ scratch registers that are needed in far jumps.
+ We do this before delay slot scheduling, so that it can take our
+ newly created instructions into account. It also allows us to
+ find branches with common targets more easily. */
+
+static void
+split_branches (first)
+ rtx first;
+{
+ rtx insn;
+ struct far_branch **uid_branch, *far_branch_list = 0;
+ int max_uid = get_max_uid ();
+
+ /* Find out which branches are out of range. */
+ shorten_branches (first);
+
+ uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
+ bzero ((char *) uid_branch, max_uid * sizeof *uid_branch);
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+ continue;
+ else if (INSN_DELETED_P (insn))
+ {
+ /* Shorten_branches would split this instruction again,
+ so transform it into a note. */
+ PUT_CODE (insn, NOTE);
+ NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (insn) = 0;
+ }
+ else if (GET_CODE (insn) == JUMP_INSN
+ /* Don't mess with ADDR_DIFF_VEC */
+ && (GET_CODE (PATTERN (insn)) == SET
+ || GET_CODE (PATTERN (insn)) == RETURN))
+ {
+ enum attr_type type = get_attr_type (insn);
+ if (type == TYPE_CBRANCH)
+ {
+ rtx next, beyond;
+
+ if (get_attr_length (insn) > 4)
+ {
+ rtx src = SET_SRC (PATTERN (insn));
+ rtx cond = XEXP (src, 0);
+ rtx olabel = XEXP (XEXP (src, 1), 0);
+ rtx jump;
+ int addr = insn_addresses[INSN_UID (insn)];
+ rtx label = 0;
+ int dest_uid = get_dest_uid (olabel, max_uid);
+ struct far_branch *bp = uid_branch[dest_uid];
+
+ /* redirect_jump needs a valid JUMP_LABEL, and it might delete
+ the label if the LABEL_NUSES count drops to zero. There is
+ always a jump_optimize pass that sets these values, but it
+ proceeds to delete unreferenced code, and then if not
+ optimizing, to un-delete the deleted instructions, thus
+ leaving labels with use counts that are too low. */
+ if (! optimize)
+ {
+ JUMP_LABEL (insn) = olabel;
+ LABEL_NUSES (olabel)++;
+ }
+ if (! bp)
+ {
+ bp = (struct far_branch *) alloca (sizeof *bp);
+ uid_branch[dest_uid] = bp;
+ bp->prev = far_branch_list;
+ far_branch_list = bp;
+ bp->far_label
+ = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
+ LABEL_NUSES (bp->far_label)++;
+ }
+ else
+ {
+ label = bp->near_label;
+ if (! label && bp->address - addr >= CONDJUMP_MIN)
+ {
+ rtx block = bp->insert_place;
+
+ if (GET_CODE (PATTERN (block)) == RETURN)
+ block = PREV_INSN (block);
+ else
+ block = gen_block_redirect (block,
+ bp->address, 2);
+ label = emit_label_after (gen_label_rtx (),
+ PREV_INSN (block));
+ bp->near_label = label;
+ }
+ else if (label && ! NEXT_INSN (label))
+ if (addr + 2 - bp->address <= CONDJUMP_MAX)
+ bp->insert_place = insn;
+ else
+ gen_far_branch (bp);
+ }
+ if (! label
+ || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)
+ {
+ bp->near_label = label = gen_label_rtx ();
+ bp->insert_place = insn;
+ bp->address = addr;
+ }
+ if (! redirect_jump (insn, label))
+ abort ();
+ }
+ else
+ {
+ /* get_attr_length (insn) == 2 */
+ /* Check if we have a pattern where reorg wants to redirect
+ the branch to a label from an unconditional branch that
+ is too far away. */
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ /* A syntax error might cause beyond to be NULL_RTX. */
+ beyond
+ = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
+ 0));
+
+ if (beyond
+ && (GET_CODE (beyond) == JUMP_INSN
+ || (GET_CODE (beyond = next_active_insn (beyond))
+ == JUMP_INSN))
+ && GET_CODE (PATTERN (beyond)) == SET
+ && recog_memoized (beyond) == CODE_FOR_jump
+ && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))]
+ - insn_addresses[INSN_UID (insn)] + 252U)
+ > 252 + 258 + 2))
+ gen_block_redirect (beyond,
+ insn_addresses[INSN_UID (beyond)], 1);
+ }
+
+ next = next_active_insn (insn);
+
+ if ((GET_CODE (next) == JUMP_INSN
+ || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
+ && GET_CODE (PATTERN (next)) == SET
+ && recog_memoized (next) == CODE_FOR_jump
+ && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))]
+ - insn_addresses[INSN_UID (insn)] + 252U)
+ > 252 + 258 + 2))
+ gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1);
+ }
+ else if (type == TYPE_JUMP || type == TYPE_RETURN)
+ {
+ int addr = insn_addresses[INSN_UID (insn)];
+ rtx far_label = 0;
+ int dest_uid = 0;
+ struct far_branch *bp;
+
+ if (type == TYPE_JUMP)
+ {
+ far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
+ dest_uid = get_dest_uid (far_label, max_uid);
+ if (! dest_uid)
+ {
+ /* Parse errors can lead to labels outside
+ the insn stream. */
+ if (! NEXT_INSN (far_label))
+ continue;
+
+ if (! optimize)
+ {
+ JUMP_LABEL (insn) = far_label;
+ LABEL_NUSES (far_label)++;
+ }
+ redirect_jump (insn, NULL_RTX);
+ far_label = 0;
+ }
+ }
+ bp = uid_branch[dest_uid];
+ if (! bp)
+ {
+ bp = (struct far_branch *) alloca (sizeof *bp);
+ uid_branch[dest_uid] = bp;
+ bp->prev = far_branch_list;
+ far_branch_list = bp;
+ bp->near_label = 0;
+ bp->far_label = far_label;
+ if (far_label)
+ LABEL_NUSES (far_label)++;
+ }
+ else if (bp->near_label && ! NEXT_INSN (bp->near_label))
+ if (addr - bp->address <= CONDJUMP_MAX)
+ emit_label_after (bp->near_label, PREV_INSN (insn));
+ else
+ {
+ gen_far_branch (bp);
+ bp->near_label = 0;
+ }
+ else
+ bp->near_label = 0;
+ bp->address = addr;
+ bp->insert_place = insn;
+ if (! far_label)
+ emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
+ else
+ gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
+ }
+ }
+ /* Generate all pending far branches,
+ and free our references to the far labels. */
+ while (far_branch_list)
+ {
+ if (far_branch_list->near_label
+ && ! NEXT_INSN (far_branch_list->near_label))
+ gen_far_branch (far_branch_list);
+ if (optimize
+ && far_branch_list->far_label
+ && ! --LABEL_NUSES (far_branch_list->far_label))
+ delete_insn (far_branch_list->far_label);
+ far_branch_list = far_branch_list->prev;
+ }
+
+ /* Instruction length information is no longer valid due to the new
+ instructions that have been generated. */
+ init_insn_lengths ();
+}
+
+/* Dump out instruction addresses, which is useful for debugging the
+ constant pool table stuff.
+
+ If relaxing, output the label and pseudo-ops used to link together
+ calls and the instruction which set the registers. */
+
+/* ??? This is unnecessary, and probably should be deleted. This makes
+ the insn_addresses declaration above unnecessary. */
+
+/* ??? The addresses printed by this routine for insns are nonsense for
+ insns which are inside of a sequence where none of the inner insns have
+ variable length. This is because the second pass of shorten_branches
+ does not bother to update them. */
+
+void
+final_prescan_insn (insn, opvec, noperands)
+ rtx insn;
+ rtx *opvec;
+ int noperands;
+{
+ if (TARGET_DUMPISIZE)
+ fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]);
+
+ if (TARGET_RELAX)
+ {
+ rtx note;
+
+ note = find_reg_note (insn, REG_LABEL, NULL_RTX);
+ if (note)
+ {
+ rtx pattern;
+
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) == PARALLEL)
+ pattern = XVECEXP (pattern, 0, 0);
+ if (GET_CODE (pattern) == CALL
+ || (GET_CODE (pattern) == SET
+ && (GET_CODE (SET_SRC (pattern)) == CALL
+ || get_attr_type (insn) == TYPE_SFUNC)))
+ asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
+ CODE_LABEL_NUMBER (XEXP (note, 0)));
+ else if (GET_CODE (pattern) == SET)
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+ CODE_LABEL_NUMBER (XEXP (note, 0)));
+ else
+ abort ();
+ }
+ }
+}
+
+/* Dump out any constants accumulated in the final pass. These will
+ only be labels. */
+
+char *
+output_jump_label_table ()
+{
+ int i;
+
+ if (pool_size)
+ {
+ fprintf (asm_out_file, "\t.align 2\n");
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+ CODE_LABEL_NUMBER (p->label));
+ output_asm_insn (".long %O0", &p->value);
+ }
+ pool_size = 0;
+ }
+
+ return "";
+}
+
+/* A full frame looks like:
+
+ arg-5
+ arg-4
+ [ if current_function_anonymous_args
+ arg-3
+ arg-2
+ arg-1
+ arg-0 ]
+ saved-fp
+ saved-r10
+ saved-r11
+ saved-r12
+ saved-pr
+ local-n
+ ..
+ local-1
+ local-0 <- fp points here. */
+
+/* Number of bytes pushed for anonymous args, used to pass information
+ between expand_prologue and expand_epilogue. */
+
+static int extra_push;
+
+/* Adjust the stack by SIZE bytes. REG holds the rtl of the register
+ to be adjusted, and TEMP, if nonnegative, holds the register number
+ of a general register that we may clobber. */
+
+static void
+output_stack_adjust (size, reg, temp)
+ int size;
+ rtx reg;
+ int temp;
+{
+ if (size)
+ {
+ if (CONST_OK_FOR_I (size))
+ emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
+ /* Try to do it with two partial adjustments; however, we must make
+ sure that the stack is properly aligned at all times, in case
+ an interrupt occurs between the two partial adjustments. */
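+ /* An illustrative example (figures ours, assuming CONST_OK_FOR_I
+ accepts the usual signed 8-bit immediate range): a 200 byte
+ adjustment does not fit in a single immediate, but splits into
+ 100 + 100, each of which does; both parts are multiples of 4, so
+ the stack stays aligned between the two adds. */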
+ else if (CONST_OK_FOR_I (size / 2 & -4)
+ && CONST_OK_FOR_I (size - (size / 2 & -4)))
+ {
+ emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4)));
+ emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4))));
+ }
+ else
+ {
+ rtx const_reg;
+
+ /* If TEMP is invalid, we could temporarily save a general
+ register to MACL. However, there is currently no need
+ to handle this case, so just abort when we see it. */
+ if (temp < 0)
+ abort ();
+ const_reg = gen_rtx (REG, SImode, temp);
+
+ /* If SIZE is negative, subtract the positive value.
+ This sometimes allows a constant pool entry to be shared
+ between prologue and epilogue code. */
+ if (size < 0)
+ {
+ emit_insn (gen_movsi (const_reg, GEN_INT (-size)));
+ emit_insn (gen_subsi3 (reg, reg, const_reg));
+ }
+ else
+ {
+ emit_insn (gen_movsi (const_reg, GEN_INT (size)));
+ emit_insn (gen_addsi3 (reg, reg, const_reg));
+ }
+ }
+ }
+}
+
+/* Output RTL to push register RN onto the stack. */
+
+static void
+push (rn)
+ int rn;
+{
+ rtx x;
+ if (rn == FPUL_REG)
+ x = gen_push_fpul ();
+ else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
+ && rn >= FIRST_FP_REG && rn <= LAST_XD_REG)
+ {
+ if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG)
+ return;
+ x = gen_push_4 (gen_rtx (REG, DFmode, rn));
+ }
+ else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
+ x = gen_push_e (gen_rtx (REG, SFmode, rn));
+ else
+ x = gen_push (gen_rtx (REG, SImode, rn));
+
+ x = emit_insn (x);
+ REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
+ gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
+}
+
+/* Output RTL to pop register RN from the stack. */
+
+static void
+pop (rn)
+ int rn;
+{
+ rtx x;
+ if (rn == FPUL_REG)
+ x = gen_pop_fpul ();
+ else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
+ && rn >= FIRST_FP_REG && rn <= LAST_XD_REG)
+ {
+ if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG)
+ return;
+ x = gen_pop_4 (gen_rtx (REG, DFmode, rn));
+ }
+ else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
+ x = gen_pop_e (gen_rtx (REG, SFmode, rn));
+ else
+ x = gen_pop (gen_rtx (REG, SImode, rn));
+
+ x = emit_insn (x);
+ REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
+ gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
+}
+
+/* Generate code to push the regs specified in the mask. */
+
+static void
+push_regs (mask, mask2)
+ int mask, mask2;
+{
+ int i;
+
+ /* Push PR last; this gives better latencies after the prologue, and
+ candidates for the return delay slot when there are no general
+ registers pushed. */
+ for (i = 0; i < 32; i++)
+ if (mask & (1 << i) && i != PR_REG)
+ push (i);
+ for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
+ if (mask2 & (1 << (i - 32)))
+ push (i);
+ if (mask & (1 << PR_REG))
+ push (PR_REG);
+}
+
+/* Work out the registers which need to be saved, both as a mask and a
+ count of saved words.
+
+ If doing a pragma interrupt function, then push all regs used by the
+ function, and if we call another function (we can tell by looking at PR),
+ make sure that all the regs it clobbers are safe too. */
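+/* For example, an interrupt handler that itself makes calls (detected by
+ PR being live) must also save call-clobbered registers such as MACH and
+ MACL, since any callee is free to clobber them. */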
+
+static int
+calc_live_regs (count_ptr, live_regs_mask2)
+ int *count_ptr;
+ int *live_regs_mask2;
+{
+ int reg;
+ int live_regs_mask = 0;
+ int count;
+ int interrupt_handler;
+
+ if ((lookup_attribute
+ ("interrupt_handler",
+ DECL_MACHINE_ATTRIBUTES (current_function_decl)))
+ != NULL_TREE)
+ interrupt_handler = 1;
+ else
+ interrupt_handler = 0;
+
+ *live_regs_mask2 = 0;
+ /* If switching to double mode saves a lot of register saves, do that. */
+ if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
+ for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
+ if (regs_ever_live[reg] && regs_ever_live[reg+1]
+ && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
+ && ++count > 2)
+ {
+ target_flags &= ~FPU_SINGLE_BIT;
+ break;
+ }
+ for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
+ {
+ if ((interrupt_handler && ! pragma_trapa)
+ ? (/* Need to save all the regs ever live. */
+ (regs_ever_live[reg]
+ || (call_used_regs[reg]
+ && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
+ && regs_ever_live[PR_REG]))
+ && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
+ && reg != RETURN_ADDRESS_POINTER_REGNUM
+ && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
+ : (/* Only push those regs which are used and need to be saved. */
+ regs_ever_live[reg] && ! call_used_regs[reg]))
+ {
+ if (reg >= 32)
+ *live_regs_mask2 |= 1 << (reg - 32);
+ else
+ live_regs_mask |= 1 << reg;
+ count++;
+ if (TARGET_SH4 && TARGET_FMOVD && reg >= FIRST_FP_REG)
+ if (reg <= LAST_FP_REG)
+ {
+ if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
+ {
+ if (reg >= 32)
+ *live_regs_mask2 |= 1 << ((reg ^ 1) - 32);
+ else
+ live_regs_mask |= 1 << (reg ^ 1);
+ count++;
+ }
+ }
+ else if (reg <= LAST_XD_REG)
+ {
+ /* Must switch to double mode to access these registers. */
+ target_flags &= ~FPU_SINGLE_BIT;
+ count++;
+ }
+ }
+ }
+
+ *count_ptr = count;
+ return live_regs_mask;
+}
+
+/* Code to generate prologue and epilogue sequences */
+
+void
+sh_expand_prologue ()
+{
+ int live_regs_mask;
+ int d, i;
+ int live_regs_mask2;
+ int save_flags = target_flags;
+ int double_align = 0;
+
+ /* We have pretend args if we had an object sent partially in registers
+ and partially on the stack, e.g. a large structure. */
+ output_stack_adjust (-current_function_pretend_args_size,
+ stack_pointer_rtx, 3);
+
+ extra_push = 0;
+
+ /* This is set by SETUP_VARARGS to indicate that this is a varargs
+ routine. Clear it here so that the next function isn't affected. */
+ if (current_function_anonymous_args)
+ {
+ current_function_anonymous_args = 0;
+
+ /* This is not used by the SH3E calling convention. */
+ if (!TARGET_SH3E)
+ {
+ /* Push arg regs as if they'd been provided by caller in stack. */
+ for (i = 0; i < NPARM_REGS(SImode); i++)
+ {
+ int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
+ if (i >= (NPARM_REGS(SImode)
+ - current_function_args_info.arg_count[(int) SH_ARG_INT]
+ ))
+ break;
+ push (rn);
+ extra_push += 4;
+ }
+ }
+ }
+
+ /* If we're supposed to switch stacks at function entry, do so now. */
+ if (sp_switch)
+ emit_insn (gen_sp_switch_1 ());
+
+ live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
+ /* ??? Maybe we could save some switching if we can move a mode switch
+ that already happens to be at the function start into the prologue. */
+ if (target_flags != save_flags)
+ emit_insn (gen_toggle_sz ());
+ push_regs (live_regs_mask, live_regs_mask2);
+ if (target_flags != save_flags)
+ emit_insn (gen_toggle_sz ());
+
+ if (TARGET_ALIGN_DOUBLE && d & 1)
+ double_align = 4;
+
+ target_flags = save_flags;
+
+ output_stack_adjust (-get_frame_size () - double_align,
+ stack_pointer_rtx, 3);
+
+ if (frame_pointer_needed)
+ emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
+}
+
+void
+sh_expand_epilogue ()
+{
+ int live_regs_mask;
+ int d, i;
+
+ int live_regs_mask2;
+ int save_flags = target_flags;
+ int frame_size = get_frame_size ();
+
+ live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
+
+ if (TARGET_ALIGN_DOUBLE && d & 1)
+ frame_size += 4;
+
+ if (frame_pointer_needed)
+ {
+ output_stack_adjust (frame_size, frame_pointer_rtx, 7);
+
+ /* We must avoid moving the stack pointer adjustment past code
+ which reads from the local frame, else an interrupt could
+ occur after the SP adjustment and clobber data in the local
+ frame. */
+ emit_insn (gen_blockage ());
+ emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
+ }
+ else if (frame_size)
+ {
+ /* We must avoid moving the stack pointer adjustment past code
+ which reads from the local frame, else an interrupt could
+ occur after the SP adjustment and clobber data in the local
+ frame. */
+ emit_insn (gen_blockage ());
+ output_stack_adjust (frame_size, stack_pointer_rtx, 7);
+ }
+
+ /* Pop all the registers. */
+
+ if (target_flags != save_flags)
+ emit_insn (gen_toggle_sz ());
+ if (live_regs_mask & (1 << PR_REG))
+ pop (PR_REG);
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ int j = (FIRST_PSEUDO_REGISTER - 1) - i;
+ if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG)
+ pop (j);
+ else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
+ pop (j);
+ }
+ if (target_flags != save_flags)
+ emit_insn (gen_toggle_sz ());
+ target_flags = save_flags;
+
+ output_stack_adjust (extra_push + current_function_pretend_args_size,
+ stack_pointer_rtx, 7);
+
+ /* Switch back to the normal stack if necessary. */
+ if (sp_switch)
+ emit_insn (gen_sp_switch_2 ());
+}
+
+/* Clear variables at function end. */
+
+void
+function_epilogue (stream, size)
+ FILE *stream;
+ int size;
+{
+ trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
+ sp_switch = NULL_RTX;
+}
+
+rtx
+sh_builtin_saveregs (arglist)
+ tree arglist;
+{
+ tree fntype = TREE_TYPE (current_function_decl);
+ /* First unnamed integer register. */
+ int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
+ /* Number of integer registers we need to save. */
+ int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
+ /* First unnamed SFmode float reg */
+ int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
+ /* Number of SFmode float regs to save. */
+ int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
+ int ptrsize = GET_MODE_SIZE (Pmode);
+ rtx valist, regbuf, fpregs;
+ int bufsize, regno;
+
+ /* Allocate block of memory for the regs. */
+ /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
+ Or can assign_stack_local accept a 0 SIZE argument? */
+ bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
+
+ regbuf = assign_stack_local (BLKmode, bufsize, 0);
+ MEM_SET_IN_STRUCT_P (regbuf, 1);
+
+ /* Save int args.
+ This is optimized to only save the regs that are necessary. Explicitly
+ named args need not be saved. */
+ if (n_intregs > 0)
+ move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
+ gen_rtx (MEM, BLKmode,
+ plus_constant (XEXP (regbuf, 0),
+ n_floatregs * UNITS_PER_WORD)),
+ n_intregs, n_intregs * UNITS_PER_WORD);
+
+ /* Save float args.
+ This is optimized to only save the regs that are necessary. Explicitly
+ named args need not be saved.
+ We explicitly build a pointer to the buffer because it halves the insn
+ count when not optimizing (otherwise the pointer is built for each reg
+ saved).
+ We emit the moves in reverse order so that we can use predecrement. */
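+ /* Layout note: the float save area occupies the first
+ n_floatregs * UNITS_PER_WORD bytes of the buffer and the integer
+ save area sits directly above it; the float stores below walk
+ downward from that boundary. */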
+
+ fpregs = gen_reg_rtx (Pmode);
+ emit_move_insn (fpregs, XEXP (regbuf, 0));
+ emit_insn (gen_addsi3 (fpregs, fpregs,
+ GEN_INT (n_floatregs * UNITS_PER_WORD)));
+ if (TARGET_SH4)
+ {
+ for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs,
+ GEN_INT (-2 * UNITS_PER_WORD)));
+ emit_move_insn (gen_rtx (MEM, DFmode, fpregs),
+ gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
+ }
+ regno = first_floatreg;
+ if (regno & 1)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
+ emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
+ gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
+ - (TARGET_LITTLE_ENDIAN != 0)));
+ }
+ }
+ else
+ for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
+ emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
+ gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno));
+ }
+
+ /* Return the address of the regbuf. */
+ return XEXP (regbuf, 0);
+}
+
+/* Define the offset between two registers, one to be eliminated, and
+ the other its replacement, at the start of a routine. */
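+/* With the frame layout pictured further above, the frame pointer and the
+ stack pointer coincide after the prologue, so eliminating the argument
+ pointer to either one needs the same offset: the register save area plus
+ the local frame. */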
+
+int
+initial_elimination_offset (from, to)
+ int from;
+ int to;
+{
+ int regs_saved;
+ int total_saved_regs_space;
+ int total_auto_space = get_frame_size ();
+ int save_flags = target_flags;
+
+ int live_regs_mask, live_regs_mask2;
+ live_regs_mask = calc_live_regs (&regs_saved, &live_regs_mask2);
+ if (TARGET_ALIGN_DOUBLE && regs_saved & 1)
+ total_auto_space += 4;
+ target_flags = save_flags;
+
+ total_saved_regs_space = (regs_saved) * 4;
+
+ if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
+ return total_saved_regs_space + total_auto_space;
+
+ if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return total_saved_regs_space + total_auto_space;
+
+ /* Initial gap between fp and sp is 0. */
+ if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return 0;
+
+ if (from == RETURN_ADDRESS_POINTER_REGNUM
+ && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
+ {
+ int i, n = total_saved_regs_space;
+ for (i = PR_REG-1; i >= 0; i--)
+ if (live_regs_mask & (1 << i))
+ n -= 4;
+ return n + total_auto_space;
+ }
+
+ abort ();
+}
+
+/* Handle machine specific pragmas to be semi-compatible with Hitachi
+ compiler. */
+
+int
+sh_handle_pragma (p_getc, p_ungetc, pname)
+ int (* p_getc) PROTO((void));
+ void (* p_ungetc) PROTO((int));
+ char * pname;
+{
+ int retval = 0;
+
+ if (strcmp (pname, "interrupt") == 0)
+ pragma_interrupt = retval = 1;
+ else if (strcmp (pname, "trapa") == 0)
+ pragma_interrupt = pragma_trapa = retval = 1;
+ else if (strcmp (pname, "nosave_low_regs") == 0)
+ pragma_nosave_low_regs = retval = 1;
+
+ return retval;
+}
+
+/* Generate an 'interrupt_handler' attribute for decls. */
+
+void
+sh_pragma_insert_attributes (node, attributes, prefix)
+ tree node;
+ tree * attributes;
+ tree * prefix;
+{
+ tree a;
+
+ if (! pragma_interrupt
+ || TREE_CODE (node) != FUNCTION_DECL)
+ return;
+
+ /* We are only interested in declarations. */
+ if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
+ return;
+
+ /* Add an 'interrupt_handler' attribute. */
+ * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
+
+ return;
+}
+
+/* Return nonzero if ATTR is a valid attribute for DECL.
+ ATTRIBUTES are any existing attributes and ARGS are the arguments
+ supplied with ATTR.
+
+ Supported attributes:
+
+ interrupt_handler -- specifies this function is an interrupt handler.
+
+ sp_switch -- specifies an alternate stack for an interrupt handler
+ to run on.
+
+ trap_exit -- use a trapa to exit an interrupt function instead of
+ an rte instruction. */
+
+int
+sh_valid_machine_decl_attribute (decl, attributes, attr, args)
+ tree decl;
+ tree attributes;
+ tree attr;
+ tree args;
+{
+ int retval = 0;
+
+ if (TREE_CODE (decl) != FUNCTION_DECL)
+ return 0;
+
+ if (is_attribute_p ("interrupt_handler", attr))
+ {
+ return 1;
+ }
+
+ if (is_attribute_p ("sp_switch", attr))
+ {
+ /* The sp_switch attribute only has meaning for interrupt functions. */
+ if (!pragma_interrupt)
+ return 0;
+
+ /* sp_switch must have an argument. */
+ if (!args || TREE_CODE (args) != TREE_LIST)
+ return 0;
+
+ /* The argument must be a constant string. */
+ if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
+ return 0;
+
+ sp_switch = gen_rtx (SYMBOL_REF, VOIDmode,
+ TREE_STRING_POINTER (TREE_VALUE (args)));
+ return 1;
+ }
+
+ if (is_attribute_p ("trap_exit", attr))
+ {
+ /* The trap_exit attribute only has meaning for interrupt functions. */
+ if (!pragma_interrupt)
+ return 0;
+
+ /* trap_exit must have an argument. */
+ if (!args || TREE_CODE (args) != TREE_LIST)
+ return 0;
+
+ /* The argument must be a constant integer. */
+ if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
+ return 0;
+
+ trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
+ return 1;
+ }
+
+ return 0;
+}
+
+
+/* Predicates used by the templates. */
+
+/* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
+ Used only in general_movsrc_operand. */
+
+int
+system_reg_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ switch (REGNO (op))
+ {
+ case PR_REG:
+ case MACL_REG:
+ case MACH_REG:
+ return 1;
+ }
+ return 0;
+}
+
+/* Returns 1 if OP can be source of a simple move operation.
+ Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
+ invalid as are subregs of system registers. */
+
+int
+general_movsrc_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (GET_CODE (op) == MEM)
+ {
+ rtx inside = XEXP (op, 0);
+ if (GET_CODE (inside) == CONST)
+ inside = XEXP (inside, 0);
+
+ if (GET_CODE (inside) == LABEL_REF)
+ return 1;
+
+ if (GET_CODE (inside) == PLUS
+ && GET_CODE (XEXP (inside, 0)) == LABEL_REF
+ && GET_CODE (XEXP (inside, 1)) == CONST_INT)
+ return 1;
+
+ /* Only post inc allowed. */
+ if (GET_CODE (inside) == PRE_DEC)
+ return 0;
+ }
+
+ if ((mode == QImode || mode == HImode)
+ && (GET_CODE (op) == SUBREG
+ && GET_CODE (XEXP (op, 0)) == REG
+ && system_reg_operand (XEXP (op, 0), mode)))
+ return 0;
+
+ return general_operand (op, mode);
+}
+
+/* Returns 1 if OP can be a destination of a move.
+ Same as general_operand, but post-increment is not allowed. */
+
+int
+general_movdst_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ /* Only pre dec allowed. */
+ if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
+ return 0;
+
+ return general_operand (op, mode);
+}
+
+/* Returns 1 if OP is a normal arithmetic register. */
+
+int
+arith_reg_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (register_operand (op, mode))
+ {
+ int regno;
+
+ if (GET_CODE (op) == REG)
+ regno = REGNO (op);
+ else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
+ regno = REGNO (SUBREG_REG (op));
+ else
+ return 1;
+
+ return (regno != T_REG && regno != PR_REG
+ && (regno != FPUL_REG || TARGET_SH4)
+ && regno != MACH_REG && regno != MACL_REG);
+ }
+ return 0;
+}
+
+int
+fp_arith_reg_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (register_operand (op, mode))
+ {
+ int regno;
+
+ if (GET_CODE (op) == REG)
+ regno = REGNO (op);
+ else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
+ regno = REGNO (SUBREG_REG (op));
+ else
+ return 1;
+
+ return (regno != T_REG && regno != PR_REG && regno > 15
+ && regno != MACH_REG && regno != MACL_REG);
+ }
+ return 0;
+}
+
+int
+fp_extended_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (GET_CODE (op) == FLOAT_EXTEND && GET_MODE (op) == mode)
+ {
+ op = XEXP (op, 0);
+ mode = GET_MODE (op);
+ }
+ return fp_arith_reg_operand (op, mode);
+}
+
+/* Returns 1 if OP is a valid source operand for an arithmetic insn. */
+
+int
+arith_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (arith_reg_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
+ return 1;
+
+ return 0;
+}
+
+/* Returns 1 if OP is a valid source operand for a compare insn. */
+
+int
+arith_reg_or_0_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (arith_reg_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
+ return 1;
+
+ return 0;
+}
+
+/* Returns 1 if OP is a valid source operand for a logical operation. */
+
+int
+logical_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (arith_reg_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
+ return 1;
+
+ return 0;
+}
+
+/* Nonzero if OP is a floating point value with value 0.0. */
+
+int
+fp_zero_operand (op)
+ rtx op;
+{
+ REAL_VALUE_TYPE r;
+
+ if (GET_MODE (op) != SFmode)
+ return 0;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
+}
+
+/* Nonzero if OP is a floating point value with value 1.0. */
+
+int
+fp_one_operand (op)
+ rtx op;
+{
+ REAL_VALUE_TYPE r;
+
+ if (GET_MODE (op) != SFmode)
+ return 0;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ return REAL_VALUES_EQUAL (r, dconst1);
+}
+
+int
+tertiary_reload_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ enum rtx_code code = GET_CODE (op);
+ return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
+}
+
+int
+fpscr_operand (op)
+ rtx op;
+{
+ return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
+ && GET_MODE (op) == PSImode);
+}
+
+int
+commutative_float_operator (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (GET_MODE (op) != mode)
+ return 0;
+ switch (GET_CODE (op))
+ {
+ case PLUS:
+ case MULT:
+ return 1;
+ }
+ return 0;
+}
+
+int
+noncommutative_float_operator (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (GET_MODE (op) != mode)
+ return 0;
+ switch (GET_CODE (op))
+ {
+ case MINUS:
+ case DIV:
+ return 1;
+ }
+ return 0;
+}
+
+int
+binary_float_operator (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (GET_MODE (op) != mode)
+ return 0;
+ switch (GET_CODE (op))
+ {
+ case PLUS:
+ case MINUS:
+ case MULT:
+ case DIV:
+ return 1;
+ }
+ return 0;
+}
+
+/* Return the destination address of a branch. */
+
+int
+branch_dest (branch)
+ rtx branch;
+{
+ rtx dest = SET_SRC (PATTERN (branch));
+ int dest_uid;
+
+ if (GET_CODE (dest) == IF_THEN_ELSE)
+ dest = XEXP (dest, 1);
+ dest = XEXP (dest, 0);
+ dest_uid = INSN_UID (dest);
+ return insn_addresses[dest_uid];
+}
+
+/* Return non-zero if REG is not used after INSN.
+ We assume REG is a reload reg, and therefore does
+ not live past labels. It may live past calls or jumps though. */
+int
+reg_unused_after (reg, insn)
+ rtx reg;
+ rtx insn;
+{
+ enum rtx_code code;
+ rtx set;
+
+ /* If the reg is set by this instruction, then it is safe for our
+ case. Disregard the case where this is a store to memory, since
+ we are checking a register used in the store address. */
+ set = single_set (insn);
+ if (set && GET_CODE (SET_DEST (set)) != MEM
+ && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return 1;
+
+ while (insn = NEXT_INSN (insn))
+ {
+ code = GET_CODE (insn);
+
+#if 0
+ /* If this is a label that existed before reload, then the register
+ is dead here. However, if this is a label added by reorg, then
+ the register may still be live here. We can't tell the difference,
+ so we just ignore labels completely. */
+ if (code == CODE_LABEL)
+ return 1;
+ /* else */
+#endif
+
+ if (code == JUMP_INSN)
+ return 0;
+
+ /* If this is a sequence, we must handle them all at once.
+ We could have for instance a call that sets the target register,
+ and an insn in a delay slot that uses the register. In this case,
+ we must return 0. */
+ else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ int i;
+ int retval = 0;
+
+ for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+ {
+ rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
+ rtx set = single_set (this_insn);
+
+ if (GET_CODE (this_insn) == CALL_INSN)
+ code = CALL_INSN;
+ else if (GET_CODE (this_insn) == JUMP_INSN)
+ {
+ if (INSN_ANNULLED_BRANCH_P (this_insn))
+ return 0;
+ code = JUMP_INSN;
+ }
+
+ if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+ return 0;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ {
+ if (GET_CODE (SET_DEST (set)) != MEM)
+ retval = 1;
+ else
+ return 0;
+ }
+ if (set == 0
+ && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
+ return 0;
+ }
+ if (retval == 1)
+ return 1;
+ else if (code == JUMP_INSN)
+ return 0;
+ }
+ else if (GET_RTX_CLASS (code) == 'i')
+ {
+ rtx set = single_set (insn);
+
+ if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+ return 0;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return GET_CODE (SET_DEST (set)) != MEM;
+ if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+ return 0;
+ }
+
+ if (code == CALL_INSN && call_used_regs[REGNO (reg)])
+ return 1;
+ }
+ return 1;
+}
+
+extern struct obstack permanent_obstack;
+
+rtx
+get_fpscr_rtx ()
+{
+ static rtx fpscr_rtx;
+
+ if (! fpscr_rtx)
+ {
+ push_obstacks (&permanent_obstack, &permanent_obstack);
+ fpscr_rtx = gen_rtx (REG, PSImode, 48);
+ REG_USERVAR_P (fpscr_rtx) = 1;
+ pop_obstacks ();
+ mark_user_reg (fpscr_rtx);
+ }
+ if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
+ mark_user_reg (fpscr_rtx);
+ return fpscr_rtx;
+}
+
+void
+emit_sf_insn (pat)
+ rtx pat;
+{
+ rtx addr;
+ /* When generating reload insns, we must not create new registers. FPSCR
+ should already have the correct value, so do nothing to change it. */
+ if (! TARGET_FPU_SINGLE && ! reload_in_progress)
+ {
+ addr = gen_reg_rtx (SImode);
+ emit_insn (gen_fpu_switch0 (addr));
+ }
+ emit_insn (pat);
+ if (! TARGET_FPU_SINGLE && ! reload_in_progress)
+ {
+ addr = gen_reg_rtx (SImode);
+ emit_insn (gen_fpu_switch1 (addr));
+ }
+}
+
+void
+emit_df_insn (pat)
+ rtx pat;
+{
+ rtx addr;
+ if (TARGET_FPU_SINGLE && ! reload_in_progress)
+ {
+ addr = gen_reg_rtx (SImode);
+ emit_insn (gen_fpu_switch0 (addr));
+ }
+ emit_insn (pat);
+ if (TARGET_FPU_SINGLE && ! reload_in_progress)
+ {
+ addr = gen_reg_rtx (SImode);
+ emit_insn (gen_fpu_switch1 (addr));
+ }
+}
+
+void
+expand_sf_unop (fun, operands)
+ rtx (*fun)();
+ rtx *operands;
+{
+ emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_sf_binop (fun, operands)
+ rtx (*fun)();
+ rtx *operands;
+{
+ emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+}
+
+void
+expand_df_unop (fun, operands)
+ rtx (*fun)();
+ rtx *operands;
+{
+ emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_df_binop (fun, operands)
+ rtx (*fun)();
+ rtx *operands;
+{
+ emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+}
+
+void
+expand_fp_branch (compare, branch)
+ rtx (*compare) (), (*branch) ();
+{
+ (GET_MODE (sh_compare_op0) == SFmode ? emit_sf_insn : emit_df_insn)
+ ((*compare) ());
+ emit_jump_insn ((*branch) ());
+}
+
+/* We don't want to make fpscr call-saved, because that would prevent
+ changing it, and it would also cost an extra instruction to save it.
+ We don't want it to be known as a global register either, because
+ that disables all flow analysis. But it has to be live at the function
+ return. Thus, we need to insert a USE at the end of the function. */
+/* This would best be called at about the time FINALIZE_PIC is called,
+ but it must not depend on flag_pic. Alas, there is no suitable hook there,
+ so this gets called from HAVE_RETURN. */
+int
+emit_fpscr_use ()
+{
+ static int fpscr_uses = 0;
+
+ if (rtx_equal_function_value_matters)
+ {
+ emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ()));
+ fpscr_uses++;
+ }
+ else
+ {
+ if (fpscr_uses > 1)
+ {
+ /* Due to the crude way we emit the USEs, we might end up with
+ some extra ones. Delete all but the last one. */
+ rtx insn;
+
+ for (insn = get_last_insn(); insn; insn = PREV_INSN (insn))
+ if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == USE
+ && GET_CODE (XEXP (PATTERN (insn), 0)) == REG
+ && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
+ {
+ insn = PREV_INSN (insn);
+ break;
+ }
+ for (; insn; insn = PREV_INSN (insn))
+ if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == USE
+ && GET_CODE (XEXP (PATTERN (insn), 0)) == REG
+ && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
+ {
+ PUT_CODE (insn, NOTE);
+ NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (insn) = 0;
+ }
+ }
+ fpscr_uses = 0;
+ }
+}
+
+/* ??? gcc does flow analysis strictly after common subexpression
+ elimination. As a result, common subexpression elimination fails
+ when there are some intervening statements setting the same register.
+ If we did nothing about this, this would hurt the precision switching
+ for SH4 badly. There is some cse after reload, but it is unable to
+ undo the extra register pressure from the unused instructions, and
+ it cannot remove auto-increment loads.
+
+ A C code example that shows this flow/cse weakness for (at least) SH
+ and sparc (as of gcc ss-970706) is this:
+
+double
+f(double a)
+{
+ double d;
+ d = 0.1;
+ a += d;
+ d = 1.1;
+ d = 0.1;
+ a *= d;
+ return a;
+}
+
+ So we add another pass before common subexpression elimination, to
+ remove assignments that are dead due to a following assignment in the
+ same basic block. */
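+/* In the example, the assignment d = 1.1 is dead: d is overwritten by the
+ following d = 0.1 in the same basic block before any use, so the pass
+ below turns the corresponding insn into a NOTE_INSN_DELETED note. */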
+
+int sh_flag_remove_dead_before_cse;
+
+static void
+mark_use (x, reg_set_block)
+ rtx x, *reg_set_block;
+{
+ enum rtx_code code;
+
+ if (! x)
+ return;
+ code = GET_CODE (x);
+ switch (code)
+ {
+ case REG:
+ {
+ int regno = REGNO (x);
+ int nregs = (regno < FIRST_PSEUDO_REGISTER
+ ? HARD_REGNO_NREGS (regno, GET_MODE (x))
+ : 1);
+ do
+ {
+ reg_set_block[regno + nregs - 1] = 0;
+ }
+ while (--nregs);
+ break;
+ }
+ case SET:
+ {
+ rtx dest = SET_DEST (x);
+
+ if (GET_CODE (dest) == SUBREG)
+ dest = SUBREG_REG (dest);
+ if (GET_CODE (dest) != REG)
+ mark_use (dest, reg_set_block);
+ mark_use (SET_SRC (x), reg_set_block);
+ break;
+ }
+ case CLOBBER:
+ break;
+ default:
+ {
+ char *fmt = GET_RTX_FORMAT (code);
+ int i, j;
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ mark_use (XEXP (x, i), reg_set_block);
+ else if (fmt[i] == 'E')
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ mark_use (XVECEXP (x, i, j), reg_set_block);
+ }
+ break;
+ }
+ }
+}
+
+int
+remove_dead_before_cse ()
+{
+ rtx *reg_set_block, last, last_call, insn, set;
+ int in_libcall = 0;
+
+ /* This pass should run just once, after rtl generation. */
+
+ if (! sh_flag_remove_dead_before_cse
+ || rtx_equal_function_value_matters
+ || reload_completed)
+ return 0;
+
+ sh_flag_remove_dead_before_cse = 0;
+
+ reg_set_block = (rtx *)alloca (max_reg_num () * sizeof (rtx));
+ bzero ((char *)reg_set_block, max_reg_num () * sizeof (rtx));
+ last_call = last = get_last_insn ();
+ for (insn = last; insn; insn = PREV_INSN (insn))
+ {
+ if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+ continue;
+ if (GET_CODE (insn) == JUMP_INSN)
+ {
+ last_call = last = insn;
+ continue;
+ }
+ set = single_set (insn);
+
+ /* Don't delete parts of libcalls, since that would confuse cse, loop
+ and flow. */
+ if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
+ in_libcall = 1;
+ else if (in_libcall)
+ {
+ if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
+ in_libcall = 0;
+ }
+ else if (set && GET_CODE (SET_DEST (set)) == REG)
+ {
+ int regno = REGNO (SET_DEST (set));
+ rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno]
+ ? last_call
+ : last);
+ if (reg_set_block[regno] == ref_insn
+ && (regno >= FIRST_PSEUDO_REGISTER
+ || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1)
+ && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn)))
+ {
+ PUT_CODE (insn, NOTE);
+ NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (insn) = 0;
+ continue;
+ }
+ else
+ reg_set_block[REGNO (SET_DEST (set))] = ref_insn;
+ }
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ last_call = insn;
+ mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block);
+ }
+ mark_use (PATTERN (insn), reg_set_block);
+ }
+ return 0;
+}
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
new file mode 100755
index 0000000..eff316a
--- /dev/null
+++ b/gcc/config/sh/sh.h
@@ -0,0 +1,2232 @@
+/* Definitions of target machine for GNU compiler for Hitachi Super-H.
+ Copyright (C) 1993-1998 Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com).
+ Improved by Jim Wilson (wilson@cygnus.com).
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+
+#define TARGET_VERSION \
+ fputs (" (Hitachi SH)", stderr);
+
+/* Unfortunately, insn-attrtab.c doesn't include insn-codes.h. We can't
+ include it here, because hconfig.h is also included by gencodes.c. */
+extern int code_for_indirect_jump_scratch;
+
+/* Generate SDB debugging information. */
+
+#define SDB_DEBUGGING_INFO
+
+/* Output DBX (stabs) debugging information if doing -gstabs. */
+
+#include "dbxcoff.h"
+
+#define SDB_DELIM ";"
+
+#define CPP_SPEC "%{ml:-D__LITTLE_ENDIAN__} \
+%{m1:-D__sh1__} \
+%{m2:-D__sh2__} \
+%{m3:-D__sh3__} \
+%{m3e:-D__SH3E__} \
+%{m4-single-only:-D__SH4_SINGLE_ONLY__} \
+%{m4-single:-D__SH4_SINGLE__} \
+%{m4:-D__SH4__} \
+%{!m1:%{!m2:%{!m3:%{!m3e:%{!m4:%{!m4-single:%{!m4-single-only:-D__sh1__}}}}}}}"
+
+#define CPP_PREDEFINES "-D__sh__ -Acpu(sh) -Amachine(sh)"
+
+#define ASM_SPEC "%{ml:-little} %{mrelax:-relax}"
+
+#define LINK_SPEC "%{ml:-m shl} %{mrelax:-relax}"
+
+/* We cannot debug without a frame pointer. */
+/* #define CAN_DEBUG_WITHOUT_FP */
+
+#define CONDITIONAL_REGISTER_USAGE \
+ if (! TARGET_SH4 || ! TARGET_FMOVD) \
+ { \
+ int regno; \
+ for (regno = FIRST_XD_REG; regno <= LAST_XD_REG; regno++) \
+ fixed_regs[regno] = call_used_regs[regno] = 1; \
+ if (! TARGET_SH4) \
+ { \
+ if (! TARGET_SH3E) \
+ { \
+ int regno; \
+ for (regno = FIRST_FP_REG; regno <= LAST_FP_REG; regno++) \
+ fixed_regs[regno] = call_used_regs[regno] = 1; \
+ fixed_regs[FPUL_REG] = call_used_regs[FPUL_REG] = 1; \
+ } \
+ } \
+ } \
+ /* Hitachi saves and restores mac registers on call. */ \
+ if (TARGET_HITACHI) \
+ { \
+ call_used_regs[MACH_REG] = 0; \
+ call_used_regs[MACL_REG] = 0; \
+ }
+
+/* ??? Need to write documentation for all SH options and add it to the
+ invoke.texi file. */
+
+/* Run-time compilation parameters selecting different hardware subsets. */
+
+extern int target_flags;
+#define ISIZE_BIT (1<<1)
+#define DALIGN_BIT (1<<6)
+#define SH1_BIT (1<<8)
+#define SH2_BIT (1<<9)
+#define SH3_BIT (1<<10)
+#define SH3E_BIT (1<<11)
+#define HARD_SH4_BIT (1<<5)
+#define FPU_SINGLE_BIT (1<<7)
+#define SH4_BIT (1<<12)
+#define FMOVD_BIT (1<<4)
+#define SPACE_BIT (1<<13)
+#define BIGTABLE_BIT (1<<14)
+#define RELAX_BIT (1<<15)
+#define HITACHI_BIT (1<<22)
+#define PADSTRUCT_BIT (1<<28)
+#define LITTLE_ENDIAN_BIT (1<<29)
+#define IEEE_BIT (1<<30)
+
+/* Nonzero if we should dump out instruction size info. */
+#define TARGET_DUMPISIZE (target_flags & ISIZE_BIT)
+
+/* Nonzero to align doubles on 64 bit boundaries. */
+#define TARGET_ALIGN_DOUBLE (target_flags & DALIGN_BIT)
+
+/* Nonzero if we should generate code using type 1 insns. */
+#define TARGET_SH1 (target_flags & SH1_BIT)
+
+/* Nonzero if we should generate code using type 2 insns. */
+#define TARGET_SH2 (target_flags & SH2_BIT)
+
+/* Nonzero if we should generate code using type 3 insns. */
+#define TARGET_SH3 (target_flags & SH3_BIT)
+
+/* Nonzero if we should generate code using type 3E insns. */
+#define TARGET_SH3E (target_flags & SH3E_BIT)
+
+/* Nonzero if the cache line size is 32. */
+#define TARGET_CACHE32 (target_flags & HARD_SH4_BIT)
+
+/* Nonzero if we schedule for a superscalar implementation. */
+#define TARGET_SUPERSCALAR (target_flags & HARD_SH4_BIT)
+
+/* Nonzero if the target has separate instruction and data caches. */
+#define TARGET_HARVARD (target_flags & HARD_SH4_BIT)
+
+/* Nonzero if compiling for SH4 hardware (to be used for insn costs etc.) */
+#define TARGET_HARD_SH4 (target_flags & HARD_SH4_BIT)
+
+/* Nonzero if the default precision of the FPU is single. */
+#define TARGET_FPU_SINGLE (target_flags & FPU_SINGLE_BIT)
+
+/* Nonzero if we should generate code using type 4 insns. */
+#define TARGET_SH4 (target_flags & SH4_BIT)
+
+/* Nonzero if we should generate fmovd. */
+#define TARGET_FMOVD (target_flags & FMOVD_BIT)
+
+/* Nonzero if we respect NANs. */
+#define TARGET_IEEE (target_flags & IEEE_BIT)
+
+/* Nonzero if we should generate smaller code rather than faster code. */
+#define TARGET_SMALLCODE (target_flags & SPACE_BIT)
+
+/* Nonzero to use long jump tables. */
+#define TARGET_BIGTABLE (target_flags & BIGTABLE_BIT)
+
+/* Nonzero to generate pseudo-ops needed by the assembler and linker
+ to do function call relaxing. */
+#define TARGET_RELAX (target_flags & RELAX_BIT)
+
+/* Nonzero if using Hitachi's calling convention. */
+#define TARGET_HITACHI (target_flags & HITACHI_BIT)
+
+/* Nonzero if padding structures to a multiple of 4 bytes. This is
+ incompatible with Hitachi's compiler, and gives unusual structure layouts
+ which confuse programmers.
+ ??? This option is not useful, but is retained in case there are people
+ who are still relying on it. It may be deleted in the future. */
+#define TARGET_PADSTRUCT (target_flags & PADSTRUCT_BIT)
+
+/* Nonzero if generating code for a little endian SH. */
+#define TARGET_LITTLE_ENDIAN (target_flags & LITTLE_ENDIAN_BIT)
+
+#define TARGET_SWITCHES \
+{ {"1", SH1_BIT}, \
+ {"2", SH2_BIT}, \
+ {"3", SH3_BIT|SH2_BIT}, \
+ {"3e", SH3E_BIT|SH3_BIT|SH2_BIT|FPU_SINGLE_BIT}, \
+ {"4-single-only", SH3E_BIT|SH3_BIT|SH2_BIT|SH3E_BIT|HARD_SH4_BIT|FPU_SINGLE_BIT}, \
+ {"4-single", SH4_BIT|SH3E_BIT|SH3_BIT|SH2_BIT|HARD_SH4_BIT|FPU_SINGLE_BIT},\
+ {"4", SH4_BIT|SH3E_BIT|SH3_BIT|SH2_BIT|HARD_SH4_BIT}, \
+ {"b", -LITTLE_ENDIAN_BIT}, \
+ {"bigtable", BIGTABLE_BIT}, \
+ {"dalign", DALIGN_BIT}, \
+ {"fmovd", FMOVD_BIT}, \
+ {"hitachi", HITACHI_BIT}, \
+ {"ieee", IEEE_BIT}, \
+ {"isize", ISIZE_BIT}, \
+ {"l", LITTLE_ENDIAN_BIT}, \
+ {"no-ieee", -IEEE_BIT}, \
+ {"padstruct", PADSTRUCT_BIT}, \
+ {"relax", RELAX_BIT}, \
+ {"space", SPACE_BIT}, \
+ SUBTARGET_SWITCHES \
+ {"", TARGET_DEFAULT} \
+}
+
+/* These are meant to be redefined in the target-dependent files. */
+#define SUBTARGET_SWITCHES
+
+#define TARGET_DEFAULT (0)
+
+#define OPTIMIZATION_OPTIONS(LEVEL,SIZE) \
+do { \
+ if (LEVEL) \
+ flag_omit_frame_pointer = -1; \
+ if (LEVEL) \
+ sh_flag_remove_dead_before_cse = 1; \
+ if (SIZE) \
+ target_flags |= SPACE_BIT; \
+} while (0)
+
+#define ASSEMBLER_DIALECT assembler_dialect
+
+extern int assembler_dialect;
+
+#define OVERRIDE_OPTIONS \
+do { \
+ sh_cpu = CPU_SH1; \
+ assembler_dialect = 0; \
+ if (TARGET_SH2) \
+ sh_cpu = CPU_SH2; \
+ if (TARGET_SH3) \
+ sh_cpu = CPU_SH3; \
+ if (TARGET_SH3E) \
+ sh_cpu = CPU_SH3E; \
+ if (TARGET_SH4) \
+ { \
+ assembler_dialect = 1; \
+ sh_cpu = CPU_SH4; \
+ } \
+ if (! TARGET_SH4 || ! TARGET_FMOVD) \
+ { \
+ /* Prevent usage of explicit register names for variables \
+ for registers not present / not addressable in the \
+ target architecture. */ \
+ int regno; \
+ for (regno = (TARGET_SH3E) ? 17 : 0; \
+ regno <= 24; regno++) \
+ fp_reg_names[regno][0] = 0; \
+ } \
+ if (flag_omit_frame_pointer < 0) \
+ /* The debugging information is sufficient, \
+ but gdb doesn't implement this yet. */ \
+ if (0) \
+ flag_omit_frame_pointer \
+ = (PREFERRED_DEBUGGING_TYPE == DWARF_DEBUG \
+ || PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG); \
+ else \
+ flag_omit_frame_pointer = 0; \
+ \
+ /* Never run scheduling before reload, since that can \
+ break global alloc, and generates slower code anyway due \
+ to the pressure on R0. */ \
+ flag_schedule_insns = 0; \
+ sh_addr_diff_vec_mode = TARGET_BIGTABLE ? SImode : HImode; \
+} while (0)
+
+/* Target machine storage layout. */
+
+/* Define to use software floating point emulator for REAL_ARITHMETIC and
+ decimal <-> binary conversion. */
+#define REAL_ARITHMETIC
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0)
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0)
+
+/* Define this to set the endianness to use in libgcc2.c, which cannot
+ not depend on target_flags. */
+#if defined(__LITTLE_ENDIAN__)
+#define LIBGCC2_WORDS_BIG_ENDIAN 0
+#else
+#define LIBGCC2_WORDS_BIG_ENDIAN 1
+#endif
+
+/* Number of bits in an addressable storage unit. */
+#define BITS_PER_UNIT 8
+
+/* Width in bits of a "word", which is the contents of a machine register.
+ Note that this is not necessarily the width of data type `int';
+ if using 16-bit ints on a 68000, this would still be 32.
+ But on a machine with 16-bit registers, this would be 16. */
+#define BITS_PER_WORD 32
+#define MAX_BITS_PER_WORD 32
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+
+/* Width in bits of a pointer.
+ See also the macro `Pmode' defined below. */
+#define POINTER_SIZE 32
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 32
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY BIGGEST_ALIGNMENT
+
+/* The log (base 2) of the cache line size, in bytes. Processors prior to
+ SH3 have no actual cache, but they fetch code in chunks of 4 bytes. */
+#define CACHE_LOG (TARGET_CACHE32 ? 5 : TARGET_SH3 ? 4 : 2)
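+/* That is: 32 byte lines (log 5) when TARGET_CACHE32, 16 byte lines
+ (log 4) on SH3, and 4 byte fetch chunks (log 2) otherwise. */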
+
+/* Allocation boundary (in *bits*) for the code of a function.
+ 32 bit alignment is faster, because instructions are always fetched as a
+ pair from a longword boundary. */
+#define FUNCTION_BOUNDARY (TARGET_SMALLCODE ? 16 : (1 << CACHE_LOG) * 8)
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
+
+/* The best alignment to use in cases where we have a choice. */
+#define FASTEST_ALIGNMENT 32
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < FASTEST_ALIGNMENT) \
+ ? FASTEST_ALIGNMENT : (ALIGN))
+
+#ifndef MAX_OFILE_ALIGNMENT
+#define MAX_OFILE_ALIGNMENT 128
+#endif
+
+/* Make arrays of chars word-aligned for the same reasons. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Number of bits which any structure or union's size must be a
+ multiple of. Each structure or union's size is rounded up to a
+ multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY (TARGET_PADSTRUCT ? 32 : 8)
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* If LABEL_AFTER_BARRIER demands an alignment, return its base 2 logarithm. */
+#define LABEL_ALIGN_AFTER_BARRIER(LABEL_AFTER_BARRIER) \
+ barrier_align (LABEL_AFTER_BARRIER)
+
+#define LOOP_ALIGN(A_LABEL) \
+ ((! optimize || TARGET_HARVARD || TARGET_SMALLCODE) \
+ ? 0 : sh_loop_align (A_LABEL))
+
+#define LABEL_ALIGN(A_LABEL) \
+( \
+ (PREV_INSN (A_LABEL) \
+ && GET_CODE (PREV_INSN (A_LABEL)) == INSN \
+ && GET_CODE (PATTERN (PREV_INSN (A_LABEL))) == UNSPEC_VOLATILE \
+ && XINT (PATTERN (PREV_INSN (A_LABEL)), 1) == 1) \
+ /* explicit alignment insn in constant tables. */ \
+ ? INTVAL (XVECEXP (PATTERN (PREV_INSN (A_LABEL)), 0, 0)) \
+ : 0)
+
+/* Jump tables must be 32 bit aligned, no matter the size of the element. */
+#define ADDR_VEC_ALIGN(ADDR_VEC) 2
+
+/* The base two logarithm of the known minimum alignment of an insn length. */
+#define INSN_LENGTH_ALIGNMENT(A_INSN) \
+ (GET_CODE (A_INSN) == INSN \
+ ? 1 \
+ : GET_CODE (A_INSN) == JUMP_INSN || GET_CODE (A_INSN) == CALL_INSN \
+ ? 1 \
+ : CACHE_LOG)
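+/* SH instructions are uniformly 2 bytes, hence log 1 for real insns;
+ anything else falls back to the cache line alignment. */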
+
+/* Standard register usage. */
+
+/* Register allocation for the Hitachi calling convention:
+
+ r0 arg return
+ r1..r3 scratch
+ r4..r7 args in
+ r8..r13 call saved
+ r14 frame pointer/call saved
+ r15 stack pointer
+ ap arg pointer (doesn't really exist, always eliminated)
+ pr subroutine return address
+ t t bit
+ mach multiply/accumulate result, high part
+ macl multiply/accumulate result, low part.
+ fpul fp/int communication register
+ rap return address pointer register
+ fr0 fp arg return
+ fr1..fr3 scratch floating point registers
+ fr4..fr11 fp args in
+ fr12..fr15 call saved floating point registers */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers. */
+
+#define AP_REG 16
+#define PR_REG 17
+#define T_REG 18
+#define GBR_REG 19
+#define MACH_REG 20
+#define MACL_REG 21
+#define SPECIAL_REG(REGNO) ((REGNO) >= 18 && (REGNO) <= 21)
+#define FPUL_REG 22
+#define RAP_REG 23
+#define FIRST_FP_REG 24
+#define LAST_FP_REG 39
+#define FIRST_XD_REG 40
+#define LAST_XD_REG 47
+#define FPSCR_REG 48
+
+#define FIRST_PSEUDO_REGISTER 49
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+
+ Mach register is fixed because it's only 10 bits wide for SH1.
+ It is 32 bits wide for SH2. */
+
+#define FIXED_REGISTERS \
+ { 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 0, 1, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 1, \
+}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+
+#define CALL_USED_REGISTERS \
+ { 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 1, \
+ 1, 0, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 0, 0, 0, 0, \
+ 1, 1, 1, 1, \
+ 1, 1, 0, 0, \
+ 1, \
+}
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ On the SH all but the XD regs are UNITS_PER_WORD bits wide. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((REGNO) >= FIRST_XD_REG && (REGNO) <= LAST_XD_REG \
+ ? (GET_MODE_SIZE (MODE) / (2 * UNITS_PER_WORD)) \
+   : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
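+
+/* For illustration, with UNITS_PER_WORD == 4 as on the SH:
+
+     HARD_REGNO_NREGS (FIRST_FP_REG, DFmode) == (8 + 4 - 1) / 4 == 2
+     HARD_REGNO_NREGS (FIRST_XD_REG, DFmode) == 8 / (2 * 4)     == 1
+
+   i.e. a double needs two ordinary FP registers but only one XD
+   register, since each XD register is a 64 bit register pair.  */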
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
+ We can allow any mode in any general register. The special registers
+ only allow SImode. Don't allow any mode in the PR. */
+
+/* We cannot hold DCmode values in the XD registers because alter_reg
+ handles subregs of them incorrectly. We could work around this by
+ spacing the XD registers like the DR registers, but this would require
+ additional memory in every compilation to hold larger register vectors.
+ We could hold SFmode / SCmode values in XD registers, but that
+ would require a tertiary reload when reloading from / to memory,
+ and a secondary reload to reload from / to general regs; that
+   seems to be a losing proposition. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ (SPECIAL_REG (REGNO) ? (MODE) == SImode \
+ : (REGNO) == FPUL_REG ? (MODE) == SImode || (MODE) == SFmode \
+ : (REGNO) >= FIRST_FP_REG && (REGNO) <= LAST_FP_REG && (MODE) == SFmode \
+ ? 1 \
+ : (REGNO) >= FIRST_FP_REG && (REGNO) <= LAST_FP_REG \
+ ? ((MODE) == SFmode \
+ || (TARGET_SH3E && (MODE) == SCmode) \
+ || (((TARGET_SH4 && (MODE) == DFmode) || (MODE) == DCmode) \
+ && (((REGNO) - FIRST_FP_REG) & 1) == 0)) \
+ : (REGNO) >= FIRST_XD_REG && (REGNO) <= LAST_XD_REG \
+ ? (MODE) == DFmode \
+ : (REGNO) == PR_REG ? 0 \
+ : (REGNO) == FPSCR_REG ? (MODE) == PSImode \
+ : 1)
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((MODE1) == (MODE2) || GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* Define this if the program counter is overloaded on a register. */
+/* #define PC_REGNUM 15*/
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM 15
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM 14
+
+/* Fake register that holds the address on the stack of the
+ current function's return address. */
+#define RETURN_ADDRESS_POINTER_REGNUM 23
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms may be accessed
+ via the stack pointer) in functions that seem suitable. */
+
+#define FRAME_POINTER_REQUIRED 0
+
+/* Definitions for register eliminations.
+
+ We have three registers that can be eliminated on the SH. First, the
+ frame pointer register can often be eliminated in favor of the stack
+ pointer register. Secondly, the argument pointer register can always be
+ eliminated; it is replaced with either the stack or frame pointer.
+ Third, there is the return address pointer, which can also be replaced
+ with either the stack or the frame pointer. */
+
+/* This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference. */
+
+/* If you add any registers here that are not actually hard registers,
+ and that have any alternative of elimination that doesn't always
+ apply, you need to amend calc_live_regs to exclude it, because
+   reload spills all eliminable registers where it sees a
+   can_eliminate == 0 entry, thus making them 'live'.
+ If you add any hard registers that can be eliminated in different
+ ways, you have to patch reload to spill them only when all alternatives
+ of elimination fail. */
+
+#define ELIMINABLE_REGS \
+{{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM},}
+
+/* Given FROM and TO register numbers, say whether this elimination
+ is allowed. */
+#define CAN_ELIMINATE(FROM, TO) \
+ (!((FROM) == FRAME_POINTER_REGNUM && FRAME_POINTER_REQUIRED))
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ OFFSET = initial_elimination_offset ((FROM), (TO))
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM 16
+
+/* Register in which the static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM 13
+
+/* The register in which a struct value address is passed. */
+
+#define STRUCT_VALUE_REGNUM 2
+
+/* If the structure value address is not passed in a register, define
+ `STRUCT_VALUE' as an expression returning an RTX for the place
+ where the address is passed. If it returns 0, the address is
+ passed as an "invisible" first argument. */
+
+/*#define STRUCT_VALUE ((rtx)0)*/
+
+/* Don't default to pcc-struct-return, because we have already specified
+ exactly how to return structures in the RETURN_IN_MEMORY macro. */
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+/* The SH has two sorts of general registers, R0 and the rest. R0 can
+ be used as the destination of some of the arithmetic ops. There are
+ also some special purpose registers; the T bit register, the
+ Procedure Return Register and the Multiply Accumulate Registers. */
+/* Place GENERAL_REGS after FPUL_REGS so that it will be preferred by
+ reg_class_subunion. We don't want to have an actual union class
+ of these, because it would only be used when both classes are calculated
+ to give the same cost, but there is only one FPUL register.
+ Besides, regclass fails to notice the different REGISTER_MOVE_COSTS
+ applying to the actual instruction alternative considered. E.g., the
+   y/r alternative of movsi_ie is considered to have no more cost than
+ the r/r alternative, which is patently untrue. */
+
+enum reg_class
+{
+ NO_REGS,
+ R0_REGS,
+ PR_REGS,
+ T_REGS,
+ MAC_REGS,
+ FPUL_REGS,
+ GENERAL_REGS,
+ FP0_REGS,
+ FP_REGS,
+ DF_REGS,
+ FPSCR_REGS,
+ GENERAL_FP_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "R0_REGS", \
+ "PR_REGS", \
+ "T_REGS", \
+ "MAC_REGS", \
+ "FPUL_REGS", \
+ "GENERAL_REGS", \
+ "FP0_REGS", \
+ "FP_REGS", \
+ "DF_REGS", \
+ "FPSCR_REGS", \
+ "GENERAL_FP_REGS", \
+ "ALL_REGS", \
+}
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x00000001, 0x00000000 }, /* R0_REGS */ \
+ { 0x00020000, 0x00000000 }, /* PR_REGS */ \
+ { 0x00040000, 0x00000000 }, /* T_REGS */ \
+ { 0x00300000, 0x00000000 }, /* MAC_REGS */ \
+ { 0x00400000, 0x00000000 }, /* FPUL_REGS */ \
+ { 0x0081FFFF, 0x00000000 }, /* GENERAL_REGS */ \
+ { 0x01000000, 0x00000000 }, /* FP0_REGS */ \
+ { 0xFF000000, 0x000000FF }, /* FP_REGS */ \
+ { 0xFF000000, 0x0000FFFF }, /* DF_REGS */ \
+ { 0x00000000, 0x00010000 }, /* FPSCR_REGS */ \
+ { 0xFF81FFFF, 0x0000FFFF }, /* GENERAL_FP_REGS */ \
+ { 0xFFFFFFFF, 0x0001FFFF }, /* ALL_REGS */ \
+}
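+
+/* To read these masks: bit N of the first word stands for hard register
+   N, bit N of the second word for hard register 32 + N.  For example,
+   GENERAL_REGS == 0x0081FFFF sets bits 0..16 and bit 23, i.e. r0..r15,
+   ap and rap; FPSCR_REGS sets only bit 16 of the second word, which is
+   hard register 48, fpscr.  */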
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+extern int regno_reg_class[];
+#define REGNO_REG_CLASS(REGNO) regno_reg_class[(REGNO)]
+
+/* When defined, the compiler allows registers explicitly used in the
+ rtl to be used as spill registers but prevents the compiler from
+ extending the lifetime of these registers. */
+
+#define SMALL_REGISTER_CLASSES 1
+
+/* The order in which registers should be allocated. */
+/* Sometimes FP0_REGS becomes the preferred class of a floating point pseudo,
+   and GENERAL_FP_REGS the alternate class.  Since FP0 is likely to be
+   spilled or used otherwise, it is better to have the other FP_REGS
+   allocated first. */
+#define REG_ALLOC_ORDER \
+ { 25,26,27,28,29,30,31,24,32,33,34,35,36,37,38,39, \
+ 40,41,42,43,44,45,46,47,48, \
+ 1,2,3,7,6,5,4,0,8,9,10,11,12,13,14, \
+ 22,15,16,17,18,19,20,21,23 }
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS R0_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Get reg_class from a letter such as appears in the machine
+ description. */
+extern enum reg_class reg_class_from_letter[];
+
+#define REG_CLASS_FROM_LETTER(C) \
+ ( (C) >= 'a' && (C) <= 'z' ? reg_class_from_letter[(C)-'a'] : NO_REGS )
+
+/* The letters I, J, K, L and M in a register constraint string
+ can be used to stand for particular ranges of immediate operands.
+ This macro defines what the ranges are.
+ C is the letter, and VALUE is a constant value.
+ Return 1 if VALUE is in the range specified by C.
+   I: arithmetic operand -128..127, as used in add, sub, etc.
+ K: shift operand 1,2,8 or 16
+ L: logical operand 0..255, as used in and, or, etc.
+ M: constant 1
+ N: constant 0 */
+
+#define CONST_OK_FOR_I(VALUE) (((HOST_WIDE_INT)(VALUE))>= -128 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 127)
+#define CONST_OK_FOR_K(VALUE) ((VALUE)==1||(VALUE)==2||(VALUE)==8||(VALUE)==16)
+#define CONST_OK_FOR_L(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 255)
+#define CONST_OK_FOR_M(VALUE) ((VALUE)==1)
+#define CONST_OK_FOR_N(VALUE) ((VALUE)==0)
+#define CONST_OK_FOR_LETTER_P(VALUE, C) \
+ ((C) == 'I' ? CONST_OK_FOR_I (VALUE) \
+ : (C) == 'K' ? CONST_OK_FOR_K (VALUE) \
+ : (C) == 'L' ? CONST_OK_FOR_L (VALUE) \
+ : (C) == 'M' ? CONST_OK_FOR_M (VALUE) \
+ : (C) == 'N' ? CONST_OK_FOR_N (VALUE) \
+ : 0)
+
+/* Similar, but for floating constants, and defining letters G and H.
+ Here VALUE is the CONST_DOUBLE rtx itself. */
+
+#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \
+((C) == 'G' ? fp_zero_operand (VALUE) \
+ : (C) == 'H' ? fp_one_operand (VALUE) \
+ : (C) == 'F')
+
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class. */
+
+#define PREFERRED_RELOAD_CLASS(X, CLASS) (CLASS)
+
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS,MODE,X) \
+ ((((((CLASS) == FP_REGS || (CLASS) == FP0_REGS \
+ || (CLASS) == DF_REGS) \
+ && (GET_CODE (X) == REG && REGNO (X) <= AP_REG)) \
+ || (((CLASS) == GENERAL_REGS || (CLASS) == R0_REGS) \
+ && GET_CODE (X) == REG \
+ && REGNO (X) >= FIRST_FP_REG && REGNO (X) <= LAST_FP_REG)) \
+ && MODE == SFmode) \
+ ? FPUL_REGS \
+ : ((CLASS) == FPUL_REGS \
+ && (GET_CODE (X) == MEM \
+ || (GET_CODE (X) == REG && REGNO (X) >= FIRST_PSEUDO_REGISTER)))\
+ ? GENERAL_REGS \
+ : (((CLASS) == MAC_REGS || (CLASS) == PR_REGS) \
+ && GET_CODE (X) == REG && REGNO (X) > 15 \
+ && (CLASS) != REGNO_REG_CLASS (REGNO (X))) \
+ ? GENERAL_REGS : NO_REGS)
+
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS,MODE,X) \
+ ((((CLASS) == FP_REGS || (CLASS) == FP0_REGS || (CLASS) == DF_REGS) \
+ && immediate_operand ((X), (MODE)) \
+ && ! ((fp_zero_operand (X) || fp_one_operand (X)) && (MODE) == SFmode))\
+ ? R0_REGS \
+ : CLASS == FPUL_REGS && immediate_operand ((X), (MODE)) \
+ ? (GET_CODE (X) == CONST_INT && CONST_OK_FOR_I (INTVAL (X)) \
+ ? GENERAL_REGS \
+ : R0_REGS) \
+ : (CLASS == FPSCR_REGS \
+ && ((GET_CODE (X) == REG && REGNO (X) >= FIRST_PSEUDO_REGISTER) \
+	     || (GET_CODE (X) == MEM && GET_CODE (XEXP ((X), 0)) == PLUS))) \
+ ? GENERAL_REGS \
+ : SECONDARY_OUTPUT_RELOAD_CLASS((CLASS),(MODE),(X)))
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS.
+
+ On SH this is the size of MODE in words. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* If defined, gives a class of registers that cannot be used as the
+ operand of a SUBREG that changes the size of the object. */
+
+#define CLASS_CANNOT_CHANGE_SIZE DF_REGS
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define the number of registers that can hold parameters.
+ These macros are used only in other macro definitions below. */
+
+#define NPARM_REGS(MODE) \
+ (TARGET_SH3E && (MODE) == SFmode \
+ ? 8 \
+ : TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
+ ? 8 \
+ : 4)
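+
+/* So on the SH4 up to 8 floating point argument registers are
+   available, against at most 4 integer argument words in r4..r7;
+   the SH3E gets the 8 FP argument registers for SFmode only.  */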
+
+#define FIRST_PARM_REG 4
+#define FIRST_RET_REG 0
+
+#define FIRST_FP_PARM_REG (FIRST_FP_REG + 4)
+#define FIRST_FP_RET_REG FIRST_FP_REG
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this macro if the addresses of local variable slots are at
+ negative offsets from the frame pointer.
+
+ The SH only has positive indexes, so grow the frame up. */
+/* #define FRAME_GROWS_DOWNWARD */
+
+/* Offset from the frame pointer to the first local variable slot to
+ be allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by. */
+/* Don't define PUSH_ROUNDING, since the hardware doesn't do this.
+ When PUSH_ROUNDING is not defined, PARM_BOUNDARY will cause gcc to
+ do correct alignment. */
+#if 0
+#define PUSH_ROUNDING(NPUSHED) (((NPUSHED) + 3) & ~3)
+#endif
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Value is the number of bytes of arguments automatically
+ popped when returning from a subroutine call.
+ FUNDECL is the declaration node of the function (as a tree),
+ FUNTYPE is the data type of the function (as a tree),
+ or for a library call it is an identifier node for the subroutine name.
+ SIZE is the number of bytes of arguments passed on the stack.
+
+ On the SH, the caller does not pop any of its arguments that were passed
+ on the stack. */
+#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE) 0
+
+/* Nonzero if we do not know how to pass TYPE solely in registers.
+ Values that come in registers with inconvenient padding are stored
+ to memory at the function start. */
+
+#define MUST_PASS_IN_STACK(MODE,TYPE) \
+ ((TYPE) != 0 \
+ && (TREE_CODE (TYPE_SIZE (TYPE)) != INTEGER_CST \
+ || TREE_ADDRESSABLE (TYPE)))
+/* Some subroutine macros specific to this machine. */
+
+#define BASE_RETURN_VALUE_REG(MODE) \
+ ((TARGET_SH3E && ((MODE) == SFmode)) \
+ ? FIRST_FP_RET_REG \
+ : TARGET_SH3E && (MODE) == SCmode \
+ ? FIRST_FP_RET_REG \
+ : (TARGET_SH4 \
+ && ((MODE) == DFmode || (MODE) == SFmode \
+ || (MODE) == DCmode || (MODE) == SCmode )) \
+ ? FIRST_FP_RET_REG \
+ : FIRST_RET_REG)
+
+#define BASE_ARG_REG(MODE) \
+ ((TARGET_SH3E && ((MODE) == SFmode)) \
+ ? FIRST_FP_PARM_REG \
+ : TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)\
+ ? FIRST_FP_PARM_REG \
+ : FIRST_PARM_REG)
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0.
+ For the SH, this is like LIBCALL_VALUE, except that we must change the
+ mode like PROMOTE_MODE does.
+ ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
+ tested here has to be kept in sync with the one in explow.c:promote_mode. */
+
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ gen_rtx (REG, \
+ ((GET_MODE_CLASS (TYPE_MODE (VALTYPE)) == MODE_INT \
+ && GET_MODE_SIZE (TYPE_MODE (VALTYPE)) < UNITS_PER_WORD \
+ && (TREE_CODE (VALTYPE) == INTEGER_TYPE \
+ || TREE_CODE (VALTYPE) == ENUMERAL_TYPE \
+ || TREE_CODE (VALTYPE) == BOOLEAN_TYPE \
+ || TREE_CODE (VALTYPE) == CHAR_TYPE \
+ || TREE_CODE (VALTYPE) == REAL_TYPE \
+ || TREE_CODE (VALTYPE) == OFFSET_TYPE)) \
+ ? SImode : TYPE_MODE (VALTYPE)), \
+ BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE)))
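+
+/* For example, a function returning `short' has its value promoted to
+   SImode and returned in r0, while on the SH3E and SH4 a `float' comes
+   back in fr0.  */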
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+#define LIBCALL_VALUE(MODE) \
+ gen_rtx (REG, (MODE), BASE_RETURN_VALUE_REG (MODE))
+
+/* 1 if N is a possible register number for a function value. */
+#define FUNCTION_VALUE_REGNO_P(REGNO) \
+ ((REGNO) == FIRST_RET_REG || (TARGET_SH3E && (REGNO) == FIRST_FP_RET_REG))
+
+/* 1 if N is a possible register number for function argument passing. */
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ (((REGNO) >= FIRST_PARM_REG && (REGNO) < (FIRST_PARM_REG + 4)) \
+ || (TARGET_SH3E \
+ && (REGNO) >= FIRST_FP_PARM_REG && (REGNO) < (FIRST_FP_PARM_REG + 8)))
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On SH, this is a single integer, which is a number of words
+ of arguments scanned so far (including the invisible argument,
+ if any, which holds the structure-value-address).
+   Thus NPARM_REGS or more means all following args should go on the stack. */
+
+enum sh_arg_class { SH_ARG_INT = 0, SH_ARG_FLOAT = 1 };
+struct sh_args {
+ int arg_count[2];
+};
+
+#define CUMULATIVE_ARGS struct sh_args
+
+#define GET_SH_ARG_CLASS(MODE) \
+ ((TARGET_SH3E && (MODE) == SFmode) \
+ ? SH_ARG_FLOAT \
+ : TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
+ ? SH_ARG_FLOAT : SH_ARG_INT)
+
+#define ROUND_ADVANCE(SIZE) \
+ (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Round a register number up to a proper boundary for an arg of mode
+ MODE.
+
+ The SH doesn't care about double alignment, so we only
+   round doubles to even regs when explicitly asked to. */
+
+#define ROUND_REG(CUM, MODE) \
+ (((TARGET_ALIGN_DOUBLE \
+ || (TARGET_SH4 && ((MODE) == DFmode || (MODE) == DCmode) \
+ && (CUM).arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (MODE)))\
+ && GET_MODE_UNIT_SIZE ((MODE)) > UNITS_PER_WORD) \
+ ? ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \
+ + ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] & 1)) \
+ : (CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)])
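+
+/* For illustration: on the SH4, with one float argument already in fr4
+   the float argument count is 1; a double that follows is rounded up
+   to count 2, so it lands in the even aligned pair fr6/fr7 (dr6)
+   instead of straddling fr5 and fr6.  */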
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0.
+
+ On SH, the offset always starts at 0: the first parm reg is always
+ the same reg for a given argument class. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT) \
+ do { \
+ (CUM).arg_count[(int) SH_ARG_INT] = 0; \
+ (CUM).arg_count[(int) SH_ARG_FLOAT] = 0; \
+ } while (0)
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be
+ available.) */
+
+#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
+ if (! TARGET_SH4 || PASS_IN_REG_P ((CUM), (MODE), (TYPE))) \
+ ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \
+ = (ROUND_REG ((CUM), (MODE)) \
+ + ((MODE) == BLKmode \
+ ? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \
+ : ROUND_ADVANCE (GET_MODE_SIZE (MODE)))))
+
+/* Return boolean indicating arg of mode MODE will be passed in a reg.
+ This macro is only used in this file. */
+
+#define PASS_IN_REG_P(CUM, MODE, TYPE) \
+ (((TYPE) == 0 || ! TREE_ADDRESSABLE ((tree)(TYPE))) \
+ && (TARGET_SH3E \
+ ? ((MODE) == BLKmode \
+ ? (((CUM).arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD \
+ + int_size_in_bytes (TYPE)) \
+ <= NPARM_REGS (SImode) * UNITS_PER_WORD) \
+ : ((ROUND_REG((CUM), (MODE)) \
+ + HARD_REGNO_NREGS (BASE_ARG_REG (MODE), (MODE))) \
+ <= NPARM_REGS (MODE))) \
+ : ROUND_REG ((CUM), (MODE)) < NPARM_REGS (MODE)))
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+
+ On SH the first args are normally in registers
+ and the rest are pushed. Any arg that starts within the first
+ NPARM_REGS words is at least partially passed in a register unless
+ its data type forbids. */
+
+extern int current_function_varargs;
+
+#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \
+ ((PASS_IN_REG_P ((CUM), (MODE), (TYPE)) \
+ && ((NAMED) || TARGET_SH3E || ! current_function_varargs)) \
+ ? gen_rtx (REG, (MODE), \
+ ((BASE_ARG_REG (MODE) + ROUND_REG ((CUM), (MODE))) \
+ ^ ((MODE) == SFmode && TARGET_SH4 \
+ && TARGET_LITTLE_ENDIAN != 0))) \
+ : 0)
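+
+/* A note on the `^' above: on a little endian SH4, an SFmode argument
+   has its register number flipped by one within its pair, e.g. fr4
+   becomes fr5, presumably so that a float stored from its argument
+   register overlaps the correct half of a little endian double.  */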
+
+/* For an arg passed partly in registers and partly in memory,
+ this is the number of registers used.
+ For args passed entirely in registers or entirely in memory, zero.
+
+ We sometimes split args. */
+
+#define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) \
+ ((PASS_IN_REG_P ((CUM), (MODE), (TYPE)) \
+ && ! TARGET_SH4 \
+ && (ROUND_REG ((CUM), (MODE)) \
+ + ((MODE) != BLKmode \
+ ? ROUND_ADVANCE (GET_MODE_SIZE (MODE)) \
+ : ROUND_ADVANCE (int_size_in_bytes (TYPE))) \
+ - NPARM_REGS (MODE) > 0)) \
+ ? NPARM_REGS (MODE) - ROUND_REG ((CUM), (MODE)) \
+ : 0)
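+
+/* For example (non-SH4): a 16 byte struct arriving when three argument
+   words are already in use has ROUND_REG == 3 and needs four words, so
+   3 + 4 - 4 > 0 holds and the macro returns 4 - 3 == 1: one word is
+   passed in r7 and the remainder on the stack.  */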
+
+extern int current_function_anonymous_args;
+
+/* Perform any actions needed for a function that is receiving a
+ variable number of arguments. */
+
+#define SETUP_INCOMING_VARARGS(ASF, MODE, TYPE, PAS, ST) \
+ current_function_anonymous_args = 1;
+
+/* Call the function profiler with a given profile label.
+   We use two .aligns to make sure both that the .long is aligned on a
+   4 byte boundary and that it is a fixed distance (2 bytes) from the
+   trapa instruction. */
+
+#define FUNCTION_PROFILER(STREAM,LABELNO) \
+{ \
+ fprintf((STREAM), "\t.align\t2\n"); \
+ fprintf((STREAM), "\ttrapa\t#33\n"); \
+ fprintf((STREAM), "\t.align\t2\n"); \
+ asm_fprintf((STREAM), "\t.long\t%LLP%d\n", (LABELNO)); \
+}
+
+/* Define this macro if the code for function profiling should come
+ before the function prologue. Normally, the profiling code comes
+ after. */
+
+#define PROFILE_BEFORE_PROLOGUE
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+#define EXIT_IGNORE_STACK 1
+
+/* Generate the assembly code for function exit
+ Just dump out any accumulated constant table. */
+
+#define FUNCTION_EPILOGUE(STREAM, SIZE) function_epilogue ((STREAM), (SIZE))
+
+/*
+   On the SH, the trampoline looks like
+   1 0000 DD02       mov.l l2,r13
+   2 0002 D301       mov.l l1,r3
+   3 0004 4D2B       jmp   @r13
+   4 0006 0009       nop
+   5 0008 00000000   l1: .long area
+   6 000c 00000000   l2: .long function */
+
+/* Length in units of the trampoline for entering a nested function. */
+#define TRAMPOLINE_SIZE 16
+
+/* Alignment required for a trampoline, in bits. */
+#define TRAMPOLINE_ALIGNMENT \
+  ((CACHE_LOG < 3 || (TARGET_SMALLCODE && ! TARGET_HARVARD)) ? 32 : 64)
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+
+#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
+{ \
+ emit_move_insn (gen_rtx (MEM, SImode, (TRAMP)), \
+ GEN_INT (TARGET_LITTLE_ENDIAN ? 0xd301dd02 : 0xdd02d301));\
+ emit_move_insn (gen_rtx (MEM, SImode, plus_constant ((TRAMP), 4)), \
+ GEN_INT (TARGET_LITTLE_ENDIAN ? 0x00094d2b : 0x4d2b0009));\
+ emit_move_insn (gen_rtx (MEM, SImode, plus_constant ((TRAMP), 8)), \
+ (CXT)); \
+ emit_move_insn (gen_rtx (MEM, SImode, plus_constant ((TRAMP), 12)), \
+ (FNADDR)); \
+ if (TARGET_HARVARD) \
+ emit_insn (gen_ic_invalidate_line (TRAMP)); \
+}
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame.
+ FRAMEADDR is already the frame pointer of the COUNT frame, so we
+ can ignore COUNT. */
+
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ (((COUNT) == 0) \
+ ? gen_rtx (MEM, Pmode, gen_rtx (REG, Pmode, RETURN_ADDRESS_POINTER_REGNUM)) \
+ : (rtx) 0)
+
+/* Generate necessary RTL for __builtin_saveregs().
+ ARGLIST is the argument list; see expr.c. */
+extern struct rtx_def *sh_builtin_saveregs ();
+#define EXPAND_BUILTIN_SAVEREGS(ARGLIST) sh_builtin_saveregs (ARGLIST)
+
+/* Addressing modes, and classification of registers for them. */
+#define HAVE_POST_INCREMENT 1
+/*#define HAVE_PRE_INCREMENT 1*/
+/*#define HAVE_POST_DECREMENT 1*/
+#define HAVE_PRE_DECREMENT 1
+
+#define USE_LOAD_POST_INCREMENT(mode) ((mode == SImode || mode == DImode) \
+ ? 0 : 1)
+#define USE_LOAD_PRE_DECREMENT(mode) 0
+#define USE_STORE_POST_INCREMENT(mode) 0
+#define USE_STORE_PRE_DECREMENT(mode) ((mode == SImode || mode == DImode) \
+ ? 0 : 1)
+
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) (move_by_pieces_ninsns (SIZE, ALIGN) \
+ < (TARGET_SMALLCODE ? 2 : \
+ ((ALIGN >= 4) ? 16 : 2)))
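+
+/* That is: an inline block move is used when it takes fewer than 16
+   insns on word aligned data, but only when it is a single insn if the
+   data is unaligned or TARGET_SMALLCODE (compiling for space) is in
+   effect.  */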
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ ((REGNO) < PR_REG || (unsigned) reg_renumber[(REGNO)] < PR_REG)
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+ ((REGNO) == 0 || (unsigned) reg_renumber[(REGNO)] == 0)
+
+/* Maximum number of registers that can appear in a valid memory
+ address. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Recognize any constant value that is a valid address. */
+
+#define CONSTANT_ADDRESS_P(X) (GET_CODE (X) == LABEL_REF)
+
+/* Nonzero if the constant value X is a legitimate general operand. */
+
+#define LEGITIMATE_CONSTANT_P(X) \
+ (GET_CODE (X) != CONST_DOUBLE \
+ || GET_MODE (X) == DFmode || GET_MODE (X) == SFmode \
+ || (TARGET_SH3E && (fp_zero_operand (X) || fp_one_operand (X))))
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used. */
+
+#ifndef REG_OK_STRICT
+
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_BASE_P(X) \
+ (REGNO (X) <= 16 || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_INDEX_P(X) \
+ (REGNO (X) == 0 || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+/* Nonzero if X/OFFSET is a hard reg that can be used as an index
+ or if X is a pseudo reg. */
+#define SUBREG_OK_FOR_INDEX_P(X, OFFSET) \
+ ((REGNO (X) == 0 && OFFSET == 0) || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+#else
+
+/* Nonzero if X is a hard reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X) \
+ REGNO_OK_FOR_BASE_P (REGNO (X))
+
+/* Nonzero if X is a hard reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X) \
+ REGNO_OK_FOR_INDEX_P (REGNO (X))
+
+/* Nonzero if X/OFFSET is a hard reg that can be used as an index. */
+#define SUBREG_OK_FOR_INDEX_P(X, OFFSET) \
+ (REGNO_OK_FOR_INDEX_P (REGNO (X)) && (OFFSET) == 0)
+
+#endif
+
+/* The 'Q' constraint is a pc relative load operand. */
+#define EXTRA_CONSTRAINT_Q(OP) \
+ (GET_CODE (OP) == MEM && \
+ ((GET_CODE (XEXP ((OP), 0)) == LABEL_REF) \
+ || (GET_CODE (XEXP ((OP), 0)) == CONST \
+ && GET_CODE (XEXP (XEXP ((OP), 0), 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP (XEXP ((OP), 0), 0), 0)) == LABEL_REF \
+ && GET_CODE (XEXP (XEXP (XEXP ((OP), 0), 0), 1)) == CONST_INT)))
+
+#define EXTRA_CONSTRAINT(OP, C) \
+ ((C) == 'Q' ? EXTRA_CONSTRAINT_Q (OP) \
+ : 0)
+
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression
+ that is a valid memory address for an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+
+ The other macros defined here are used only in GO_IF_LEGITIMATE_ADDRESS. */
+
+#define MODE_DISP_OK_4(X,MODE) \
+(GET_MODE_SIZE (MODE) == 4 && (unsigned) INTVAL (X) < 64 \
+ && ! (INTVAL (X) & 3) && ! (TARGET_SH3E && (MODE) == SFmode))
+
+#define MODE_DISP_OK_8(X,MODE) \
+((GET_MODE_SIZE(MODE)==8) && ((unsigned)INTVAL(X)<60) \
+ && ! (INTVAL(X) & 3) && ! (TARGET_SH4 && (MODE) == DFmode))
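+
+/* For illustration: MODE_DISP_OK_4 admits @(0,Rn) through @(60,Rn) in
+   steps of 4 for SImode, so `mov.l @(60,r4),r1' is legal, while
+   MODE_DISP_OK_8 stops at @(56,Rn) so that the second word of a DImode
+   access still falls within the encodable range.  */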
+
+#define BASE_REGISTER_RTX_P(X) \
+ ((GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \
+ || (GET_CODE (X) == SUBREG \
+ && GET_CODE (SUBREG_REG (X)) == REG \
+ && REG_OK_FOR_BASE_P (SUBREG_REG (X))))
+
+/* Since this must be r0, which is a single register class, we must check
+ SUBREGs more carefully, to be sure that we don't accept one that extends
+ outside the class. */
+#define INDEX_REGISTER_RTX_P(X) \
+ ((GET_CODE (X) == REG && REG_OK_FOR_INDEX_P (X)) \
+ || (GET_CODE (X) == SUBREG \
+ && GET_CODE (SUBREG_REG (X)) == REG \
+ && SUBREG_OK_FOR_INDEX_P (SUBREG_REG (X), SUBREG_WORD (X))))
+
+/* Jump to LABEL if X is a valid address RTX. This must also take
+ REG_OK_STRICT into account when deciding about valid registers, but it uses
+ the above macros so we are in luck.
+
+ Allow REG
+ REG+disp
+ REG+r0
+ REG++
+ --REG */
+
+/* ??? The SH3e does not have the REG+disp addressing mode when loading values
+ into the FRx registers. We implement this by setting the maximum offset
+ to zero when the value is SFmode. This also restricts loading of SFmode
+ values into the integer registers, but that can't be helped. */
+
+/* The SH allows a displacement in a QImode or HImode address, but only when the
+ other operand is R0. GCC doesn't handle this very well, so we forgo
+ all of that.
+
+   A legitimate index for a QI or HI is 0; for SI it can be any
+   multiple of 4 in 0..60, and for DI any multiple of 4 in 0..56. */
+
+#define GO_IF_LEGITIMATE_INDEX(MODE, OP, LABEL) \
+ do { \
+ if (GET_CODE (OP) == CONST_INT) \
+ { \
+ if (MODE_DISP_OK_4 ((OP), (MODE))) goto LABEL; \
+ if (MODE_DISP_OK_8 ((OP), (MODE))) goto LABEL; \
+ } \
+ } while(0)
+
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
+{ \
+ if (BASE_REGISTER_RTX_P (X)) \
+ goto LABEL; \
+ else if ((GET_CODE (X) == POST_INC || GET_CODE (X) == PRE_DEC) \
+ && BASE_REGISTER_RTX_P (XEXP ((X), 0))) \
+ goto LABEL; \
+ else if (GET_CODE (X) == PLUS \
+ && ((MODE) != PSImode || reload_completed)) \
+ { \
+ rtx xop0 = XEXP ((X), 0); \
+ rtx xop1 = XEXP ((X), 1); \
+ if (GET_MODE_SIZE (MODE) <= 8 && BASE_REGISTER_RTX_P (xop0)) \
+ GO_IF_LEGITIMATE_INDEX ((MODE), xop1, LABEL); \
+ if (GET_MODE_SIZE (MODE) <= 4 \
+	      || (TARGET_SH4 && TARGET_FMOVD && (MODE) == DFmode)) \
+ { \
+ if (BASE_REGISTER_RTX_P (xop1) && INDEX_REGISTER_RTX_P (xop0))\
+ goto LABEL; \
+ if (INDEX_REGISTER_RTX_P (xop1) && BASE_REGISTER_RTX_P (xop0))\
+ goto LABEL; \
+ } \
+ } \
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ MODE and WIN are passed so that this macro can use
+ GO_IF_LEGITIMATE_ADDRESS.
+
+ It is always safe for this macro to do nothing. It exists to recognize
+ opportunities to optimize the output.
+
+ For the SH, if X is almost suitable for indexing, but the offset is
+ out of range, convert it into a normal form so that cse has a chance
+ of reducing the number of address registers used. */
+
+#define LEGITIMIZE_ADDRESS(X,OLDX,MODE,WIN) \
+{ \
+ if (GET_CODE (X) == PLUS \
+ && (GET_MODE_SIZE (MODE) == 4 \
+ || GET_MODE_SIZE (MODE) == 8) \
+ && GET_CODE (XEXP ((X), 1)) == CONST_INT \
+ && BASE_REGISTER_RTX_P (XEXP ((X), 0)) \
+ && ! (TARGET_SH4 && (MODE) == DFmode) \
+ && ! (TARGET_SH3E && (MODE) == SFmode)) \
+ { \
+ rtx index_rtx = XEXP ((X), 1); \
+ HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; \
+ rtx sum; \
+ \
+ GO_IF_LEGITIMATE_INDEX ((MODE), index_rtx, WIN); \
+ /* On rare occasions, we might get an unaligned pointer \
+ that is indexed in a way to give an aligned address. \
+ Therefore, keep the lower two bits in offset_base. */ \
+ /* Instead of offset_base 128..131 use 124..127, so that \
+ simple add suffices. */ \
+ if (offset > 127) \
+ { \
+ offset_base = ((offset + 4) & ~60) - 4; \
+ } \
+ else \
+ offset_base = offset & ~60; \
+ /* Sometimes the normal form does not suit DImode. We \
+ could avoid that by using smaller ranges, but that \
+ would give less optimized code when SImode is \
+ prevalent. */ \
+ if (GET_MODE_SIZE (MODE) + offset - offset_base <= 64) \
+ { \
+ sum = expand_binop (Pmode, add_optab, XEXP ((X), 0), \
+ GEN_INT (offset_base), NULL_RTX, 0, \
+ OPTAB_LIB_WIDEN); \
+ \
+ (X) = gen_rtx (PLUS, Pmode, sum, GEN_INT (offset - offset_base)); \
+ goto WIN; \
+ } \
+ } \
+}
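+
+/* For illustration, the offset splitting above (reused by
+   LEGITIMIZE_RELOAD_ADDRESS below) written out as a plain function;
+   a sketch only, with an invented name:  */
+#if 0
+static int
+split_offset (int offset, int *residual)
+{
+  int offset_base;
+
+  if (offset > 127)
+    /* Use 124..127 rather than 128..131 so that a simple add suffices.  */
+    offset_base = ((offset + 4) & ~60) - 4;
+  else
+    offset_base = offset & ~60;
+  /* The residual is a multiple of 4 in 0..60, hence always encodable;
+     any misaligned low bits stay behind in offset_base.  */
+  *residual = offset - offset_base;
+  return offset_base;
+}
+#endif
+
+/* E.g. an SImode access at offset 100 splits into a base add of 64 and
+   a legitimate displacement of 36.  */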
+
+/* A C compound statement that attempts to replace X, which is an address
+ that needs reloading, with a valid memory address for an operand of
+ mode MODE. WIN is a C statement label elsewhere in the code.
+
+ Like for LEGITIMIZE_ADDRESS, for the SH we try to get a normal form
+ of the address. That will allow inheritance of the address reloads. */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
+{ \
+ if (GET_CODE (X) == PLUS \
+ && (GET_MODE_SIZE (MODE) == 4 || GET_MODE_SIZE (MODE) == 8) \
+ && GET_CODE (XEXP (X, 1)) == CONST_INT \
+ && BASE_REGISTER_RTX_P (XEXP (X, 0)) \
+ && ! (TARGET_SH4 && (MODE) == DFmode) \
+ && ! ((MODE) == PSImode && (TYPE) == RELOAD_FOR_INPUT_ADDRESS)) \
+ { \
+ rtx index_rtx = XEXP (X, 1); \
+ HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; \
+ rtx sum; \
+ \
+ if (TARGET_SH3E && MODE == SFmode) \
+ { \
+ X = copy_rtx (X); \
+ push_reload (index_rtx, NULL_RTX, &XEXP (X, 1), NULL_PTR, \
+ INDEX_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), \
+ (TYPE)); \
+ goto WIN; \
+ } \
+ /* Instead of offset_base 128..131 use 124..127, so that \
+ simple add suffices. */ \
+ if (offset > 127) \
+ { \
+ offset_base = ((offset + 4) & ~60) - 4; \
+ } \
+ else \
+ offset_base = offset & ~60; \
+ /* Sometimes the normal form does not suit DImode. We \
+ could avoid that by using smaller ranges, but that \
+ would give less optimized code when SImode is \
+ prevalent. */ \
+ if (GET_MODE_SIZE (MODE) + offset - offset_base <= 64) \
+ { \
+ sum = gen_rtx (PLUS, Pmode, XEXP (X, 0), \
+ GEN_INT (offset_base)); \
+ X = gen_rtx (PLUS, Pmode, sum, GEN_INT (offset - offset_base));\
+ push_reload (sum, NULL_RTX, &XEXP (X, 0), NULL_PTR, \
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), \
+ (TYPE)); \
+ goto WIN; \
+ } \
+ } \
+ /* We must re-recognize what we created before. */ \
+ else if (GET_CODE (X) == PLUS \
+ && (GET_MODE_SIZE (MODE) == 4 || GET_MODE_SIZE (MODE) == 8) \
+ && GET_CODE (XEXP (X, 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \
+ && BASE_REGISTER_RTX_P (XEXP (XEXP (X, 0), 0)) \
+ && GET_CODE (XEXP (X, 1)) == CONST_INT \
+ && ! (TARGET_SH3E && MODE == SFmode)) \
+ { \
+ /* Because this address is so complex, we know it must have \
+ been created by LEGITIMIZE_RELOAD_ADDRESS before; thus, \
+ it is already unshared, and needs no further unsharing. */ \
+ push_reload (XEXP ((X), 0), NULL_RTX, &XEXP ((X), 0), NULL_PTR, \
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), (TYPE));\
+ goto WIN; \
+ } \
+}
+
+/* Go to LABEL if ADDR (a legitimate address expression)
+ has an effect that depends on the machine mode it is used for.
+
+ ??? Strictly speaking, we should also include all indexed addressing,
+ because the index scale factor is the length of the operand.
+   However, the impact of GO_IF_MODE_DEPENDENT_ADDRESS would be too
+   high if we did that.  So we rely on reload to fix things up. */
+
+#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR,LABEL) \
+{ \
+ if (GET_CODE(ADDR) == PRE_DEC || GET_CODE(ADDR) == POST_INC) \
+ goto LABEL; \
+}
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE (TARGET_BIGTABLE ? SImode : HImode)
+
+#define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \
+((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 127 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, QImode) \
+ : (MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 255 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, QImode) \
+ : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 ? HImode \
+ : SImode)
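+
+/* That is, a dispatch table whose offsets all lie in 0..127 is emitted
+   as signed bytes, one within 0..255 as unsigned bytes, one within
+   -32768..32767 as 16 bit words, and only larger tables fall back to
+   32 bit entries.  */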
+
+/* Define as C expression which evaluates to nonzero if the tablejump
+ instruction expects the table to contain offsets from the address of the
+ table.
+ Do not define this if the table should contain absolute addresses. */
+#define CASE_VECTOR_PC_RELATIVE 1
+
+/* Specify the tree operation to be used to convert reals to integers. */
+#define IMPLICIT_FIX_EXPR FIX_ROUND_EXPR
+
+/* This is the kind of divide that is easiest to do in the general case. */
+#define EASY_DIV_EXPR TRUNC_DIV_EXPR
+
+/* Since the SH3e has only `float' support, it is desirable to make all
+ floating point types equivalent to `float'. */
+#define DOUBLE_TYPE_SIZE ((TARGET_SH3E && ! TARGET_SH4) ? 32 : 64)
+
+/* 'char' is signed by default. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* The type of size_t is unsigned int. */
+#define SIZE_TYPE "unsigned int"
+
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+/* Don't cse the address of the function being compiled. */
+/*#define NO_RECURSIVE_FUNCTION_CSE 1*/
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 4
+
+/* Max number of bytes we want move_by_pieces to be able to copy
+ efficiently. */
+#define MOVE_MAX_PIECES (TARGET_SH4 ? 8 : 4)
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, NIL if none. */
+#define LOAD_EXTEND_OP(MODE) SIGN_EXTEND
+
+/* Define if loading short immediate values into registers sign extends. */
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+/* Define this if zero-extension is slow (more than one real instruction).
+ On the SH, it's only one instruction. */
+/* #define SLOW_ZERO_EXTEND */
+
+/* Nonzero if access to memory by bytes is slow and undesirable. */
+#define SLOW_BYTE_ACCESS 0
+
+/* We assume that the store-condition-codes instructions store 0 for false
+ and some other value for true. This is the value stored for true. */
+
+#define STORE_FLAG_VALUE 1
+
+/* Immediate shift counts are truncated by the output routines (or was it
+ the assembler?). Shift counts in a register are truncated by SH. Note
+ that the native compiler puts too large (> 32) immediate shift counts
+ into a register and shifts by the register, letting the SH decide what
+ to do instead of doing that itself. */
+/* ??? The library routines in lib1funcs.asm truncate the shift count.
+ However, the SH3 has hardware shifts that do not truncate exactly as gcc
+ expects - the sign bit is significant - so it appears that we need to
+ leave this zero for correct SH3 code. */
+#define SHIFT_COUNT_TRUNCATED (! TARGET_SH3)
+
+/* All integers have the same format so truncation is easy. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) 1
+
+/* Define this if addresses of constant functions
+ shouldn't be put through pseudo regs where they can be cse'd.
+ Desirable on machines where ordinary constants are expensive
+ but a CALL with constant address is cheap. */
+/*#define NO_FUNCTION_CSE 1*/
+
+/* Chars and shorts should be passed as ints. */
+#define PROMOTE_PROTOTYPES 1
+
+/* The machine modes of pointers and functions. */
+#define Pmode SImode
+#define FUNCTION_MODE Pmode
+
+/* The relative costs of various types of constants. Note that cse.c defines
+ REG = 1, SUBREG = 2, any node = (2 + sum of subnodes). */
+
+#define CONST_COSTS(RTX, CODE, OUTER_CODE) \
+ case CONST_INT: \
+ if (INTVAL (RTX) == 0) \
+ return 0; \
+ else if (CONST_OK_FOR_I (INTVAL (RTX))) \
+ return 1; \
+ else if (((OUTER_CODE) == AND || (OUTER_CODE) == IOR || (OUTER_CODE) == XOR) \
+ && CONST_OK_FOR_L (INTVAL (RTX))) \
+ return 1; \
+ else \
+ return 8; \
+ case CONST: \
+ case LABEL_REF: \
+ case SYMBOL_REF: \
+ return 5; \
+ case CONST_DOUBLE: \
+ return 10;
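+
+/* In other words: 0 is free; an immediate satisfying `I' (or `L' under
+   AND/IOR/XOR) costs 1; symbolic addresses cost 5; anything else must
+   come from the constant pool, at cost 8, or 10 for a CONST_DOUBLE.  */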
+
+#define RTX_COSTS(X, CODE, OUTER_CODE) \
+ case PLUS: \
+ return (COSTS_N_INSNS (1) \
+ + rtx_cost (XEXP ((X), 0), PLUS) \
+ + (rtx_equal_p (XEXP ((X), 0), XEXP ((X), 1))\
+ ? 0 : rtx_cost (XEXP ((X), 1), PLUS)));\
+ case AND: \
+ return COSTS_N_INSNS (andcosts (X)); \
+ case MULT: \
+ return COSTS_N_INSNS (multcosts (X)); \
+ case ASHIFT: \
+ case ASHIFTRT: \
+ case LSHIFTRT: \
+ /* Add one extra unit for the matching constraint. \
+ Otherwise loop strength reduction would think that\
+	     a shift with different source and destination is \
+ as cheap as adding a constant to a register. */ \
+ return (COSTS_N_INSNS (shiftcosts (X)) \
+ + rtx_cost (XEXP ((X), 0), (CODE)) \
+ + 1); \
+ case DIV: \
+ case UDIV: \
+ case MOD: \
+ case UMOD: \
+ return COSTS_N_INSNS (20); \
+ case FLOAT: \
+ case FIX: \
+ return 100;
+
+/* The multiply insn on the SH1 and the divide insns on the SH1 and SH2
+ are actually function calls with some special constraints on arguments
+ and register usage.
+
+ These macros tell reorg that the references to arguments and
+ register clobbers for insns of type sfunc do not appear to happen
+ until after the millicode call. This allows reorg to put insns
+ which set the argument registers into the delay slot of the millicode
+ call -- thus they act more like traditional CALL_INSNs.
+
+ get_attr_is_sfunc will try to recognize the given insn, so make sure to
+ filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
+ in particular. */
+
+#define INSN_SETS_ARE_DELAYED(X) \
+ ((GET_CODE (X) == INSN \
+ && GET_CODE (PATTERN (X)) != SEQUENCE \
+ && GET_CODE (PATTERN (X)) != USE \
+ && GET_CODE (PATTERN (X)) != CLOBBER \
+ && get_attr_is_sfunc (X)))
+
+#define INSN_REFERENCES_ARE_DELAYED(X) \
+ ((GET_CODE (X) == INSN \
+ && GET_CODE (PATTERN (X)) != SEQUENCE \
+ && GET_CODE (PATTERN (X)) != USE \
+ && GET_CODE (PATTERN (X)) != CLOBBER \
+ && get_attr_is_sfunc (X)))
+
+/* Compute the cost of an address. For the SH, all valid addresses are
+ the same cost. */
+/* ??? Perhaps we should make reg+reg addresses have higher cost because
+ they add to register pressure on r0. */
+
+#define ADDRESS_COST(RTX) 1
+
+/* Compute extra cost of moving data between one register class
+ and another. */
+
+/* Regclass always uses 2 for moves in the same register class;
+   if SECONDARY*_RELOAD_CLASS says something about the src/dst pair,
+ it uses this information. Hence, the general register <-> floating point
+ register information here is not used for SFmode. */
+#define REGISTER_MOVE_COST(SRCCLASS, DSTCLASS) \
+  ((((DSTCLASS) == T_REGS) || ((DSTCLASS) == PR_REGS)) ? 10 \
+ : ((((DSTCLASS) == FP0_REGS || (DSTCLASS) == FP_REGS || (DSTCLASS) == DF_REGS) \
+ && ((SRCCLASS) == GENERAL_REGS || (SRCCLASS) == R0_REGS)) \
+ || (((DSTCLASS) == GENERAL_REGS || (DSTCLASS) == R0_REGS) \
+ && ((SRCCLASS) == FP0_REGS || (SRCCLASS) == FP_REGS \
+ || (SRCCLASS) == DF_REGS))) \
+ ? TARGET_FMOVD ? 8 : 12 \
+ : (((DSTCLASS) == FPUL_REGS \
+ && ((SRCCLASS) == GENERAL_REGS || (SRCCLASS) == R0_REGS)) \
+ || (SRCCLASS == FPUL_REGS \
+ && ((DSTCLASS) == GENERAL_REGS || (DSTCLASS) == R0_REGS))) \
+ ? 5 \
+ : (((DSTCLASS) == FPUL_REGS \
+ && ((SRCCLASS) == PR_REGS || (SRCCLASS) == MAC_REGS)) \
+ || ((SRCCLASS) == FPUL_REGS \
+ && ((DSTCLASS) == PR_REGS || (DSTCLASS) == MAC_REGS))) \
+ ? 7 \
+ : 2)
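+
+/* So, for example, general <-> FP moves cost 12 (8 when -mfmovd allows
+   paired fmov), general <-> fpul moves cost 5, fpul <-> pr/mac moves
+   cost 7, and anything stored into the T or PR registers costs 10,
+   which discourages reload from using them as spill homes.  */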
+
+/* ??? Perhaps make MEMORY_MOVE_COST depend on compiler option? This
+   would be so that people with slow memory systems could generate
+ different code that does fewer memory accesses. */
+
+/* Assembler output control. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will end at
+ the end of the line. */
+#define ASM_COMMENT_START "!"
+
+/* The text to go at the start of the assembler file. */
+#define ASM_FILE_START(STREAM) \
+ output_file_start (STREAM)
+
+#define ASM_FILE_END(STREAM)
+
+#define ASM_APP_ON ""
+#define ASM_APP_OFF ""
+#define FILE_ASM_OP "\t.file\n"
+#define IDENT_ASM_OP "\t.ident\n"
+#define SET_ASM_OP ".set"
+
+/* How to change between sections. */
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define CTORS_SECTION_ASM_OP "\t.section\t.ctors\n"
+#define DTORS_SECTION_ASM_OP "\t.section\t.dtors\n"
+#define EXTRA_SECTIONS in_ctors, in_dtors
+#define EXTRA_SECTION_FUNCTIONS \
+void \
+ctors_section() \
+{ \
+ if (in_section != in_ctors) \
+ { \
+ fprintf (asm_out_file, "%s\n", CTORS_SECTION_ASM_OP); \
+ in_section = in_ctors; \
+ } \
+} \
+void \
+dtors_section() \
+{ \
+ if (in_section != in_dtors) \
+ { \
+ fprintf (asm_out_file, "%s\n", DTORS_SECTION_ASM_OP); \
+ in_section = in_dtors; \
+ } \
+}
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as
+ uninitialized global data. If not defined, and neither
+ `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined,
+ uninitialized global data will be output in the data section if
+ `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be
+ used. */
+#ifndef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP ".section\t.bss"
+#endif
+
+/* Like `ASM_OUTPUT_BSS' except takes the required alignment as a
+ separate, explicit argument. If you define this macro, it is used
+ in place of `ASM_OUTPUT_BSS', and gives you more flexibility in
+ handling the required alignment of the variable. The alignment is
+ specified as the number of bits.
+
+ Try to use function `asm_output_aligned_bss' defined in file
+ `varasm.c' when defining this macro. */
+#ifndef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+#endif
+
+/* Define this so that jump tables go in same section as the current function,
+ which could be text or it could be a user defined section. */
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* A C statement to output something to the assembler file to switch to section
+ NAME for object DECL which is either a FUNCTION_DECL, a VAR_DECL or
+ NULL_TREE. Some target formats do not support arbitrary sections. Do not
+ define this macro in such cases. */
+
+#define ASM_OUTPUT_SECTION_NAME(FILE, DECL, NAME, RELOC) \
+ do { fprintf (FILE, ".section\t%s\n", NAME); } while (0)
+
+#define ASM_OUTPUT_CONSTRUCTOR(FILE,NAME) \
+ do { ctors_section(); asm_fprintf((FILE),"\t.long\t%U%s\n", (NAME)); } while (0)
+
+#define ASM_OUTPUT_DESTRUCTOR(FILE,NAME) \
+ do { dtors_section(); asm_fprintf((FILE),"\t.long\t%U%s\n", (NAME)); } while (0)
+
+#undef DO_GLOBAL_CTORS_BODY
+
+#define DO_GLOBAL_CTORS_BODY \
+{ \
+  typedef void (*pfunc)(); \
+ extern pfunc __ctors[]; \
+ extern pfunc __ctors_end[]; \
+ pfunc *p; \
+ for (p = __ctors_end; p > __ctors; ) \
+ { \
+ (*--p)(); \
+ } \
+}
+
+#undef DO_GLOBAL_DTORS_BODY
+#define DO_GLOBAL_DTORS_BODY \
+{ \
+  typedef void (*pfunc)(); \
+ extern pfunc __dtors[]; \
+ extern pfunc __dtors_end[]; \
+ pfunc *p; \
+ for (p = __dtors; p < __dtors_end; p++) \
+ { \
+ (*p)(); \
+ } \
+}
+
+#define ASM_OUTPUT_REG_PUSH(file, v) \
+  fprintf ((file), "\tmov.l\tr%d,@-r15\n", (v));
+
+#define ASM_OUTPUT_REG_POP(file, v) \
+  fprintf ((file), "\tmov.l\t@r15+,r%d\n", (v));
+
+/* The assembler's names for the registers. */
+
+extern char fp_reg_names[][5];
+
+#define REGISTER_NAMES \
+{ \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "ap", "pr", "t", "gbr", "mach","macl", fp_reg_names[16], "rap", \
+ fp_reg_names[0], fp_reg_names[1] , fp_reg_names[2], fp_reg_names[3], \
+ fp_reg_names[4], fp_reg_names[5], fp_reg_names[6], fp_reg_names[7], \
+ fp_reg_names[8], fp_reg_names[9], fp_reg_names[10], fp_reg_names[11], \
+ fp_reg_names[12], fp_reg_names[13], fp_reg_names[14], fp_reg_names[15], \
+ fp_reg_names[17], fp_reg_names[18], fp_reg_names[19], fp_reg_names[20], \
+ fp_reg_names[21], fp_reg_names[22], fp_reg_names[23], fp_reg_names[24], \
+ "fpscr", \
+}
+
+#define DEBUG_REGISTER_NAMES \
+{ \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "ap", "pr", "t", "gbr", "mach","macl", "fpul","rap", \
+ "fr0","fr1","fr2", "fr3", "fr4", "fr5", "fr6", "fr7", \
+ "fr8","fr9","fr10","fr11","fr12","fr13","fr14","fr15",\
+ "xd0","xd2","xd4", "xd6", "xd8", "xd10","xd12","xd14", \
+ "fpscr", \
+}
+
+/* DBX register number for a given compiler register number. */
+/* GDB has FPUL at 23 and FP0 at 25, so we must add one to all FP registers
+ to match gdb. */
+#define DBX_REGISTER_NUMBER(REGNO) \
+ (((REGNO) >= 22 && (REGNO) <= 39) ? ((REGNO) + 1) : (REGNO))
+
+/* Output a label definition. */
+#define ASM_OUTPUT_LABEL(FILE,NAME) \
+ do { assemble_name ((FILE), (NAME)); fputs (":\n", (FILE)); } while (0)
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) \
+ fprintf ((FILE), "\t.align %d\n", (LOG))
+
+/* Output a function label definition. */
+#define ASM_DECLARE_FUNCTION_NAME(STREAM,NAME,DECL) \
+ ASM_OUTPUT_LABEL((STREAM), (NAME))
+
+/* Output a globalising directive for a label. */
+#define ASM_GLOBALIZE_LABEL(STREAM,NAME) \
+ (fprintf ((STREAM), "\t.global\t"), \
+ assemble_name ((STREAM), (NAME)), \
+ fputc ('\n', (STREAM)))
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#define USER_LABEL_PREFIX "_"
+
+/* The prefix to add to an internally generated label. */
+
+#define LOCAL_LABEL_PREFIX ""
+
+/* Make an internal label into a string. */
+#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \
+ sprintf ((STRING), "*%s%s%d", LOCAL_LABEL_PREFIX, (PREFIX), (NUM))
+
+/* Output an internal label definition. */
+#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \
+ asm_fprintf ((FILE), "%L%s%d:\n", (PREFIX), (NUM))
+
+/* #define ASM_OUTPUT_CASE_END(STREAM,NUM,TABLE) */
+
+/* Construct a private name. */
+#define ASM_FORMAT_PRIVATE_NAME(OUTVAR,NAME,NUMBER) \
+ ((OUTVAR) = (char *) alloca (strlen (NAME) + 10), \
+ sprintf ((OUTVAR), "%s.%d", (NAME), (NUMBER)))
+
+/* Output a relative address table. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM,BODY,VALUE,REL) \
+ switch (GET_MODE (BODY)) \
+ { \
+ case SImode: \
+ asm_fprintf ((STREAM), "\t.long\t%LL%d-%LL%d\n", (VALUE),(REL)); \
+ break; \
+ case HImode: \
+ asm_fprintf ((STREAM), "\t.word\t%LL%d-%LL%d\n", (VALUE),(REL)); \
+ break; \
+ case QImode: \
+ asm_fprintf ((STREAM), "\t.byte\t%LL%d-%LL%d\n", (VALUE),(REL)); \
+ break; \
+ }
+
+/* Output an absolute table element. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM,VALUE) \
+ if (TARGET_BIGTABLE) \
+ asm_fprintf ((STREAM), "\t.long\t%LL%d\n", (VALUE)); \
+ else \
+    asm_fprintf ((STREAM), "\t.word\t%LL%d\n", (VALUE));
+
+/* Output various types of constants. */
+
+/* This is how to output an assembler line defining a `double'. */
+
+#define ASM_OUTPUT_DOUBLE(FILE,VALUE) \
+do { char dstr[30]; \
+ REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", dstr); \
+ fprintf ((FILE), "\t.double %s\n", dstr); \
+ } while (0)
+
+/* This is how to output an assembler line defining a `float' constant. */
+#define ASM_OUTPUT_FLOAT(FILE,VALUE) \
+do { char dstr[30]; \
+ REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", dstr); \
+ fprintf ((FILE), "\t.float %s\n", dstr); \
+ } while (0)
+
+#define ASM_OUTPUT_INT(STREAM, EXP) \
+ (fprintf ((STREAM), "\t.long\t"), \
+ output_addr_const ((STREAM), (EXP)), \
+ fputc ('\n', (STREAM)))
+
+#define ASM_OUTPUT_SHORT(STREAM, EXP) \
+ (fprintf ((STREAM), "\t.short\t"), \
+ output_addr_const ((STREAM), (EXP)), \
+ fputc ('\n', (STREAM)))
+
+#define ASM_OUTPUT_CHAR(STREAM, EXP) \
+ (fprintf ((STREAM), "\t.byte\t"), \
+ output_addr_const ((STREAM), (EXP)), \
+ fputc ('\n', (STREAM)))
+
+#define ASM_OUTPUT_BYTE(STREAM, VALUE) \
+  fprintf ((STREAM), "\t.byte\t%d\n", (VALUE))
+
+/* The next two are used for debug info when compiling with -gdwarf. */
+#define UNALIGNED_SHORT_ASM_OP ".uaword"
+#define UNALIGNED_INT_ASM_OP ".ualong"
+
+/* Loop alignment is now done in machine_dependent_reorg, so that
+ branch shortening can know about it. */
+
+/* This is how to output an assembler line
+ that says to advance the location counter by SIZE bytes. */
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf ((FILE), "\t.space %d\n", (SIZE))
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs ("\t.comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%d\n", (SIZE)))
+
+/* This says how to output an assembler line
+ to define a local common symbol. */
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs ("\t.lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%d\n", (SIZE)))
+
+/* The assembler's parentheses characters. */
+#define ASM_OPEN_PAREN "("
+#define ASM_CLOSE_PAREN ")"
+
+/* Target characters. */
+#define TARGET_BELL 007
+#define TARGET_BS 010
+#define TARGET_TAB 011
+#define TARGET_NEWLINE 012
+#define TARGET_VT 013
+#define TARGET_FF 014
+#define TARGET_CR 015
+
+/* A C statement to be executed just prior to the output of
+ assembler code for INSN, to modify the extracted operands so
+ they will be output differently.
+
+ Here the argument OPVEC is the vector containing the operands
+ extracted from INSN, and NOPERANDS is the number of elements of
+ the vector which contain meaningful data for this insn.
+ The contents of this vector are what will be used to convert the insn
+ template into assembler code, so you can change the assembler output
+ by changing the contents of the vector. */
+
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
+ final_prescan_insn ((INSN), (OPVEC), (NOPERANDS))
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */
+
+#define PRINT_OPERAND(STREAM, X, CODE) print_operand ((STREAM), (X), (CODE))
+
+/* Print a memory address as an operand to reference that memory location. */
+
+#define PRINT_OPERAND_ADDRESS(STREAM,X) print_operand_address ((STREAM), (X))
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \
+ ((CHAR) == '.' || (CHAR) == '#' || (CHAR) == '@' || (CHAR) == ',' \
+ || (CHAR) == '$')
+
+extern struct rtx_def *sh_compare_op0;
+extern struct rtx_def *sh_compare_op1;
+extern struct rtx_def *prepare_scc_operands();
+
+/* Which processor to schedule for. The elements of the enumeration must
+ match exactly the cpu attribute in the sh.md file. */
+
+enum processor_type {
+ PROCESSOR_SH1,
+ PROCESSOR_SH2,
+ PROCESSOR_SH3,
+ PROCESSOR_SH3E,
+ PROCESSOR_SH4
+};
+
+#define sh_cpu_attr ((enum attr_cpu)sh_cpu)
+extern enum processor_type sh_cpu;
+
+extern enum machine_mode sh_addr_diff_vec_mode;
+
+extern int optimize; /* needed for gen_casesi. */
+
+/* Declare functions defined in sh.c and used in templates. */
+
+extern char *output_branch();
+extern char *output_ieee_ccmpeq();
+extern char *output_branchy_insn();
+extern char *output_shift();
+extern char *output_movedouble();
+extern char *output_movepcrel();
+extern char *output_jump_label_table();
+extern char *output_far_jump();
+
+enum mdep_reorg_phase_e
+{
+ SH_BEFORE_MDEP_REORG,
+ SH_INSERT_USES_LABELS,
+ SH_SHORTEN_BRANCHES0,
+ SH_FIXUP_PCLOAD,
+ SH_SHORTEN_BRANCHES1,
+ SH_AFTER_MDEP_REORG
+};
+
+extern enum mdep_reorg_phase_e mdep_reorg_phase;
+
+void machine_dependent_reorg ();
+struct rtx_def *sfunc_uses_reg ();
+int barrier_align ();
+int sh_loop_align ();
+
+#define MACHINE_DEPENDENT_REORG(X) machine_dependent_reorg(X)
+
+/* Generate calls to memcpy, memcmp and memset. */
+
+#define TARGET_MEM_FUNCTIONS
+
+/* Define this macro if you want to implement any pragmas. If defined, it
+ is a C expression whose value is 1 if the pragma was handled by the
+ macro, zero otherwise. */
+#define HANDLE_PRAGMA(GETC, UNGETC, NODE) sh_handle_pragma (GETC, UNGETC, NODE)
+extern int sh_handle_pragma ();
+
+/* Set when processing a function with pragma interrupt turned on. */
+
+extern int pragma_interrupt;
+
+/* Set to an RTX containing the address of the stack to switch to
+ for interrupt functions. */
+extern struct rtx_def *sp_switch;
+
+/* A C expression whose value is nonzero if IDENTIFIER with arguments ARGS
+ is a valid machine specific attribute for DECL.
+ The attributes in ATTRIBUTES have previously been assigned to DECL. */
+extern int sh_valid_machine_decl_attribute ();
+#define VALID_MACHINE_DECL_ATTRIBUTE(DECL, ATTRIBUTES, IDENTIFIER, ARGS) \
+sh_valid_machine_decl_attribute (DECL, ATTRIBUTES, IDENTIFIER, ARGS)
+
+extern void sh_pragma_insert_attributes ();
+#define PRAGMA_INSERT_ATTRIBUTES(node, pattr, prefix_attr) \
+ sh_pragma_insert_attributes (node, pattr, prefix_attr)
+
+extern int sh_flag_remove_dead_before_cse;
+extern int rtx_equal_function_value_matters;
+extern struct rtx_def *fpscr_rtx;
+extern struct rtx_def *get_fpscr_rtx ();
+
+
+/* Instructions with unfilled delay slots take up an extra two bytes for
+ the nop in the delay slot. */
+
+#define ADJUST_INSN_LENGTH(X, LENGTH) \
+ if (((GET_CODE (X) == INSN \
+ && GET_CODE (PATTERN (X)) != USE \
+ && GET_CODE (PATTERN (X)) != CLOBBER) \
+ || GET_CODE (X) == CALL_INSN \
+ || (GET_CODE (X) == JUMP_INSN \
+ && GET_CODE (PATTERN (X)) != ADDR_DIFF_VEC \
+ && GET_CODE (PATTERN (X)) != ADDR_VEC)) \
+ && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (X)))) != SEQUENCE \
+ && get_attr_needs_delay_slot (X) == NEEDS_DELAY_SLOT_YES) \
+ (LENGTH) += 2;
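+
+/* Illustration (reading off the test above): a "jsr @%1%#" whose delay
+   slot reorg could not fill is emitted as a jsr followed by a nop, so it
+   occupies 4 bytes rather than 2; the statement above adds those 2 bytes
+   to the insn's length.  */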
+
+/* Define the codes that are matched by predicates in sh.c. */
+#define PREDICATE_CODES \
+ {"arith_operand", {SUBREG, REG, CONST_INT}}, \
+ {"arith_reg_operand", {SUBREG, REG}}, \
+ {"arith_reg_or_0_operand", {SUBREG, REG, CONST_INT}}, \
+ {"binary_float_operator", {PLUS, MULT}}, \
+ {"commutative_float_operator", {PLUS, MULT}}, \
+ {"fp_arith_reg_operand", {SUBREG, REG}}, \
+ {"fp_extended_operand", {SUBREG, REG, FLOAT_EXTEND}}, \
+ {"fpscr_operand", {REG}}, \
+ {"general_movsrc_operand", {SUBREG, REG, CONST_INT, MEM}}, \
+ {"general_movdst_operand", {SUBREG, REG, CONST_INT, MEM}}, \
+ {"logical_operand", {SUBREG, REG, CONST_INT}}, \
+ {"noncommutative_float_operator", {MINUS, DIV}}, \
+ {"register_operand", {SUBREG, REG}},
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type.
+
+ Leaving the unsignedp unchanged gives better code than always setting it
+ to 0. This is despite the fact that we have only signed char and short
+ load instructions. */
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ (MODE) = SImode;
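+
+/* E.g. a `short' local is held in a full SImode register; since the mov.b
+   and mov.w loads sign-extend, leaving UNSIGNEDP as the caller set it
+   avoids a redundant explicit extension after each such load.  */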
+
+/* Defining PROMOTE_FUNCTION_ARGS eliminates some unnecessary zero/sign
+   extensions applied to char/short function arguments.  Defining
+ PROMOTE_FUNCTION_RETURN does the same for function returns. */
+
+#define PROMOTE_FUNCTION_ARGS
+#define PROMOTE_FUNCTION_RETURN
+
+/* ??? Define ACCUMULATE_OUTGOING_ARGS? This is more efficient than pushing
+   and popping arguments.  However, we do have push/pop instructions, and
+ rather limited offsets (4 bits) in load/store instructions, so it isn't
+ clear if this would give better code. If implemented, should check for
+ compatibility problems. */
+
+/* A C statement (sans semicolon) to update the integer variable COST
+ based on the relationship between INSN that is dependent on
+ DEP_INSN through the dependence LINK. The default is to make no
+ adjustment to COST. This can be used for example to specify to
+ the scheduler that an output- or anti-dependence does not incur
+ the same cost as a data-dependence. */
+
+#define ADJUST_COST(insn,link,dep_insn,cost) \
+do { \
+ rtx reg; \
+ \
+ if (GET_CODE(insn) == CALL_INSN) \
+ { \
+ /* The only input for a call that is timing-critical is the \
+ function's address. */ \
+ rtx call = PATTERN (insn); \
+ \
+ if (GET_CODE (call) == PARALLEL) \
+      call = XVECEXP (call, 0, 0);				\
+ if (GET_CODE (call) == SET) \
+ call = SET_SRC (call); \
+ if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM \
+ && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)) \
+ (cost) = 0; \
+ } \
+ /* All sfunc calls are parallels with at least four components. \
+ Exploit this to avoid unnecessary calls to sfunc_uses_reg. */ \
+ else if (GET_CODE (PATTERN (insn)) == PARALLEL \
+ && XVECLEN (PATTERN (insn), 0) >= 4 \
+ && (reg = sfunc_uses_reg (insn))) \
+ { \
+      /* Likewise, the most timing-critical input for an sfunc call \
+ is the function address. However, sfuncs typically start \
+ using their arguments pretty quickly. \
+ Assume a four cycle delay before they are needed. */ \
+ if (! reg_set_p (reg, dep_insn)) \
+ cost -= TARGET_SUPERSCALAR ? 40 : 4; \
+ } \
+  /* Adjust load_si / pcload_si type insn latencies.  Use the known \
+ nominal latency and form of the insn to speed up the check. */ \
+ else if (cost == 3 \
+ && GET_CODE (PATTERN (dep_insn)) == SET \
+	   /* Latency for dmpy type insns is also 3, so check that \
+	      it's actually a move insn. */ \
+ && general_movsrc_operand (SET_SRC (PATTERN (dep_insn)), SImode))\
+ cost = 2; \
+ else if (cost == 30 \
+ && GET_CODE (PATTERN (dep_insn)) == SET \
+ && GET_MODE (SET_SRC (PATTERN (dep_insn))) == SImode) \
+ cost = 20; \
+} while (0) \
+
+/* For the sake of libgcc2.c, indicate target supports atexit. */
+#define HAVE_ATEXIT
+
+#define SH_DYNAMIC_SHIFT_COST \
+ (TARGET_HARD_SH4 ? 1 : TARGET_SH3 ? (TARGET_SMALLCODE ? 1 : 2) : 20)
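+
+/* For example, reading off the macro above: a hardware SH4 does a
+   dynamic shift in one cycle; SH3 is charged 1 when optimizing for size
+   and 2 otherwise; SH1/SH2 lack the shld/shad instructions entirely, so
+   dynamic shifts there get a prohibitive cost of 20.  */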
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
new file mode 100755
index 0000000..7e417ab
--- /dev/null
+++ b/gcc/config/sh/sh.md
@@ -0,0 +1,4654 @@
+;; CYGNUS LOCAL SH4 Phase III: REG_LIBCALL / REG_RETVAL wrapping of
+;; MACH_REG / MACL_REG usage.
+;;- Machine description for the Hitachi SH.
+;; Copyright (C) 1993 - 1999 Free Software Foundation, Inc.
+;; Contributed by Steve Chamberlain (sac@cygnus.com).
+;; Improved by Jim Wilson (wilson@cygnus.com).
+
+;; This file is part of GNU CC.
+
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU CC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+
+;; ??? Should prepend a * to all pattern names which are not used.
+;; This will make the compiler smaller, and rebuilds after changes faster.
+
+;; ??? Should be enhanced to include support for many more GNU superoptimizer
+;; sequences. Especially the sequences for arithmetic right shifts.
+
+;; ??? Should check all DImode patterns for consistency and usefulness.
+
+;; ??? The MAC.W and MAC.L instructions are not supported. There is no
+;; way to generate them.
+
+;; ??? The cmp/str instruction is not supported. Perhaps it can be used
+;; for a str* inline function.
+
+;; BSR is not generated by the compiler proper, but when relaxing, it
+;; generates .uses pseudo-ops that allow linker relaxation to create
+;; BSR. This is actually implemented in bfd/{coff,elf32}-sh.c
+
+;; Special constraints for SH machine description:
+;;
+;; t -- T
+;; x -- mac
+;; l -- pr
+;; z -- r0
+;;
+;; Special formats used for outputting SH instructions:
+;;
+;; %. -- print a .s if insn needs delay slot
+;; %@ -- print rte/rts if is/isn't an interrupt function
+;; %# -- output a nop if there is nothing to put in the delay slot
+;; %O -- print a constant without the #
+;; %R -- print the lsw reg of a double
+;; %S -- print the msw reg of a double
+;; %T -- print next word of a double REG or MEM
+;;
+;; Special predicates:
+;;
+;; arith_operand -- operand is valid source for arithmetic op
+;; arith_reg_operand -- operand is valid register for arithmetic op
+;; general_movdst_operand -- operand is valid move destination
+;; general_movsrc_operand -- operand is valid move source
+;; logical_operand -- operand is valid source for logical op
+;; -------------------------------------------------------------------------
+;; Attributes
+;; -------------------------------------------------------------------------
+
+;; Target CPU.
+
+(define_attr "cpu"
+ "sh1,sh2,sh3,sh3e,sh4"
+ (const (symbol_ref "sh_cpu_attr")))
+
+(define_attr "endian" "big,little"
+ (const (if_then_else (symbol_ref "TARGET_LITTLE_ENDIAN")
+ (const_string "little") (const_string "big"))))
+
+(define_attr "fmovd" "yes,no"
+ (const (if_then_else (symbol_ref "TARGET_FMOVD")
+ (const_string "yes") (const_string "no"))))
+;; issues/clock
+(define_attr "issues" "1,2"
+  (const (if_then_else (symbol_ref "TARGET_SUPERSCALAR")
+                       (const_string "2") (const_string "1"))))
+
+;; cbranch conditional branch instructions
+;; jump unconditional jumps
+;; arith ordinary arithmetic
+;; arith3 a compound insn that behaves similarly to a sequence of
+;; three insns of type arith
+;; arith3b like above, but might end with a redirected branch
+;; load from memory
+;; load_si Likewise, SImode variant for general register.
+;; store to memory
+;; move register to register
+;; fmove register to register, floating point
+;; smpy word precision integer multiply
+;; dmpy longword or doublelongword precision integer multiply
+;; return rts
+;; pload load of pr reg, which can't be put into delay slot of rts
+;; pstore store of pr reg, which can't be put into delay slot of jsr
+;; pcload pc relative load of constant value
+;; pcload_si Likewise, SImode variant for general register.
+;; rte return from exception
+;; sfunc special function call with known used registers
+;; call function call
+;; fp floating point
+;; fdiv floating point divide (or square root)
+;; gp_fpul move between general purpose register and fpul
+;; dfp_arith, dfp_cmp, dfp_conv  double precision floating point
+;;		arithmetic, compare, conversion
+;; dfdiv double precision floating point divide (or square root)
+;; nil no-op move, will be deleted.
+
+(define_attr "type"
+ "cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,other,load,load_si,store,move,fmove,smpy,dmpy,return,pload,pstore,pcload,pcload_si,rte,sfunc,call,fp,fdiv,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,nil"
+ (const_string "other"))
+
+; If a conditional branch destination is within -252..258 bytes of the
+; instruction, it can be 2 bytes long.  Something in the
+; range -4090..4100 bytes can be 6 bytes long. All other conditional
+; branches are initially assumed to be 16 bytes long.
+; In machine_dependent_reorg, we split all branches that are longer than
+; 2 bytes.
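+; E.g. a cbranch whose target is 100 bytes ahead fits the 2 byte form,
+; one 2000 bytes ahead needs the 6 byte form, and anything beyond the
+; 4k range starts out at 16 bytes until machine_dependent_reorg splits it.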
+
+;; The maximum range used for SImode constant pool entries is 1018.  A final
+;; instruction can add 8 bytes while only being 4 bytes in size, thus we
+;; can have a total of 1022 bytes in the pool. Add 4 bytes for a branch
+;; instruction around the pool table, 2 bytes of alignment before the table,
+;; and 30 bytes of alignment after the table. That gives a maximum total
+;; pool size of 1058 bytes.
+;; Worst case code/pool content size ratio is 1:2 (using asms).
+;; Thus, in the worst case, there is one instruction in front of a maximum
+;; sized pool, and then there are 1052 bytes of pool for every 508 bytes of
+;; code. For the last n bytes of code, there are 2n + 36 bytes of pool.
+;; If we have a forward branch, the initial table will be put after the
+;; unconditional branch.
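+;; Worked sum for the 1058 figure: 1022 bytes of pool entries + 4 bytes
+;; for the branch around the table + 2 bytes of alignment before it
+;; + 30 bytes of alignment after it = 1058 bytes.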
+;;
+;; ??? We could do much better by keeping track of the actual pcloads within
+;; the branch range and in the pcload range in front of the branch range.
+
+;; ??? This looks ugly because genattrtab won't allow if_then_else or cond
+;; inside an le.
+(define_attr "short_cbranch_p" "no,yes"
+ (cond [(ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 506))
+ (const_string "yes")
+ (ne (symbol_ref "NEXT_INSN (PREV_INSN (insn)) != insn") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 508))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "med_branch_p" "no,yes"
+ (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 990))
+ (const_int 1988))
+ (const_string "yes")
+ (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 4092))
+ (const_int 8186))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "med_cbranch_p" "no,yes"
+ (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 988))
+ (const_int 1986))
+ (const_string "yes")
+ (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 4090))
+ (const_int 8184))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "braf_branch_p" "no,yes"
+ (cond [(ne (symbol_ref "! TARGET_SH2") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 10330))
+ (const_int 20660))
+ (const_string "yes")
+ (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 32764))
+ (const_int 65530))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "braf_cbranch_p" "no,yes"
+ (cond [(ne (symbol_ref "! TARGET_SH2") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 10328))
+ (const_int 20658))
+ (const_string "yes")
+ (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 32762))
+ (const_int 65528))
+ (const_string "yes")
+ ] (const_string "no")))
+
+; An unconditional jump in the range -4092..4098 can be 2 bytes long.
+; For wider ranges, we need a combination of a code and a data part.
+; If we can get a scratch register for a long range jump, the code
+; part can be 4 bytes long; otherwise, it must be 8 bytes long.
+; If the jump is in the range -32764..32770, the data part can be 2 bytes
+; long; otherwise, it must be 6 bytes long.
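+; E.g. a jump 3000 bytes forward is a single 2 byte bra; at 20000 bytes
+; it takes a code part (4 bytes if a scratch register is available,
+; 8 otherwise) plus a 2 byte data part; beyond the -32764..32770 range
+; the data part grows to 6 bytes.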
+
+; All other instructions are two bytes long by default.
+
+;; ??? This should use something like *branch_p (minus (match_dup 0) (pc)),
+;; but genattrtab doesn't understand this.
+(define_attr "length" ""
+ (cond [(eq_attr "type" "cbranch")
+ (cond [(eq_attr "short_cbranch_p" "yes")
+ (const_int 2)
+ (eq_attr "med_cbranch_p" "yes")
+ (const_int 6)
+ (eq_attr "braf_cbranch_p" "yes")
+ (const_int 12)
+;; ??? using pc is not computed transitively.
+ (ne (match_dup 0) (match_dup 0))
+ (const_int 14)
+ ] (const_int 16))
+ (eq_attr "type" "jump")
+ (cond [(eq_attr "med_branch_p" "yes")
+ (const_int 2)
+ (and (eq (symbol_ref "GET_CODE (PREV_INSN (insn))")
+ (symbol_ref "INSN"))
+ (eq (symbol_ref "INSN_CODE (PREV_INSN (insn))")
+ (symbol_ref "code_for_indirect_jump_scratch")))
+ (if_then_else (eq_attr "braf_branch_p" "yes")
+ (const_int 6)
+ (const_int 10))
+ (eq_attr "braf_branch_p" "yes")
+ (const_int 10)
+;; ??? using pc is not computed transitively.
+ (ne (match_dup 0) (match_dup 0))
+ (const_int 12)
+ ] (const_int 14))
+ ] (const_int 2)))
+
+;; (define_function_unit {name} {num-units} {n-users} {test}
+;; {ready-delay} {issue-delay} [{conflict-list}])
+
+;; Load and store instructions save a cycle if they are aligned on a
+;; four byte boundary. Using a function unit for stores encourages
+;; gcc to separate load and store instructions by one instruction,
+;; which makes it more likely that the linker will be able to word
+;; align them when relaxing.
+
+;; Loads have a latency of two.
+;; However, call insns can have a delay slot, so that we want one more
+;; insn to be scheduled between the load of the function address and the call.
+;; This is equivalent to a latency of three.
+;; We cannot use a conflict list for this, because we need to distinguish
+;; between the actual call address and the function arguments.
+;; ADJUST_COST can only properly handle reductions of the cost, so we
+;; use a latency of three here.
+;; We only do this for SImode loads of general registers, to make the work
+;; for ADJUST_COST easier.
+(define_function_unit "memory" 1 0
+ (and (eq_attr "issues" "1")
+ (eq_attr "type" "load_si,pcload_si"))
+ 3 2)
+(define_function_unit "memory" 1 0
+ (and (eq_attr "issues" "1")
+ (eq_attr "type" "load,pcload,pload,store,pstore"))
+ 2 2)
+
+(define_function_unit "int" 1 0
+ (and (eq_attr "issues" "1") (eq_attr "type" "arith3,arith3b")) 3 3)
+
+(define_function_unit "int" 1 0
+ (and (eq_attr "issues" "1") (eq_attr "type" "dyn_shift")) 2 2)
+
+(define_function_unit "int" 1 0
+ (and (eq_attr "issues" "1") (eq_attr "type" "!arith3,arith3b,dyn_shift")) 1 1)
+
+;; ??? These are approximations.
+(define_function_unit "mpy" 1 0
+ (and (eq_attr "issues" "1") (eq_attr "type" "smpy")) 2 2)
+(define_function_unit "mpy" 1 0
+ (and (eq_attr "issues" "1") (eq_attr "type" "dmpy")) 3 3)
+
+(define_function_unit "fp" 1 0
+ (and (eq_attr "issues" "1") (eq_attr "type" "fp,fmove")) 2 1)
+(define_function_unit "fp" 1 0
+ (and (eq_attr "issues" "1") (eq_attr "type" "fdiv")) 13 12)
+
+
+;; SH4 scheduling
+;; The SH4 is a dual-issue implementation, so we have to multiply all
+;; costs by at least two.
+;; There will be single increments of the modeled cycle count that don't
+;; correspond to an actual target cycle whenever two insns that are to be
+;; issued depend on a single resource, and the scheduler picks one of them
+;; to go first.
+;; If we multiplied the costs just by two, just two of these single
+;; increments would amount to an actual cycle.  By picking a larger
+;; factor, we can ameliorate the effect; however, we then have to make sure
+;; that only two insns are modeled as issued per actual cycle.
+;; Moreover, we need a way to specify the latency of insns that don't
+;; use an actual function unit.
+;; We use an 'issue' function unit to do that, and a cost factor of 10.
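+;; Thus a two-cycle load is given a cost of 20 by the "load" unit below,
+;; the three-cycle load_si case 30, and the "issue" unit's cost of 10
+;; stands for plain single-cycle issue.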
+
+(define_function_unit "issue" 2 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "!nil,arith3"))
+ 10 10)
+
+(define_function_unit "issue" 2 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "arith3"))
+ 30 30)
+
+;; There is no point in providing exact scheduling information about branches,
+;; because they are at the starts / ends of basic blocks anyway.
+
+;; Some insns cannot be issued before/after another insn in the same cycle,
+;; irrespective of the type of the other insn.
+
+;; default is dual-issue, but can't be paired with an insn that
+;; uses multiple function units.
+(define_function_unit "single_issue" 1 0
+ (and (eq_attr "issues" "2")
+ (eq_attr "type" "!smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul,call,sfunc,arith3,arith3b"))
+ 1 10
+ [(eq_attr "type" "smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul")])
+
+(define_function_unit "single_issue" 1 0
+ (and (eq_attr "issues" "2")
+ (eq_attr "type" "smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul"))
+ 10 10
+ [(const_int 1)])
+
+;; arith3 insns are always pairable at the start, but not necessarily at
+;; the end; however, there doesn't seem to be a way to express that.
+(define_function_unit "single_issue" 1 0
+ (and (eq_attr "issues" "2")
+ (eq_attr "type" "arith3"))
+ 30 20
+ [(const_int 1)])
+
+;; arith3b insns are pairable at the end and have a latency that prevents
+;; pairing with the following branch, but we don't want this latency to be
+;; respected; when the following branch is immediately adjacent, we can
+;; redirect the internal branch, which is likely to be a larger win.
+(define_function_unit "single_issue" 1 0
+ (and (eq_attr "issues" "2")
+ (eq_attr "type" "arith3b"))
+ 20 20
+ [(const_int 1)])
+
+;; Calls introduce a longish delay that is likely to flush the pipelines.
+(define_function_unit "single_issue" 1 0
+ (and (eq_attr "issues" "2")
+ (eq_attr "type" "call,sfunc"))
+ 160 160
+ [(eq_attr "type" "!call") (eq_attr "type" "call")])
+
+;; Load and store instructions have no alignment peculiarities for the SH4,
+;; but they use the load-store unit, which they share with the fmove type
+;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg).
+;; Loads have a latency of two.
+;; However, call insns can only be paired with a preceding insn, and have
+;; a delay slot, so that we want two more insns to be scheduled between the
+;; load of the function address and the call. This is equivalent to a
+;; latency of three.
+;; We cannot use a conflict list for this, because we need to distinguish
+;; between the actual call address and the function arguments.
+;; ADJUST_COST can only properly handle reductions of the cost, so we
+;; use a latency of three here, which gets multiplied by 10 to yield 30.
+;; We only do this for SImode loads of general registers, to make the work
+;; for ADJUST_COST easier.
+
+;; When specifying different latencies for different insns using the
+;; same function unit, genattrtab.c assumes a 'FIFO constraint'
+;; so that the blockage is at least READY-COST (E) + 1 - READY-COST (C)
+;; for an executing insn E and a candidate insn C.
+;; Therefore, we define three different function units for load_store:
+;; load_store, load and load_si.
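+;; E.g. with a single shared unit, an executing load_si (ready cost 30)
+;; would block a following load (ready cost 20) for at least
+;; 30 + 1 - 20 = 11 units, i.e. more than one modeled issue cycle.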
+
+(define_function_unit "load_si" 1 0
+ (and (eq_attr "issues" "2")
+ (eq_attr "type" "load_si,pcload_si")) 30 10)
+(define_function_unit "load" 1 0
+ (and (eq_attr "issues" "2")
+ (eq_attr "type" "load,pcload,pload")) 20 10)
+(define_function_unit "load_store" 1 0
+ (and (eq_attr "issues" "2")
+ (eq_attr "type" "load_si,pcload_si,load,pcload,pload,store,pstore,fmove"))
+ 10 10)
+
+(define_function_unit "int" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "arith,dyn_shift")) 10 10)
+
+;; Again, we have to pretend a lower latency for the "int" unit to avoid a
+;; spurious FIFO constraint; the multiply instructions actually use the
+;; "int" unit only for two cycles.
+(define_function_unit "int" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 20 20)
+
+;; We use a fictous "mpy" unit to express the actual latency.
+(define_function_unit "mpy" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 20)
+
+;; Again, we have to pretend a lower latency for the "int" unit to avoid a
+;; spurious FIFO constraint.
+(define_function_unit "int" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 10 10)
+
+;; We use a fictous "gp_fpul" unit to express the actual latency.
+(define_function_unit "gp_fpul" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 20 10)
+
+;; ??? multiply uses the floating point unit, but with a two cycle delay.
+;; Thus, a simple single-precision fp operation could finish if issued in
+;; the very next cycle, but stalls when issued two or three cycles later.
+;; Similarly, a divide / sqrt can work without stalls if issued in
+;; the very next cycle, while it would have to block if issued two or
+;; three cycles later.
+;; There is no way to model this with gcc's function units. This problem is
+;; actually mentioned in md.texi. Tackling this problem requires first that
+;; it is possible to speak about the target in an open discussion.
+;;
+;; However, simple double-precision operations always conflict.
+
+(define_function_unit "fp" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 40
+ [(eq_attr "type" "dfp_cmp,dfp_conv,dfp_arith")])
+
+;; The "fp" unit is for pipeline stages F1 and F2.
+
+(define_function_unit "fp" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "fp")) 30 10)
+
+;; Again, we have to pretend a lower latency for the "fp" unit to avoid a
+;; spurious FIFO constraint; the bulk of the fdiv type insns executes in
+;; the F3 stage.
+(define_function_unit "fp" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 30 10)
+
+;; The "fdiv" function unit models the aggregate effect of the F1, F2 and F3
+;; pipeline stages on the pipelining of fdiv/fsqrt insns.
+;; We also use it to give the actual latency here.
+;; fsqrt is actually one cycle faster than fdiv (and the value used here),
+;; but that will hardly matter in practice for scheduling.
+(define_function_unit "fdiv" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 120 100)
+
+;; There is again a late use of the "fp" unit by [d]fdiv type insns
+;; that we can't express.
+
+(define_function_unit "fp" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "dfp_cmp,dfp_conv")) 40 20)
+
+(define_function_unit "fp" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "dfp_arith")) 80 60)
+
+(define_function_unit "fp" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 10)
+
+(define_function_unit "fdiv" 1 0
+ (and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 210)
+
+; Definitions for filling branch delay slots.
+
+(define_attr "needs_delay_slot" "yes,no" (const_string "no"))
+
+;; ??? This should be (nil) instead of (const_int 0)
+(define_attr "hit_stack" "yes,no"
+ (cond [(eq (symbol_ref "find_regno_note (insn, REG_INC, 15)") (const_int 0))
+ (const_string "no")]
+ (const_string "yes")))
+
+(define_attr "interrupt_function" "no,yes"
+ (const (symbol_ref "pragma_interrupt")))
+
+(define_attr "in_delay_slot" "yes,no"
+ (cond [(eq_attr "type" "cbranch") (const_string "no")
+ (eq_attr "type" "pcload,pcload_si") (const_string "no")
+ (eq_attr "needs_delay_slot" "yes") (const_string "no")
+ (eq_attr "length" "2") (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "is_sfunc" ""
+ (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0)))
+
+(define_delay
+ (eq_attr "needs_delay_slot" "yes")
+ [(eq_attr "in_delay_slot" "yes") (nil) (nil)])
+
+;; On the SH and SH2, the rte instruction reads the return pc from the stack,
+;; and thus we can't put a pop instruction in its delay slot.
+;; ??? On the SH3, the rte instruction does not use the stack, so a pop
+;; instruction can go in the delay slot.
+
+;; Since a normal return (rts) implicitly uses the PR register,
+;; we can't allow PR register loads in an rts delay slot.
+
+(define_delay
+ (eq_attr "type" "return")
+ [(and (eq_attr "in_delay_slot" "yes")
+ (ior (and (eq_attr "interrupt_function" "no")
+ (eq_attr "type" "!pload"))
+ (and (eq_attr "interrupt_function" "yes")
+ (eq_attr "hit_stack" "no")))) (nil) (nil)])
+
+;; Since a call implicitly uses the PR register, we can't allow
+;; a PR register store in a jsr delay slot.
+
+(define_delay
+ (ior (eq_attr "type" "call") (eq_attr "type" "sfunc"))
+ [(and (eq_attr "in_delay_slot" "yes")
+ (eq_attr "type" "!pstore")) (nil) (nil)])
+
+;; Say that we have annulled true branches, since this gives smaller and
+;; faster code when branches are predicted as not taken.
+
+(define_delay
+ (and (eq_attr "type" "cbranch")
+ (ne (symbol_ref "TARGET_SH2") (const_int 0)))
+ [(eq_attr "in_delay_slot" "yes") (eq_attr "in_delay_slot" "yes") (nil)])
+
+;; -------------------------------------------------------------------------
+;; SImode signed integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn ""
+ [(set (reg:SI 18)
+ (eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r")
+ (match_operand:SI 1 "arith_operand" "L,r"))
+ (const_int 0)))]
+ ""
+ "tst %1,%0")
+
+;; ??? Perhaps should only accept reg/constant if the register is reg 0.
+;; That would still allow reload to create cmpi instructions, but would
+;; perhaps allow forcing the constant into a register when that is better.
+;; Probably should use r0 for mem/imm compares, but force constant into a
+;; register for pseudo/imm compares.
+
+(define_insn "cmpeqsi_t"
+ [(set (reg:SI 18) (eq:SI (match_operand:SI 0 "arith_reg_operand" "r,z,r")
+ (match_operand:SI 1 "arith_operand" "N,rI,r")))]
+ ""
+ "@
+ tst %0,%0
+ cmp/eq %1,%0
+ cmp/eq %1,%0")
+
+(define_insn "cmpgtsi_t"
+ [(set (reg:SI 18) (gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
+ (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))]
+ ""
+ "@
+ cmp/gt %1,%0
+ cmp/pl %0")
+
+(define_insn "cmpgesi_t"
+ [(set (reg:SI 18) (ge:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
+ (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))]
+ ""
+ "@
+ cmp/ge %1,%0
+ cmp/pz %0")
+
+;; -------------------------------------------------------------------------
+;; SImode unsigned integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn "cmpgeusi_t"
+ [(set (reg:SI 18) (geu:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_operand" "r")))]
+ ""
+ "cmp/hs %1,%0")
+
+(define_insn "cmpgtusi_t"
+ [(set (reg:SI 18) (gtu:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_operand" "r")))]
+ ""
+ "cmp/hi %1,%0")
+
+;; We save the compare operands in the cmpxx patterns and use them when
+;; we generate the branch.
+
+(define_expand "cmpsi"
+ [(set (reg:SI 18) (compare (match_operand:SI 0 "arith_operand" "")
+ (match_operand:SI 1 "arith_operand" "")))]
+ ""
+ "
+{
+ sh_compare_op0 = operands[0];
+ sh_compare_op1 = operands[1];
+ DONE;
+}")
+
+;; -------------------------------------------------------------------------
+;; DImode signed integer comparisons
+;; -------------------------------------------------------------------------
+
+;; ??? Could get better scheduling by splitting the initial test from the
+;; rest of the insn after reload. However, the gain would hardly justify
+;; the sh.md size increase necessary to do that.
+
+(define_insn ""
+ [(set (reg:SI 18)
+ (eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_operand" "r"))
+ (const_int 0)))]
+ ""
+ "* return output_branchy_insn (EQ, \"tst\\t%S1,%S0\;bf\\t%l9\;tst\\t%R1,%R0\",
+ insn, operands);"
+ [(set_attr "length" "6")
+ (set_attr "type" "arith3b")])
+
+(define_insn "cmpeqdi_t"
+ [(set (reg:SI 18) (eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))]
+ ""
+ "*
+ return output_branchy_insn
+ (EQ,
+ (which_alternative
+ ? \"cmp/eq\\t%S1,%S0\;bf\\t%l9\;cmp/eq\\t%R1,%R0\"
+ : \"tst\\t%S0,%S0\;bf\\t%l9\;tst\\t%R0,%R0\"),
+ insn, operands);"
+ [(set_attr "length" "6")
+ (set_attr "type" "arith3b")])
+
+(define_insn "cmpgtdi_t"
+ [(set (reg:SI 18) (gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
+ "TARGET_SH2"
+ "@
+ cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/gt\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:
+ tst\\t%S0,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/pl\\t%S0\;cmp/hi\\t%S0,%R0\\n%,Ldi%=:"
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+(define_insn "cmpgedi_t"
+ [(set (reg:SI 18) (ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
+ "TARGET_SH2"
+ "@
+ cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/ge\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:
+ cmp/pz\\t%S0"
+ [(set_attr "length" "8,2")
+ (set_attr "type" "arith3,arith")])
+
+;; -------------------------------------------------------------------------
+;; DImode unsigned integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn "cmpgeudi_t"
+ [(set (reg:SI 18) (geu:SI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+ "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hs\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:"
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+(define_insn "cmpgtudi_t"
+ [(set (reg:SI 18) (gtu:SI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+ "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hi\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:"
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+;; We save the compare operands in the cmpxx patterns and use them when
+;; we generate the branch.
+
+(define_expand "cmpdi"
+ [(set (reg:SI 18) (compare (match_operand:DI 0 "arith_operand" "")
+ (match_operand:DI 1 "arith_operand" "")))]
+ "TARGET_SH2"
+ "
+{
+ sh_compare_op0 = operands[0];
+ sh_compare_op1 = operands[1];
+ DONE;
+}")
+
+;; -------------------------------------------------------------------------
+;; Addition instructions
+;; -------------------------------------------------------------------------
+
+;; ??? This should be a define expand.
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0")
+ (match_operand:DI 2 "arith_reg_operand" "r")))
+ (clobber (reg:SI 18))]
+ ""
+ "#"
+ [(set_attr "length" "6")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0")
+ (match_operand:DI 2 "arith_reg_operand" "r")))
+ (clobber (reg:SI 18))]
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]);
+ high0 = gen_rtx (REG, SImode,
+ true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ high2 = gen_rtx (REG, SImode,
+ true_regnum (operands[2]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ emit_insn (gen_clrt ());
+ emit_insn (gen_addc (low0, low0, gen_lowpart (SImode, operands[2])));
+ emit_insn (gen_addc1 (high0, high0, high2));
+ DONE;
+}")
+
+(define_insn "addc"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI 18)))
+ (set (reg:SI 18)
+ (ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
+ ""
+ "addc %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "addc1"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI 18)))
+ (clobber (reg:SI 18))]
+ ""
+ "addc %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (plus:SI (match_operand:SI 1 "arith_operand" "%0")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ ""
+ "add %2,%0"
+ [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Subtraction instructions
+;; -------------------------------------------------------------------------
+
+;; ??? This should be a define expand.
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (minus:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (match_operand:DI 2 "arith_reg_operand" "r")))
+ (clobber (reg:SI 18))]
+ ""
+ "#"
+ [(set_attr "length" "6")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (minus:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (match_operand:DI 2 "arith_reg_operand" "r")))
+ (clobber (reg:SI 18))]
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]);
+ high0 = gen_rtx (REG, SImode,
+ true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ high2 = gen_rtx (REG, SImode,
+ true_regnum (operands[2]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ emit_insn (gen_clrt ());
+ emit_insn (gen_subc (low0, low0, gen_lowpart (SImode, operands[2])));
+ emit_insn (gen_subc1 (high0, high0, high2));
+ DONE;
+}")
+
+(define_insn "subc"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI 18)))
+ (set (reg:SI 18)
+ (gtu:SI (minus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
+ ""
+ "subc %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "subc1"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI 18)))
+ (clobber (reg:SI 18))]
+ ""
+ "subc %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*subsi3_internal"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r")))]
+ ""
+ "sub %2,%0"
+ [(set_attr "type" "arith")])
+
+;; Convert `constant - reg' to `neg rX; add rX, #const' since this
+;; will sometimes save one instruction. Otherwise we might get
+;; `mov #const, rY; sub rY,rX; mov rX, rY' if the source and dest regs
+;; are the same.
+
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (minus:SI (match_operand:SI 1 "arith_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ emit_insn (gen_negsi2 (operands[0], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+;; -------------------------------------------------------------------------
+;; Division instructions
+;; -------------------------------------------------------------------------
+
+;; We take advantage of the library routines which don't clobber as many
+;; registers as a normal function call would.
+
+;; The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
+;; also has an effect on the register that holds the address of the sfunc.
+;; To make this work, we have an extra dummy insn that shows the use
+;; of this register for reorg.
+
+(define_insn "use_sfunc_addr"
+ [(set (reg:SI 17) (unspec [(match_operand:SI 0 "register_operand" "r")] 5))]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; We must use a pseudo-reg forced to reg 0 in the SET_DEST rather than
+;; hard register 0. If we used hard register 0, then the next instruction
+;; would be a move from hard register 0 to a pseudo-reg. If the pseudo-reg
+;; gets allocated to a stack slot that needs its address reloaded, then
+;; there is nothing to prevent reload from using r0 to reload the address.
+;; This reload would clobber the value in r0 we are trying to store.
+;; If we let reload allocate r0, then this problem can never happen.
+
+(define_insn "udivsi3_i1"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (udiv:SI (reg:SI 4) (reg:SI 5)))
+ (clobber (reg:SI 18))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 4))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "! TARGET_SH4"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "udivsi3_i4"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (udiv:SI (reg:SI 4) (reg:SI 5)))
+ (clobber (reg:SI 17))
+ (clobber (reg:DF 24))
+ (clobber (reg:DF 26))
+ (clobber (reg:DF 28))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 4))
+ (clobber (reg:SI 5))
+ (use (reg:PSI 48))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH4 && ! TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "udivsi3_i4_single"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (udiv:SI (reg:SI 4) (reg:SI 5)))
+ (clobber (reg:SI 17))
+ (clobber (reg:DF 24))
+ (clobber (reg:DF 26))
+ (clobber (reg:DF 28))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 4))
+ (clobber (reg:SI 5))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_HARD_SH4 && TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "udivsi3"
+ [(set (reg:SI 4) (match_operand:SI 1 "general_operand" ""))
+ (set (reg:SI 5) (match_operand:SI 2 "general_operand" ""))
+ (set (match_dup 3) (symbol_ref:SI "__udivsi3"))
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (udiv:SI (reg:SI 4)
+ (reg:SI 5)))
+ (clobber (reg:SI 18))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 4))
+ (use (match_dup 3))])]
+ ""
+ "
+{
+ rtx first, last;
+
+ first = emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
+  operands[3] = gen_reg_rtx (SImode);
+ if (TARGET_HARD_SH4)
+ {
+ emit_move_insn (operands[3],
+ gen_rtx_SYMBOL_REF (SImode, \"__udivsi3_i4\"));
+ if (TARGET_FPU_SINGLE)
+ last = emit_insn (gen_udivsi3_i4_single (operands[0], operands[3]));
+ else
+ last = emit_insn (gen_udivsi3_i4 (operands[0], operands[3]));
+ }
+ else
+ {
+ emit_move_insn (operands[3],
+ gen_rtx_SYMBOL_REF (SImode, \"__udivsi3\"));
+ last = emit_insn (gen_udivsi3_i1 (operands[0], operands[3]));
+ }
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+ DONE;
+}")
+
+(define_insn "divsi3_i1"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (reg:SI 4) (reg:SI 5)))
+ (clobber (reg:SI 18))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "! TARGET_SH4"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "divsi3_i4"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (div:SI (reg:SI 4) (reg:SI 5)))
+ (clobber (reg:SI 17))
+ (clobber (reg:DF 24))
+ (clobber (reg:DF 26))
+ (use (reg:PSI 48))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH4 && ! TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "divsi3_i4_single"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (div:SI (reg:SI 4) (reg:SI 5)))
+ (clobber (reg:SI 17))
+ (clobber (reg:DF 24))
+ (clobber (reg:DF 26))
+ (clobber (reg:SI 2))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_HARD_SH4 && TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "divsi3"
+ [(set (reg:SI 4) (match_operand:SI 1 "general_operand" ""))
+ (set (reg:SI 5) (match_operand:SI 2 "general_operand" ""))
+ (set (match_dup 3) (symbol_ref:SI "__sdivsi3"))
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (reg:SI 4)
+ (reg:SI 5)))
+ (clobber (reg:SI 18))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))
+ (use (match_dup 3))])]
+ ""
+ "
+{
+ rtx first, last;
+
+ first = emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
+  operands[3] = gen_reg_rtx (SImode);
+ if (TARGET_HARD_SH4)
+ {
+ emit_move_insn (operands[3],
+ gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3_i4\"));
+ if (TARGET_FPU_SINGLE)
+ last = emit_insn (gen_divsi3_i4_single (operands[0], operands[3]));
+ else
+ last = emit_insn (gen_divsi3_i4 (operands[0], operands[3]));
+ }
+ else
+ {
+ emit_move_insn (operands[3], gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3\"));
+ last = emit_insn (gen_divsi3_i1 (operands[0], operands[3]));
+ }
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+ DONE;
+}")
+
+;; -------------------------------------------------------------------------
+;; Multiplication instructions
+;; -------------------------------------------------------------------------
+
+(define_insn "umulhisi3_i"
+ [(set (reg:SI 21)
+ (mult:SI (zero_extend:SI (match_operand:HI 0 "arith_reg_operand" "r"))
+ (zero_extend:SI (match_operand:HI 1 "arith_reg_operand" "r"))))]
+ ""
+ "mulu %1,%0"
+ [(set_attr "type" "smpy")])
+
+(define_insn "mulhisi3_i"
+ [(set (reg:SI 21)
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 0 "arith_reg_operand" "r"))
+ (sign_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" "r"))))]
+ ""
+ "muls %1,%0"
+ [(set_attr "type" "smpy")])
+
+(define_expand "mulhisi3"
+ [(set (reg:SI 21)
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" ""))
+ (sign_extend:SI
+ (match_operand:HI 2 "arith_reg_operand" ""))))
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI 21))]
+ ""
+ "
+{
+ rtx first, last;
+
+ first = emit_insn (gen_mulhisi3_i (operands[1], operands[2]));
+ last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 21));
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+ DONE;
+}")
+
+(define_expand "umulhisi3"
+ [(set (reg:SI 21)
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" ""))
+ (zero_extend:SI
+ (match_operand:HI 2 "arith_reg_operand" ""))))
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI 21))]
+ ""
+ "
+{
+ rtx first, last;
+
+ first = emit_insn (gen_umulhisi3_i (operands[1], operands[2]));
+ last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 21));
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+ DONE;
+}")
+
+;; mulsi3 on the SH2 can be done in one instruction, on the SH1 we generate
+;; a call to a routine which clobbers known registers.
+
+(define_insn ""
+ [(set (match_operand:SI 1 "register_operand" "=z")
+ (mult:SI (reg:SI 4) (reg:SI 5)))
+ (clobber (reg:SI 21))
+ (clobber (reg:SI 18))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 3))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 1))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))]
+ ""
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "mulsi3_call"
+ [(set (reg:SI 4) (match_operand:SI 1 "general_operand" ""))
+ (set (reg:SI 5) (match_operand:SI 2 "general_operand" ""))
+   (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (reg:SI 4)
+ (reg:SI 5)))
+ (clobber (reg:SI 21))
+ (clobber (reg:SI 18))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 3))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 1))
+ (use (match_operand:SI 3 "register_operand" ""))])]
+ ""
+ "")
+
+(define_insn "mul_l"
+ [(set (reg:SI 21)
+ (mult:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+ "mul.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "mulsi3"
+ [(set (reg:SI 21)
+ (mult:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")))
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI 21))]
+ ""
+ "
+{
+ rtx first, last;
+
+ if (!TARGET_SH2)
+ {
+ /* The address must be set outside the libcall,
+ since it goes into a pseudo. */
+ rtx addr = force_reg (SImode, gen_rtx_SYMBOL_REF (SImode, \"__mulsi3\"));
+ rtx insns = gen_mulsi3_call (operands[0], operands[1], operands[2], addr);
+ first = XVECEXP (insns, 0, 0);
+ last = XVECEXP (insns, 0, XVECLEN (insns, 0) - 1);
+ emit_insn (insns);
+ }
+ else
+ {
+ rtx macl = gen_rtx_REG (SImode, MACL_REG);
+ first = emit_insn (gen_mul_l (operands[1], operands[2]));
+ last = emit_insn (gen_movsi_i ((operands[0]), macl));
+ }
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+ DONE;
+}")
+
+(define_insn "mulsidi3_i"
+ [(set (reg:SI 20)
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (set (reg:SI 21)
+ (mult:SI (match_dup 0)
+ (match_dup 1)))]
+ "TARGET_SH2"
+ "dmuls.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_insn "mulsidi3"
+ [(set (match_operand:DI 0 "arith_reg_operand" "r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "r"))))
+ (clobber (reg:DI 20))]
+ "TARGET_SH2"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))))
+ (clobber (reg:DI 20))]
+ "TARGET_SH2"
+ [(const_int 0)]
+ "
+{
+ rtx low_dst = gen_lowpart (SImode, operands[0]);
+ rtx high_dst = gen_highpart (SImode, operands[0]);
+
+ emit_insn (gen_mulsidi3_i (operands[1], operands[2]));
+
+ emit_move_insn (low_dst, gen_rtx_REG (SImode, 21));
+ emit_move_insn (high_dst, gen_rtx_REG (SImode, 20));
+ /* We need something to tag the possible REG_EQUAL notes on to. */
+ emit_move_insn (operands[0], operands[0]);
+ DONE;
+}")
+
+(define_insn "umulsidi3_i"
+ [(set (reg:SI 20)
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (set (reg:SI 21)
+ (mult:SI (match_dup 0)
+ (match_dup 1)))]
+ "TARGET_SH2"
+ "dmulu.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_insn "umulsidi3"
+ [(set (match_operand:DI 0 "arith_reg_operand" "r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "r"))))
+ (clobber (reg:DI 20))]
+ "TARGET_SH2"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))))
+ (clobber (reg:DI 20))]
+ "TARGET_SH2"
+ [(const_int 0)]
+ "
+{
+ rtx low_dst = gen_lowpart (SImode, operands[0]);
+ rtx high_dst = gen_highpart (SImode, operands[0]);
+
+ emit_insn (gen_umulsidi3_i (operands[1], operands[2]));
+
+ emit_move_insn (low_dst, gen_rtx_REG (SImode, 21));
+ emit_move_insn (high_dst, gen_rtx_REG (SImode, 20));
+ /* We need something to tag the possible REG_EQUAL notes on to. */
+ emit_move_insn (operands[0], operands[0]);
+ DONE;
+}")
+
+(define_insn "smulsi3_highpart_i"
+ [(set (reg:SI 20)
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (clobber (reg:SI 21))]
+ "TARGET_SH2"
+ "dmuls.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "smulsi3_highpart"
+ [(parallel [(set (reg:SI 20)
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))
+ (const_int 32))))
+ (clobber (reg:SI 21))])
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI 20))]
+ "TARGET_SH2"
+ "
+{
+ rtx first, last;
+
+ first = emit_insn (gen_smulsi3_highpart_i (operands[1], operands[2]));
+ last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 20));
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+ DONE;
+}")
+
+(define_insn "umulsi3_highpart_i"
+ [(set (reg:SI 20)
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (clobber (reg:SI 21))]
+ "TARGET_SH2"
+ "dmulu.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "umulsi3_highpart"
+ [(parallel [(set (reg:SI 20)
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))
+ (const_int 32))))
+ (clobber (reg:SI 21))])
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI 20))]
+ "TARGET_SH2"
+ "
+{
+ rtx first, last;
+
+ first = emit_insn (gen_umulsi3_highpart_i (operands[1], operands[2]));
+ last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 20));
+ /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
+ invariant code motion can move it. */
+ REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
+ REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+ DONE;
+}")
+
+;; -------------------------------------------------------------------------
+;; Logical operations
+;; -------------------------------------------------------------------------
+
+(define_insn ""
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r,z")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
+ (match_operand:SI 2 "logical_operand" "r,L")))]
+ ""
+ "and %2,%0"
+ [(set_attr "type" "arith")])
+
+;; If the constant is 255, then emit an extu.b instruction instead of an
+;; and, since that will give better code.
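+;; ("and #imm,Rn" exists only with r0 as its destination, as the "z"/"L"
+;; alternative of the insn above shows, while "extu.b Rm,Rn" zero-extends
+;; between any two registers in a single two-byte insn.)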
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "logical_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 255)
+ {
+ emit_insn (gen_zero_extendqisi2 (operands[0],
+ gen_lowpart (QImode, operands[1])));
+ DONE;
+ }
+}")
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r,z")
+ (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
+ (match_operand:SI 2 "logical_operand" "r,L")))]
+ ""
+ "or %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=z,r")
+ (xor:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
+ (match_operand:SI 2 "logical_operand" "L,r")))]
+ ""
+ "xor %2,%0"
+ [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Shifts and rotates
+;; -------------------------------------------------------------------------
+
+(define_insn "rotlsi3_1"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (set (reg:SI 18)
+ (lshiftrt:SI (match_dup 1) (const_int 31)))]
+ ""
+ "rotl %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "rotlsi3_31"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 31)))
+ (clobber (reg:SI 18))]
+ ""
+ "rotr %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "rotlsi3_16"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16)))]
+ ""
+ "swap.w %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")))]
+ ""
+ "
+{
+ static char rot_tab[] = {
+ 000, 000, 000, 000, 000, 000, 010, 001,
+ 001, 001, 011, 013, 003, 003, 003, 003,
+ 003, 003, 003, 003, 003, 013, 012, 002,
+ 002, 002, 010, 000, 000, 000, 000, 000,
+ };
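+
+  /* The low three bits of each entry select the strategy for that shift
+     count: 0 = single-bit rotates only, 3 = swap.w first, 1 or 2 =
+     swap.w combined with 8-bit shifts.  The 010 bit marks counts whose
+     open-coded sequence is long, so a cheap dynamic shift is preferred
+     when available; see the SH_DYNAMIC_SHIFT_COST test below.  */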
+
+ int count, choice;
+
+ if (GET_CODE (operands[2]) != CONST_INT)
+ FAIL;
+ count = INTVAL (operands[2]);
+ choice = rot_tab[count];
+ if (choice & 010 && SH_DYNAMIC_SHIFT_COST <= 1)
+ FAIL;
+ choice &= 7;
+ switch (choice)
+ {
+ case 0:
+ emit_move_insn (operands[0], operands[1]);
+ count -= (count & 16) * 2;
+ break;
+ case 3:
+ emit_insn (gen_rotlsi3_16 (operands[0], operands[1]));
+ count -= 16;
+ break;
+ case 1:
+ case 2:
+ {
+ rtx parts[2];
+ parts[0] = gen_reg_rtx (SImode);
+ parts[1] = gen_reg_rtx (SImode);
+ emit_insn (gen_rotlsi3_16 (parts[2-choice], operands[1]));
+      emit_move_insn (parts[choice-1], operands[1]);
+ emit_insn (gen_ashlsi3 (parts[0], parts[0], GEN_INT (8)));
+ emit_insn (gen_lshrsi3 (parts[1], parts[1], GEN_INT (8)));
+ emit_insn (gen_iorsi3 (operands[0], parts[0], parts[1]));
+ count = (count & ~16) - 8;
+ }
+ }
+
+ for (; count > 0; count--)
+ emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
+ for (; count < 0; count++)
+ emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
+
+ DONE;
+}")
+
+(define_insn "*rotlhi3_8"
+ [(set (match_operand:HI 0 "arith_reg_operand" "=r")
+ (rotate:HI (match_operand:HI 1 "arith_reg_operand" "r")
+ (const_int 8)))]
+ ""
+ "swap.b %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_expand "rotlhi3"
+ [(set (match_operand:HI 0 "arith_reg_operand" "")
+ (rotate:HI (match_operand:HI 1 "arith_reg_operand" "")
+ (match_operand:HI 2 "immediate_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) != 8)
+ FAIL;
+}")
+
+;;
+;; shift left
+
+(define_insn "ashlsi3_d"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SH3"
+ "shld %2,%0"
+ [(set_attr "type" "dyn_shift")])
+
+(define_insn "ashlsi3_k"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r,r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0,0")
+ (match_operand:SI 2 "const_int_operand" "M,K")))]
+ "CONST_OK_FOR_K (INTVAL (operands[2]))"
+ "@
+ add %0,%0
+ shll%O2 %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "ashlhi3_k"
+ [(set (match_operand:HI 0 "arith_reg_operand" "=r,r")
+ (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0,0")
+ (match_operand:HI 2 "const_int_operand" "M,K")))]
+ "CONST_OK_FOR_K (INTVAL (operands[2]))"
+ "@
+ add %0,%0
+ shll%O2 %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "ashlsi3_n"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (reg:SI 18))]
+ "! sh_dynamicalize_shift_p (operands[2])"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1))
+ (const_string "2")
+ (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3))
+ (const_string "6")]
+ (const_string "8")))
+ (set_attr "type" "arith")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (reg:SI 18))]
+ ""
+ [(use (reg:SI 0))]
+ "
+{
+ gen_shifty_op (ASHIFT, operands);
+ DONE;
+}")
+
+(define_expand "ashlsi3"
+ [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:SI 18))])]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && sh_dynamicalize_shift_p (operands[2]))
+ operands[2] = force_reg (SImode, operands[2]);
+ if (TARGET_SH3 && arith_reg_operand (operands[2], GET_MODE (operands[2])))
+ {
+ emit_insn (gen_ashlsi3_d (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (! immediate_operand (operands[2], GET_MODE (operands[2])))
+ FAIL;
+}")
+
+(define_insn "ashlhi3"
+ [(set (match_operand:HI 0 "arith_reg_operand" "=r")
+ (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0")
+ (match_operand:HI 2 "const_int_operand" "n")))
+ (clobber (reg:SI 18))]
+ ""
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1))
+ (const_string "2")
+ (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2))
+ (const_string "4")]
+ (const_string "6")))
+ (set_attr "type" "arith")])
+
+(define_split
+ [(set (match_operand:HI 0 "arith_reg_operand" "")
+ (ashift:HI (match_operand:HI 1 "arith_reg_operand" "")
+ (match_operand:HI 2 "const_int_operand" "n")))
+ (clobber (reg:SI 18))]
+ ""
+ [(use (reg:SI 0))]
+ "
+{
+ gen_shifty_hi_op (ASHIFT, operands);
+ DONE;
+}")
+
+;;
+;; arithmetic shift right
+;;
+
+(define_insn "ashrsi3_k"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "M")))
+ (clobber (reg:SI 18))]
+ "INTVAL (operands[2]) == 1"
+ "shar %0"
+ [(set_attr "type" "arith")])
+
+;; We can't do HImode right shifts correctly unless we start out with an
+;; explicit zero / sign extension; doing that would result in worse overall
+;; code, so just let the machine-independent code widen the mode.
+;; That's why we don't have ashrhi3_k / lshrhi3_k / lshrhi3_m / lshrhi3.
+
+
+;; ??? This should be a define expand.
+
+(define_insn "ashrsi2_16"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16)))]
+ ""
+ "#"
+ [(set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16)))]
+ ""
+ [(set (match_dup 0) (rotate:SI (match_dup 1) (const_int 16)))
+ (set (match_dup 0) (sign_extend:SI (match_dup 2)))]
+ "operands[2] = gen_lowpart (HImode, operands[0]);")
+
+;; ??? This should be a define expand.
+
+(define_insn "ashrsi2_31"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 31)))
+ (clobber (reg:SI 18))]
+ ""
+ "#"
+ [(set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 31)))
+ (clobber (reg:SI 18))]
+ ""
+ [(const_int 0)]
+ "
+{
+ emit_insn (gen_ashlsi_c (operands[0], operands[1]));
+ emit_insn (gen_subc1 (operands[0], operands[0], operands[0]));
+ DONE;
+}")
+
+(define_insn "ashlsi_c"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (const_int 1)))
+ (set (reg:SI 18) (lt:SI (match_dup 1)
+ (const_int 0)))]
+ ""
+ "shll %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "ashrsi3_d"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SH3"
+ "shad %2,%0"
+ [(set_attr "type" "dyn_shift")])
+
+(define_insn "ashrsi3_n"
+ [(set (reg:SI 4)
+ (ashiftrt:SI (reg:SI 4)
+ (match_operand:SI 0 "const_int_operand" "i")))
+ (clobber (reg:SI 18))
+ (clobber (reg:SI 17))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ ""
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "ashrsi3"
+ [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:SI 18))])]
+ ""
+ "if (expand_ashiftrt (operands)) DONE; else FAIL;")
+
+;; logical shift right
+
+(define_insn "lshrsi3_d"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SH3"
+ "shld %2,%0"
+ [(set_attr "type" "dyn_shift")])
+
+;; Only the single-bit shift clobbers the T bit.
+
+(define_insn "lshrsi3_m"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "M")))
+ (clobber (reg:SI 18))]
+ "CONST_OK_FOR_M (INTVAL (operands[2]))"
+ "shlr %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "lshrsi3_k"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "K")))]
+ "CONST_OK_FOR_K (INTVAL (operands[2]))
+ && ! CONST_OK_FOR_M (INTVAL (operands[2]))"
+ "shlr%O2 %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "lshrsi3_n"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (reg:SI 18))]
+ "! sh_dynamicalize_shift_p (operands[2])"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1))
+ (const_string "2")
+ (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3))
+ (const_string "6")]
+ (const_string "8")))
+ (set_attr "type" "arith")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (reg:SI 18))]
+ ""
+ [(use (reg:SI 0))]
+ "
+{
+ gen_shifty_op (LSHIFTRT, operands);
+ DONE;
+}")
+
+(define_expand "lshrsi3"
+ [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:SI 18))])]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && sh_dynamicalize_shift_p (operands[2]))
+ operands[2] = force_reg (SImode, operands[2]);
+ if (TARGET_SH3 && arith_reg_operand (operands[2], GET_MODE (operands[2])))
+ {
+ rtx count = copy_to_mode_reg (SImode, operands[2]);
+ emit_insn (gen_negsi2 (count, count));
+ emit_insn (gen_lshrsi3_d (operands[0], operands[1], count));
+ DONE;
+ }
+ if (! immediate_operand (operands[2], GET_MODE (operands[2])))
+ FAIL;
+}")
+
+;; ??? This should be a define expand.
+
+(define_insn "ashldi3_k"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI 18))]
+ ""
+ "shll %R0\;rotcl %S0"
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
+
+(define_expand "ashldi3"
+ [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:SI 18))])]
+ ""
+ "{ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) != 1) FAIL;} ")
+
+;; ??? This should be a define expand.
+
+(define_insn "lshrdi3_k"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI 18))]
+ ""
+ "shlr %S0\;rotcr %R0"
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
+
+(define_expand "lshrdi3"
+ [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:SI 18))])]
+ ""
+ "{ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) != 1) FAIL;} ")
+
+;; ??? This should be a define expand.
+
+(define_insn "ashrdi3_k"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI 18))]
+ ""
+ "shar %S0\;rotcr %R0"
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
+
+(define_expand "ashrdi3"
+ [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:SI 18))])]
+ ""
+ "{ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) != 1) FAIL; } ")
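+
+;; Illustrative note on the three DImode shift-by-1 patterns above: the
+;; two word halves are chained through the T bit.  E.g. for ashldi3_k,
+;; with the value in r4 (low) / r5 (high) -- an assumed assignment; %R /
+;; %S select the least / most significant word:
+;;   shll   r4      ! T = bit shifted out of the low word
+;;   rotcl  r5      ! high word takes the old T at its bottom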
+
+;; combined left/right shift
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "(unsigned)INTVAL (operands[2]) < 32"
+ [(use (reg:SI 0))]
+ "if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) FAIL;
+ DONE;")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")))
+ (clobber (reg:SI 18))]
+ "(unsigned)INTVAL (operands[2]) < 32"
+ [(use (reg:SI 0))]
+ "if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) FAIL;
+ DONE;")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")))
+ (clobber (reg:SI 18))]
+ "shl_and_kind (operands[2], operands[3], 0) == 1"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 5))
+ (const_string "10")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 6))
+ (const_string "12")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 7))
+ (const_string "14")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 8))
+ (const_string "16")]
+ (const_string "18")))
+ (set_attr "type" "arith")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")))
+ (clobber (reg:SI 18))]
+ "shl_and_kind (operands[2], operands[3], 0) == 2"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 4))
+ (const_string "8")]
+ (const_string "10")))
+ (set_attr "type" "arith")])
+
+;; shift left / and combination with a scratch register: The combine pass
+;; does not accept the individual instructions, even though they are
+;; cheap.  But the pattern needs a precise description so that it is
+;; usable after reload.
+(define_insn "and_shl_scratch"
+ [(set (match_operand:SI 0 "register_operand" "=r,&r")
+ (lshiftrt:SI (ashift:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0")
+ (match_operand:SI 2 "const_int_operand" "N,n"))
+ (match_operand:SI 3 "" "0,r"))
+ (match_operand:SI 4 "const_int_operand" "n,n"))
+ (match_operand:SI 5 "const_int_operand" "n,n")))
+ (clobber (reg:SI 18))]
+ ""
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_and_scr_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 5))
+ (const_string "10")]
+ (const_string "12")))
+ (set_attr "type" "arith")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "=r,&r")
+ (lshiftrt:SI (ashift:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0")
+ (match_operand:SI 2 "const_int_operand" "N,n"))
+ (match_operand:SI 3 "register_operand" "0,r"))
+ (match_operand:SI 4 "const_int_operand" "n,n"))
+ (match_operand:SI 5 "const_int_operand" "n,n")))
+ (clobber (reg:SI 18))]
+ ""
+ [(use (reg:SI 0))]
+ "
+{
+ rtx and_source = operands[rtx_equal_p (operands[0], operands[1]) ? 3 : 1];
+
+ if (INTVAL (operands[2]))
+ {
+ gen_shifty_op (LSHIFTRT, operands);
+ }
+ emit_insn (gen_andsi3 (operands[0], operands[0], and_source));
+ operands[2] = operands[4];
+ gen_shifty_op (ASHIFT, operands);
+ if (INTVAL (operands[5]))
+ {
+ operands[2] = operands[5];
+ gen_shifty_op (LSHIFTRT, operands);
+ }
+ DONE;
+}")
+
+;; signed left/right shift combination.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI 18))]
+ ""
+ [(use (reg:SI 0))]
+ "if (gen_shl_sext (operands[0], operands[2], operands[3], operands[1])) FAIL;
+ DONE;")
+
+(define_insn "shl_sext_ext"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI 18))]
+ "(unsigned)shl_sext_kind (operands[2], operands[3], 0) - 1 < 5"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 1))
+ (const_string "2")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 5))
+ (const_string "10")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 6))
+ (const_string "12")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 7))
+ (const_string "14")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 8))
+ (const_string "16")]
+ (const_string "18")))
+ (set_attr "type" "arith")])
+
+(define_insn "shl_sext_sub"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI 18))]
+ "(shl_sext_kind (operands[2], operands[3], 0) & ~1) == 6"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 5))
+ (const_string "10")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 6))
+ (const_string "12")]
+ (const_string "14")))
+ (set_attr "type" "arith")])
+
+;; These patterns are found in expansions of DImode shifts by 16, and
+;; allow the xtrct instruction to be generated from C source.
+
+(define_insn "xtrct_left"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16))
+ (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand" "0")
+ (const_int 16))))]
+ ""
+ "xtrct %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "xtrct_right"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 16))
+ (ashift:SI (match_operand:SI 2 "arith_reg_operand" "r")
+ (const_int 16))))]
+ ""
+ "xtrct %2,%0"
+ [(set_attr "type" "arith")])
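+
+;; Illustrative example (assumed source): given
+;;   unsigned f (unsigned long long x) { return x >> 16; }
+;; the low word of the result is the middle 32 bits of x, which a single
+;; "xtrct" of the two halves produces without a shift / shift / or
+;; sequence.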
+
+;; -------------------------------------------------------------------------
+;; Unary arithmetic
+;; -------------------------------------------------------------------------
+
+(define_insn "negc"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (neg:SI (plus:SI (reg:SI 18)
+ (match_operand:SI 1 "arith_reg_operand" "r"))))
+ (set (reg:SI 18)
+ (ne:SI (ior:SI (reg:SI 18) (match_dup 1))
+ (const_int 0)))]
+ ""
+ "negc %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_expand "negdi2"
+ [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (neg:DI (match_operand:DI 1 "arith_reg_operand" "")))
+ (clobber (reg:SI 18))]
+ ""
+ "
+{
+ int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1);
+ int high_word = (TARGET_LITTLE_ENDIAN ? 1 : 0);
+
+ rtx low_src = operand_subword (operands[1], low_word, 0, DImode);
+ rtx high_src = operand_subword (operands[1], high_word, 0, DImode);
+
+ rtx low_dst = operand_subword (operands[0], low_word, 1, DImode);
+ rtx high_dst = operand_subword (operands[0], high_word, 1, DImode);
+
+ emit_insn (gen_clrt ());
+ emit_insn (gen_negc (low_dst, low_src));
+ emit_insn (gen_negc (high_dst, high_src));
+ DONE;
+}")
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
+ ""
+ "neg %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (not:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
+ ""
+ "not %1,%0"
+ [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Zero extension instructions
+;; -------------------------------------------------------------------------
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "arith_reg_operand" "r")))]
+ ""
+ "extu.w %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (zero_extend:SI (match_operand:QI 1 "arith_reg_operand" "r")))]
+ ""
+ "extu.b %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "arith_reg_operand" "=r")
+ (zero_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))]
+ ""
+ "extu.b %1,%0"
+ [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Sign extension instructions
+;; -------------------------------------------------------------------------
+
+;; ??? This should be a define expand.
+;; ??? Or perhaps it should be dropped?
+
+;; There is no point in defining extendsidi2; convert_move generates good
+;; code for that.
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "general_movsrc_operand" "r,m")))]
+ ""
+ "@
+ exts.w %1,%0
+ mov.w %1,%0"
+ [(set_attr "type" "arith,load")])
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "general_movsrc_operand" "r,m")))]
+ ""
+ "@
+ exts.b %1,%0
+ mov.b %1,%0"
+ [(set_attr "type" "arith,load")])
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "arith_reg_operand" "=r,r")
+ (sign_extend:HI (match_operand:QI 1 "general_movsrc_operand" "r,m")))]
+ ""
+ "@
+ exts.b %1,%0
+ mov.b %1,%0"
+ [(set_attr "type" "arith,load")])
+
+;; -------------------------------------------------------------------------
+;; Move instructions
+;; -------------------------------------------------------------------------
+
+;; Define push and pop so that it is easy for sh.c to emit them.
+
+(define_expand "push"
+ [(set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 0 "register_operand" "r,l,x"))]
+ ""
+ "")
+
+(define_expand "pop"
+ [(set (match_operand:SI 0 "register_operand" "=r,l,x")
+ (mem:SI (post_inc:SI (reg:SI 15))))]
+ ""
+ "")
+
+(define_expand "push_e"
+ [(parallel [(set (mem:SF (pre_dec:SI (reg:SI 15)))
+ (match_operand:SF 0 "" ""))
+ (use (reg:PSI 48))
+ (clobber (scratch:SI))])]
+ ""
+ "")
+
+(define_insn "push_fpul"
+ [(set (mem:SF (pre_dec:SI (reg:SI 15))) (reg:SF 22))]
+ "TARGET_SH3E"
+ "sts.l fpul,@-r15"
+ [(set_attr "type" "store")
+ (set_attr "hit_stack" "yes")])
+
+;; DFmode pushes for sh4 require a lot of what is defined for movdf_i4,
+;; so use that.
+(define_expand "push_4"
+ [(parallel [(set (mem:DF (pre_dec:SI (reg:SI 15))) (match_operand:DF 0 "" ""))
+ (use (reg:PSI 48))
+ (clobber (scratch:SI))])]
+ ""
+ "")
+
+(define_expand "pop_e"
+ [(parallel [(set (match_operand:SF 0 "" "")
+ (mem:SF (post_inc:SI (reg:SI 15))))
+ (use (reg:PSI 48))
+ (clobber (scratch:SI))])]
+ ""
+ "")
+
+(define_insn "pop_fpul"
+ [(set (reg:SF 22) (mem:SF (post_inc:SI (reg:SI 15))))]
+ "TARGET_SH3E"
+ "lds.l @r15+,fpul"
+ [(set_attr "type" "load")
+ (set_attr "hit_stack" "yes")])
+
+(define_expand "pop_4"
+ [(parallel [(set (match_operand:DF 0 "" "")
+ (mem:DF (post_inc:SI (reg:SI 15))))
+ (use (reg:PSI 48))
+ (clobber (scratch:SI))])]
+ ""
+ "")
+
+;; These two patterns can happen as the result of optimization, when
+;; comparisons get simplified to a move of zero or 1 into the T reg.
+;; They don't disappear completely, because the T reg is a fixed hard reg.
+
+(define_insn "clrt"
+ [(set (reg:SI 18) (const_int 0))]
+ ""
+ "clrt")
+
+(define_insn "sett"
+ [(set (reg:SI 18) (const_int 1))]
+ ""
+ "sett")
+
+;; t/r is first, so that it will be preferred over r/r when reloading a move
+;; of a pseudo-reg into the T reg
+(define_insn "movsi_i"
+ [(set (match_operand:SI 0 "general_movdst_operand" "=t,r,r,r,r,r,m,<,<,xl,x,l,r")
+ (match_operand:SI 1 "general_movsrc_operand" "r,Q,rI,mr,xl,t,r,x,l,r,>,>,i"))]
+ "
+ ! TARGET_SH3E
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ cmp/pl %1
+ mov.l %1,%0
+ mov %1,%0
+ mov.l %1,%0
+ sts %1,%0
+ movt %0
+ mov.l %1,%0
+ sts.l %1,%0
+ sts.l %1,%0
+ lds %1,%0
+ lds.l %1,%0
+ lds.l %1,%0
+ fake %1,%0"
+ [(set_attr "type" "*,pcload_si,move,load_si,move,move,store,store,pstore,move,load,pload,pcload_si")
+ (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*")])
+
+;; t/r must come after r/r, lest reload try to reload stuff like
+;; (subreg:SI (reg:SF 38 fr14) 0) into T (compiling stdlib/strtod.c -m3e -O2)
+;; ??? This allows moves from macl to fpul to be recognized, but these moves
+;; will require a reload.
+(define_insn "movsi_ie"
+ [(set (match_operand:SI 0 "general_movdst_operand" "=r,r,t,r,r,r,m,<,<,xl,x,l,y,r,y,r,y")
+ (match_operand:SI 1 "general_movsrc_operand" "Q,rI,r,mr,xl,t,r,x,l,r,>,>,>,i,r,y,y"))]
+ "TARGET_SH3E
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ mov.l %1,%0
+ mov %1,%0
+ cmp/pl %1
+ mov.l %1,%0
+ sts %1,%0
+ movt %0
+ mov.l %1,%0
+ sts.l %1,%0
+ sts.l %1,%0
+ lds %1,%0
+ lds.l %1,%0
+ lds.l %1,%0
+ lds.l %1,%0
+ fake %1,%0
+ lds %1,%0
+ sts %1,%0
+ ! move optimized away"
+ [(set_attr "type" "pcload_si,move,*,load_si,move,move,store,store,pstore,move,load,pload,load,pcload_si,gp_fpul,gp_fpul,nil")
+ (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")])
+
+(define_insn "movsi_i_lowpart"
+ [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "=r,r,r,r,r,m,r"))
+ (match_operand:SI 1 "general_movsrc_operand" "Q,rI,mr,xl,t,r,i"))]
+ "register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)"
+ "@
+ mov.l %1,%0
+ mov %1,%0
+ mov.l %1,%0
+ sts %1,%0
+ movt %0
+ mov.l %1,%0
+ fake %1,%0"
+ [(set_attr "type" "pcload,move,load,move,move,store,pcload")])
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_movdst_operand" "")
+ (match_operand:SI 1 "general_movsrc_operand" ""))]
+ ""
+ "{ if (prepare_move_operands (operands, SImode)) DONE; }")
+
+(define_expand "ic_invalidate_line"
+ [(parallel [(unspec_volatile [(match_operand:SI 0 "register_operand" "+r")
+ (match_dup 1)] 12)
+ (clobber (scratch:SI))])]
+ "TARGET_HARD_SH4"
+ "
+{
+ operands[0] = force_reg (Pmode, operands[0]);
+ operands[1] = force_reg (Pmode, GEN_INT (0xf0000008));
+}")
+
+;; The address %0 is assumed to be 4-aligned at least. Thus, by ORing
+;; 0xf0000008, we get the low-order bits *1*00 (binary), which fits
+;; the requirement *0*00 for associative address writes. The alignment of
+;; %0 implies that its least significant bit is cleared,
+;; thus we clear the V bit of a matching entry if there is one.
+(define_insn "ic_invalidate_line_i"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "register_operand" "r,r")] 12)
+ (clobber (match_scratch:SI 2 "=&r,1"))]
+ "TARGET_HARD_SH4"
+ "ocbwb\\t@%0\;extu.w\\t%0,%2\;or\\t%r1,%r2\;mov.l\\t%0,@%2"
+ [(set_attr "length" "8")])
+
+(define_insn "movqi_i"
+ [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l")
+ (match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))]
+ "arith_reg_operand (operands[0], QImode)
+ || arith_reg_operand (operands[1], QImode)"
+ "@
+ mov %1,%0
+ mov.b %1,%0
+ mov.b %1,%0
+ movt %0
+ sts %1,%0
+ lds %1,%0"
+ [(set_attr "type" "move,load,store,move,move,move")])
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "{ if (prepare_move_operands (operands, QImode)) DONE; }")
+
+(define_insn "movhi_i"
+ [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,r,m,r,l,r")
+ (match_operand:HI 1 "general_movsrc_operand" "Q,rI,m,t,r,l,r,i"))]
+ "arith_reg_operand (operands[0], HImode)
+ || arith_reg_operand (operands[1], HImode)"
+ "@
+ mov.w %1,%0
+ mov %1,%0
+ mov.w %1,%0
+ movt %0
+ mov.w %1,%0
+ sts %1,%0
+ lds %1,%0
+ fake %1,%0"
+ [(set_attr "type" "pcload,move,load,move,store,move,move,pcload")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_movdst_operand" "")
+ (match_operand:HI 1 "general_movsrc_operand" ""))]
+ ""
+ "{ if (prepare_move_operands (operands, HImode)) DONE; }")
+
+;; ??? This should be a define expand.
+
+;; x/r can be created by inlining/cse, e.g. for execute/961213-1.c
+;; compiled with -m2 -ml -O3 -funroll-loops
+(define_insn ""
+ [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m,r,r,r,*!x")
+ (match_operand:DI 1 "general_movsrc_operand" "Q,r,m,r,I,i,x,r"))]
+ "arith_reg_operand (operands[0], DImode)
+ || arith_reg_operand (operands[1], DImode)"
+ "* return output_movedouble (insn, operands, DImode);"
+ [(set_attr "length" "4")
+ (set_attr "type" "pcload,move,load,store,move,pcload,move,move")])
+
+;; If the output is a register and the input is memory or a register, we have
+;; to be careful and see which word needs to be loaded first.
+
+(define_split
+ [(set (match_operand:DI 0 "general_movdst_operand" "")
+ (match_operand:DI 1 "general_movsrc_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "
+{
+ int regno;
+
+ if ((GET_CODE (operands[0]) == MEM
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ || (GET_CODE (operands[1]) == MEM
+ && GET_CODE (XEXP (operands[1], 0)) == POST_INC))
+ FAIL;
+
+ if (GET_CODE (operands[0]) == REG)
+ regno = REGNO (operands[0]);
+ else if (GET_CODE (operands[0]) == SUBREG)
+ regno = REGNO (SUBREG_REG (operands[0])) + SUBREG_WORD (operands[0]);
+ else if (GET_CODE (operands[0]) == MEM)
+ regno = -1;
+
+ if (regno == -1
+ || ! refers_to_regno_p (regno, regno + 1, operands[1], 0))
+ {
+ operands[2] = operand_subword (operands[0], 0, 0, DImode);
+ operands[3] = operand_subword (operands[1], 0, 0, DImode);
+ operands[4] = operand_subword (operands[0], 1, 0, DImode);
+ operands[5] = operand_subword (operands[1], 1, 0, DImode);
+ }
+ else
+ {
+ operands[2] = operand_subword (operands[0], 1, 0, DImode);
+ operands[3] = operand_subword (operands[1], 1, 0, DImode);
+ operands[4] = operand_subword (operands[0], 0, 0, DImode);
+ operands[5] = operand_subword (operands[1], 0, 0, DImode);
+ }
+
+ if (operands[2] == 0 || operands[3] == 0
+ || operands[4] == 0 || operands[5] == 0)
+ FAIL;
+}")
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "general_movdst_operand" "")
+ (match_operand:DI 1 "general_movsrc_operand" ""))]
+ ""
+ "{ if ( prepare_move_operands (operands, DImode)) DONE; }")
+
+;; ??? This should be a define expand.
+
+(define_insn "movdf_k"
+ [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m")
+ (match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))]
+ "(! TARGET_SH4 || reload_completed
+ /* ??? We provide some insn so that direct_{load,store}[DFmode] get set */
+     || (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3)
+     || (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3))
+ && (arith_reg_operand (operands[0], DFmode)
+ || arith_reg_operand (operands[1], DFmode))"
+ "* return output_movedouble (insn, operands, DFmode);"
+ [(set_attr "length" "4")
+ (set_attr "type" "move,pcload,load,store")])
+
+;; All alternatives of movdf_i4 are split for ! TARGET_FMOVD.
+;; However, the d/F/c/z alternative cannot be split directly; it is converted
+;; with special code in machine_dependent_reorg into a load of the R0_REG and
+;; the d/m/c/X alternative, which is split later into single-precision
+;; instructions. And when not optimizing, no splits are done before fixing
+;; up pcloads, so we need usable length information for that.
+(define_insn "movdf_i4"
+ [(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d")
+ (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r"))
+ (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c"))
+ (clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))]
+ "TARGET_SH4
+ && (arith_reg_operand (operands[0], DFmode)
+ || arith_reg_operand (operands[1], DFmode))"
+ "@
+ fmov %1,%0
+ #
+ #
+ fmov.d %1,%0
+ fmov.d %1,%0
+ #
+ #
+ #
+ #
+ #"
+ [(set_attr_alternative "length"
+ [(if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 4))
+ (const_int 4)
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6))
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6))
+ (const_int 4)
+ (const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn)
+ (const_int 8) (const_int 8)])
+ (set_attr "type" "fmove,move,pcload,load,store,pcload,load,store,load,load")])
+
+;; Moving DFmode between fp/general registers through memory
+;; (the top of the stack) is faster than moving through fpul even for
+;; little endian. Because the type of an instruction is important for its
+;; scheduling, it is beneficial to split these operations, rather than
+;; emitting them in one single chunk, even if this will expose a stack
+;; use that will prevent scheduling of other stack accesses beyond this
+;; instruction.
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (match_scratch:SI 3 "=X"))]
+ "TARGET_SH4 && reload_completed
+ && (true_regnum (operands[0]) < 16) != (true_regnum (operands[1]) < 16)"
+ [(const_int 0)]
+ "
+{
+ rtx insn, tos;
+
+ tos = gen_rtx (MEM, DFmode, gen_rtx (PRE_DEC, Pmode, stack_pointer_rtx));
+ insn = emit_insn (gen_movdf_i4 (tos, operands[1], operands[2]));
+ REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, stack_pointer_rtx, NULL_RTX);
+ tos = gen_rtx (MEM, DFmode, gen_rtx (POST_INC, Pmode, stack_pointer_rtx));
+ insn = emit_insn (gen_movdf_i4 (operands[0], tos, operands[2]));
+ REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, stack_pointer_rtx, NULL_RTX);
+ DONE;
+}")
+
+;; local-alloc sometimes allocates scratch registers even when not required,
+;; so we must be prepared to handle these.
+
+;; Remove the use and clobber from a movdf_i4 so that we can use movdf_k.
+(define_split
+ [(set (match_operand:DF 0 "general_movdst_operand" "")
+ (match_operand:DF 1 "general_movsrc_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (match_scratch:SI 3 "X"))]
+ "TARGET_SH4
+ && reload_completed
+ && true_regnum (operands[0]) < 16
+ && true_regnum (operands[1]) < 16"
+ [(set (match_dup 0) (match_dup 1))]
+ "
+{
+ /* If this was a reg <-> mem operation with base + index reg addressing,
+ we have to handle this in a special way. */
+ rtx mem = operands[0];
+ int store_p = 1;
+ if (! memory_operand (mem, DFmode))
+ {
+ mem = operands[1];
+ store_p = 0;
+ }
+ if (GET_CODE (mem) == SUBREG && SUBREG_WORD (mem) == 0)
+ mem = SUBREG_REG (mem);
+ if (GET_CODE (mem) == MEM)
+ {
+ rtx addr = XEXP (mem, 0);
+ if (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && GET_CODE (XEXP (addr, 1)) == REG)
+ {
+ int offset;
+ rtx reg0 = gen_rtx (REG, Pmode, 0);
+	rtx regop = operands[store_p], word0, word1;
+
+ if (GET_CODE (regop) == SUBREG)
+ regop = alter_subreg (regop);
+ if (REGNO (XEXP (addr, 0)) == REGNO (XEXP (addr, 1)))
+ offset = 2;
+ else
+ offset = 4;
+ mem = copy_rtx (mem);
+ PUT_MODE (mem, SImode);
+	word0 = gen_rtx (SUBREG, SImode, regop, 0);
+ emit_insn (store_p
+ ? gen_movsi_ie (mem, word0) : gen_movsi_ie (word0, mem));
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset)));
+ mem = copy_rtx (mem);
+	word1 = gen_rtx (SUBREG, SImode, regop, 1);
+ emit_insn (store_p
+ ? gen_movsi_ie (mem, word1) : gen_movsi_ie (word1, mem));
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset)));
+ DONE;
+ }
+ }
+}")
+
+;; Split away the clobber of r0 after machine_dependent_reorg has fixed pcloads.
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (reg:SI 0))]
+ "TARGET_SH4 && reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))
+ (clobber (scratch:SI))])]
+ "")
+
+(define_expand "reload_indf"
+ [(parallel [(set (match_operand:DF 0 "register_operand" "=f")
+ (match_operand:DF 1 "immediate_operand" "FQ"))
+ (use (reg:PSI 48))
+ (clobber (match_operand:SI 2 "register_operand" "=&z"))])]
+ ""
+ "")
+
+(define_expand "reload_outdf"
+ [(parallel [(set (match_operand:DF 0 "register_operand" "=r,f")
+ (match_operand:DF 1 "register_operand" "af,r"))
+ (clobber (match_operand:SI 2 "register_operand" "=&y,y"))])]
+ ""
+ "")
+
+;; Simplify no-op moves.
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 "X"))]
+ "TARGET_SH3E && reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(set (match_dup 0) (match_dup 0))]
+ "")
+
+;; fmovd substitute post-reload splits
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (match_scratch:SI 3 "X"))]
+ "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
+ && true_regnum (operands[0]) >= FIRST_FP_REG
+ && true_regnum (operands[1]) >= FIRST_FP_REG"
+ [(const_int 0)]
+ "
+{
+ int dst = true_regnum (operands[0]), src = true_regnum (operands[1]);
+ emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, dst),
+ gen_rtx (REG, SFmode, src), operands[2]));
+ emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, dst + 1),
+ gen_rtx (REG, SFmode, src + 1), operands[2]));
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (mem:DF (match_operand:SI 1 "register_operand" "")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (match_scratch:SI 3 "X"))]
+ "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
+ && true_regnum (operands[0]) >= FIRST_FP_REG
+ && find_regno_note (insn, REG_DEAD, true_regnum (operands[1]))"
+ [(const_int 0)]
+ "
+{
+ int regno = true_regnum (operands[0]);
+ rtx insn;
+ rtx mem2 = gen_rtx (MEM, SFmode, gen_rtx (POST_INC, Pmode, operands[1]));
+
+ insn = emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode,
+ regno + !! TARGET_LITTLE_ENDIAN),
+ mem2, operands[2]));
+ REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[1], NULL_RTX);
+ insn = emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode,
+ regno + ! TARGET_LITTLE_ENDIAN),
+ gen_rtx (MEM, SFmode, operands[1]),
+ operands[2]));
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (match_scratch:SI 3 "X"))]
+ "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
+ && true_regnum (operands[0]) >= FIRST_FP_REG"
+ [(const_int 0)]
+ "
+{
+ int regno = true_regnum (operands[0]);
+ rtx addr, insn, adjust = NULL_RTX;
+ rtx mem2 = copy_rtx (operands[1]);
+ rtx reg0 = gen_rtx_REG (SFmode, regno + !! TARGET_LITTLE_ENDIAN);
+ rtx reg1 = gen_rtx_REG (SFmode, regno + ! TARGET_LITTLE_ENDIAN);
+
+ PUT_MODE (mem2, SFmode);
+ operands[1] = copy_rtx (mem2);
+ addr = XEXP (mem2, 0);
+ if (GET_CODE (addr) != POST_INC)
+ {
+ /* If we have to modify the stack pointer, the value that we have
+ read with post-increment might be modified by an interrupt,
+ so write it back. */
+ if (REGNO (addr) == STACK_POINTER_REGNUM)
+ adjust = gen_push_e (reg0);
+ else
+ adjust = gen_addsi3 (addr, addr, GEN_INT (-4));
+ XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr);
+ }
+ addr = XEXP (addr, 0);
+ insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2]));
+ REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, addr, NULL_RTX);
+ insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2]));
+ if (adjust)
+ emit_insn (adjust);
+ else
+ REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, addr, NULL_RTX);
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:DF 0 "memory_operand" "")
+ (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (match_scratch:SI 3 "X"))]
+ "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
+ && true_regnum (operands[1]) >= FIRST_FP_REG"
+ [(const_int 0)]
+ "
+{
+ int regno = true_regnum (operands[1]);
+ rtx insn, addr, adjust = NULL_RTX;
+
+ operands[0] = copy_rtx (operands[0]);
+ PUT_MODE (operands[0], SFmode);
+ insn = emit_insn (gen_movsf_ie (operands[0],
+ gen_rtx (REG, SFmode,
+ regno + ! TARGET_LITTLE_ENDIAN),
+ operands[2]));
+ operands[0] = copy_rtx (operands[0]);
+ addr = XEXP (operands[0], 0);
+ if (GET_CODE (addr) != PRE_DEC)
+ {
+ adjust = gen_addsi3 (addr, addr, GEN_INT (4));
+ emit_insn_before (adjust, insn);
+ XEXP (operands[0], 0) = addr = gen_rtx (PRE_DEC, SImode, addr);
+ }
+ addr = XEXP (addr, 0);
+ if (! adjust)
+ REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX);
+ insn = emit_insn (gen_movsf_ie (operands[0],
+ gen_rtx (REG, SFmode,
+ regno + !! TARGET_LITTLE_ENDIAN),
+ operands[2]));
+ REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX);
+ DONE;
+}")
+
+;; The '&' for operand 2 is not really true, but push_secondary_reload
+;; insists on it.
+;; Operand 1 must accept FPUL_REGS in case fpul is reloaded to memory,
+;; to avoid a bogus tertiary reload.
+;; We need a tertiary reload when a floating point register is reloaded
+;; to memory, so the predicate for operand 0 must accept this, while the
+;; constraint of operand 1 must reject the secondary reload register.
+;; Thus, the secondary reload register for this case has to be GENERAL_REGS,
+;; too.
+;; By having the predicate for operand 0 reject any register, we make
+;; sure that the ordinary moves that just need an intermediate register
+;; won't get a bogus tertiary reload.
+;; We use tertiary_reload_operand instead of memory_operand here because
+;; memory_operand rejects operands that are not directly addressable, e.g.:
+;; (mem:SF (plus:SI (reg:SI 14 r14)
+;; (const_int 132)))
+
+(define_expand "reload_outsf"
+ [(parallel [(set (match_operand:SF 2 "register_operand" "=&r")
+ (match_operand:SF 1 "register_operand" "y"))
+ (clobber (scratch:SI))])
+ (parallel [(set (match_operand:SF 0 "tertiary_reload_operand" "=m")
+ (match_dup 2))
+ (clobber (scratch:SI))])]
+ ""
+ "")
+
+;; If the output is a register and the input is memory or a register, we have
+;; to be careful and see which word needs to be loaded first.
+
+(define_split
+ [(set (match_operand:DF 0 "general_movdst_operand" "")
+ (match_operand:DF 1 "general_movsrc_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "
+{
+ int regno;
+
+ if ((GET_CODE (operands[0]) == MEM
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ || (GET_CODE (operands[1]) == MEM
+ && GET_CODE (XEXP (operands[1], 0)) == POST_INC))
+ FAIL;
+
+ if (GET_CODE (operands[0]) == REG)
+ regno = REGNO (operands[0]);
+ else if (GET_CODE (operands[0]) == SUBREG)
+ regno = REGNO (SUBREG_REG (operands[0])) + SUBREG_WORD (operands[0]);
+ else if (GET_CODE (operands[0]) == MEM)
+ regno = -1;
+
+ if (regno == -1
+ || ! refers_to_regno_p (regno, regno + 1, operands[1], 0))
+ {
+ operands[2] = operand_subword (operands[0], 0, 0, DFmode);
+ operands[3] = operand_subword (operands[1], 0, 0, DFmode);
+ operands[4] = operand_subword (operands[0], 1, 0, DFmode);
+ operands[5] = operand_subword (operands[1], 1, 0, DFmode);
+ }
+ else
+ {
+ operands[2] = operand_subword (operands[0], 1, 0, DFmode);
+ operands[3] = operand_subword (operands[1], 1, 0, DFmode);
+ operands[4] = operand_subword (operands[0], 0, 0, DFmode);
+ operands[5] = operand_subword (operands[1], 0, 0, DFmode);
+ }
+
+ if (operands[2] == 0 || operands[3] == 0
+ || operands[4] == 0 || operands[5] == 0)
+ FAIL;
+}")
+
+;; If a base address generated by LEGITIMIZE_ADDRESS for SImode is
+;; used only once, let combine add in the index again.
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "" ""))
+ (clobber (match_operand 2 "register_operand" ""))]
+ "! reload_in_progress && ! reload_completed"
+ [(use (reg:SI 0))]
+ "
+{
+ rtx addr, reg, const_int;
+
+ if (GET_CODE (operands[1]) != MEM)
+ FAIL;
+ addr = XEXP (operands[1], 0);
+ if (GET_CODE (addr) != PLUS)
+ FAIL;
+ reg = XEXP (addr, 0);
+ const_int = XEXP (addr, 1);
+ if (GET_CODE (reg) != REG || GET_CODE (const_int) != CONST_INT)
+ FAIL;
+ emit_move_insn (operands[2], const_int);
+ emit_move_insn (operands[0],
+ change_address (operands[1], VOIDmode,
+ gen_rtx (PLUS, SImode, reg, operands[2])));
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:SI 1 "" "")
+ (match_operand:SI 0 "register_operand" ""))
+ (clobber (match_operand 2 "register_operand" ""))]
+ "! reload_in_progress && ! reload_completed"
+ [(use (reg:SI 0))]
+ "
+{
+ rtx addr, reg, const_int;
+
+ if (GET_CODE (operands[1]) != MEM)
+ FAIL;
+ addr = XEXP (operands[1], 0);
+ if (GET_CODE (addr) != PLUS)
+ FAIL;
+ reg = XEXP (addr, 0);
+ const_int = XEXP (addr, 1);
+ if (GET_CODE (reg) != REG || GET_CODE (const_int) != CONST_INT)
+ FAIL;
+ emit_move_insn (operands[2], const_int);
+ emit_move_insn (change_address (operands[1], VOIDmode,
+ gen_rtx (PLUS, SImode, reg, operands[2])),
+ operands[0]);
+ DONE;
+}")
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "general_movdst_operand" "")
+ (match_operand:DF 1 "general_movsrc_operand" ""))]
+ ""
+ "
+{
+ if (prepare_move_operands (operands, DFmode)) DONE;
+ if (TARGET_SH4)
+ {
+ if (no_new_pseudos)
+ {
+ /* ??? FIXME: This is only a stopgap fix. There is no guarantee
+ that fpscr is in the right state. */
+ emit_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ()));
+ DONE;
+ }
+ emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ()));
+ /* We need something to tag possible REG_LIBCALL notes on to. */
+ if (TARGET_FPU_SINGLE && rtx_equal_function_value_matters
+ && GET_CODE (operands[0]) == REG)
+ emit_insn (gen_mov_nop (operands[0]));
+ DONE;
+ }
+}")
+
+
+(define_insn "movsf_i"
+ [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r")
+ (match_operand:SF 1 "general_movsrc_operand" "r,I,FQ,mr,r,r,l"))]
+ "
+ (! TARGET_SH3E
+ /* ??? We provide some insn so that direct_{load,store}[SFmode] get set */
+   || (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3)
+   || (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3))
+ && (arith_reg_operand (operands[0], SFmode)
+ || arith_reg_operand (operands[1], SFmode))"
+ "@
+ mov %1,%0
+ mov %1,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ lds %1,%0
+ sts %1,%0"
+ [(set_attr "type" "move,move,pcload,load,store,move,move")])
+
+;; We may not split the ry/yr/XX alternatives to movsi_ie, since
+;; update_flow_info would not know where to put REG_EQUAL notes
+;; when the destination changes mode.
+(define_insn "movsf_ie"
+ [(set (match_operand:SF 0 "general_movdst_operand"
+ "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,y")
+ (match_operand:SF 1 "general_movsrc_operand"
+ "f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y"))
+ (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c"))
+ (clobber (match_scratch:SI 3 "=X,X,X,X,&z,X,X,X,X,X,X,X,X,y,X,X,X"))]
+
+ "TARGET_SH3E
+ && (arith_reg_operand (operands[0], SFmode)
+ || arith_reg_operand (operands[1], SFmode))"
+ "@
+ fmov %1,%0
+ mov %1,%0
+ fldi0 %0
+ fldi1 %0
+ #
+ fmov.s %1,%0
+ fmov.s %1,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ fsts fpul,%0
+ flds %1,fpul
+ lds.l %1,%0
+ #
+ sts %1,%0
+ lds %1,%0
+ ! move optimized away"
+ [(set_attr "type" "fmove,move,fmove,fmove,pcload,load,store,pcload,load,store,fmove,fmove,load,*,gp_fpul,gp_fpul,nil")
+ (set_attr "length" "*,*,*,*,4,*,*,*,*,*,2,2,2,4,2,2,0")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (reg:SI 22))]
+ ""
+ [(parallel [(set (reg:SF 22) (match_dup 1))
+ (use (match_dup 2))
+ (clobber (scratch:SI))])
+ (parallel [(set (match_dup 0) (reg:SF 22))
+ (use (match_dup 2))
+ (clobber (scratch:SI))])]
+ "")
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_movdst_operand" "")
+ (match_operand:SF 1 "general_movsrc_operand" ""))]
+ ""
+ "
+{
+ if (prepare_move_operands (operands, SFmode))
+ DONE;
+ if (TARGET_SH3E)
+ {
+ if (no_new_pseudos)
+ {
+ /* ??? FIXME: This is only a stopgap fix. There is no guarantee
+ that fpscr is in the right state. */
+ emit_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ()));
+ DONE;
+ }
+ emit_sf_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ()));
+ /* We need something to tag possible REG_LIBCALL notes on to. */
+ if (! TARGET_FPU_SINGLE && rtx_equal_function_value_matters
+ && GET_CODE (operands[0]) == REG)
+ emit_insn (gen_mov_nop (operands[0]));
+ DONE;
+ }
+}")
+
+(define_insn "mov_nop"
+ [(set (match_operand 0 "register_operand" "") (match_dup 0))]
+ "TARGET_SH3E"
+ ""
+ [(set_attr "length" "0")
+ (set_attr "type" "nil")])
+
+(define_expand "reload_insf"
+ [(parallel [(set (match_operand:SF 0 "register_operand" "=f")
+ (match_operand:SF 1 "immediate_operand" "FQ"))
+ (use (reg:PSI 48))
+ (clobber (match_operand:SI 2 "register_operand" "=&z"))])]
+ ""
+ "")
+
+(define_expand "reload_insi"
+ [(parallel [(set (match_operand:SF 0 "register_operand" "=y")
+ (match_operand:SF 1 "immediate_operand" "FQ"))
+ (clobber (match_operand:SI 2 "register_operand" "=&z"))])]
+ ""
+ "")
+
+(define_insn "*movsi_y"
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
+ (match_operand:SI 1 "immediate_operand" "Qi,I"))
+ (clobber (match_scratch:SI 3 "=&z,r"))]
+ "TARGET_SH3E
+ && (reload_in_progress || reload_completed)"
+ "#"
+ [(set_attr "length" "4")
+ (set_attr "type" "pcload,move")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "y")
+ (match_operand:SI 1 "immediate_operand" "I"))
+ (clobber (match_operand:SI 2 "register_operand" "r"))]
+ ""
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "y")
+ (match_operand:SI 1 "memory_operand" ">"))
+ (clobber (reg:SI 0))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+ "")
+
+;; ------------------------------------------------------------------------
+;; Define the real conditional branch instructions.
+;; ------------------------------------------------------------------------
+
+(define_insn "branch_true"
+ [(set (pc) (if_then_else (ne (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "* return output_branch (1, insn, operands);"
+ [(set_attr "type" "cbranch")])
+
+(define_insn "branch_false"
+ [(set (pc) (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "* return output_branch (0, insn, operands);"
+ [(set_attr "type" "cbranch")])
+
+;; Patterns to prevent reorg from re-combining a condbranch with a branch
+;; whose destination is too far away.
+;; The const_int_operand is distinct for each branch target; it avoids
+;; unwanted matches with redundant_insn.
+(define_insn "block_branch_redirect"
+ [(set (pc) (unspec [(match_operand 0 "const_int_operand" "")] 4))]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; This one has the additional purpose to record a possible scratch register
+;; for the following branch.
+(define_insn "indirect_jump_scratch"
+ [(set (match_operand 0 "register_operand" "r")
+ (unspec [(match_operand 1 "const_int_operand" "")] 4))]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; Conditional branch insns
+
+(define_expand "beq"
+ [(set (pc)
+ (if_then_else (ne (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "from_compare (operands, EQ);")
+
+(define_expand "bne"
+ [(set (pc)
+ (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "from_compare (operands, EQ);")
+
+(define_expand "bgt"
+ [(set (pc)
+ (if_then_else (ne (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "from_compare (operands, GT);")
+
+(define_expand "blt"
+ [(set (pc)
+ (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
+ {
+ rtx tmp = sh_compare_op0;
+ sh_compare_op0 = sh_compare_op1;
+ sh_compare_op1 = tmp;
+ emit_insn (gen_bgt (operands[0]));
+ DONE;
+ }
+ from_compare (operands, GE);
+}")
+
+(define_expand "ble"
+ [(set (pc)
+ (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ if (TARGET_SH3E
+ && TARGET_IEEE
+ && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
+ {
+ rtx tmp = sh_compare_op0;
+ sh_compare_op0 = sh_compare_op1;
+ sh_compare_op1 = tmp;
+ emit_insn (gen_bge (operands[0]));
+ DONE;
+ }
+ from_compare (operands, GT);
+}")
+
+(define_expand "bge"
+ [(set (pc)
+ (if_then_else (ne (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ if (TARGET_SH3E
+ && ! TARGET_IEEE
+ && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
+ {
+ rtx tmp = sh_compare_op0;
+ sh_compare_op0 = sh_compare_op1;
+ sh_compare_op1 = tmp;
+ emit_insn (gen_ble (operands[0]));
+ DONE;
+ }
+ from_compare (operands, GE);
+}")
+
+(define_expand "bgtu"
+ [(set (pc)
+ (if_then_else (ne (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "from_compare (operands, GTU); ")
+
+(define_expand "bltu"
+ [(set (pc)
+ (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "from_compare (operands, GEU);")
+
+(define_expand "bgeu"
+ [(set (pc)
+ (if_then_else (ne (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "from_compare (operands, GEU);")
+
+(define_expand "bleu"
+ [(set (pc)
+ (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "from_compare (operands, GTU);")
+
+;; ------------------------------------------------------------------------
+;; Jump and linkage insns
+;; ------------------------------------------------------------------------
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "*
+{
+ /* The length is 16 if the delay slot is unfilled. */
+  if (get_attr_length (insn) > 4)
+    return output_far_jump (insn, operands[0]);
+ else
+ return \"bra %l0%#\";
+}"
+ [(set_attr "type" "jump")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "calli"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (use (reg:SI 48))
+ (clobber (reg:SI 17))]
+ ""
+ "jsr @%0%#"
+ [(set_attr "type" "call")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_valuei"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (use (reg:SI 48))
+ (clobber (reg:SI 17))]
+ ""
+ "jsr @%1%#"
+ [(set_attr "type" "call")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "call"
+ [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" ""))
+ (match_operand 1 "" ""))
+ (use (reg:SI 48))
+ (clobber (reg:SI 17))])]
+ ""
+ "operands[0] = force_reg (SImode, XEXP (operands[0], 0));")
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "arith_reg_operand" "")
+ (call (mem:SI (match_operand 1 "arith_reg_operand" ""))
+ (match_operand 2 "" "")))
+ (use (reg:SI 48))
+ (clobber (reg:SI 17))])]
+ ""
+ "operands[1] = force_reg (SImode, XEXP (operands[1], 0));")
+
+(define_insn "indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "arith_reg_operand" "r"))]
+ ""
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+;; The use of operands 1 / 2 helps us distinguish case table jumps
+;; (which can be present in structured code) from indirect jumps (which
+;; can not).  This allows -fprofile-arcs to work.
+
+;; For SH1 processors.
+(define_insn "casesi_jump_1"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+;; For all later processors.
+(define_insn "casesi_jump_2"
+ [(set (pc) (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (label_ref (match_operand 1 "" ""))))
+ (use (label_ref (match_operand 2 "" "")))]
+ "! INSN_UID (operands[1]) || prev_real_insn (operands[1]) == insn"
+ "braf %0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+;; Call subroutine returning any type.
+;; ??? This probably doesn't work.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ "TARGET_SH3E"
+ "
+{
+ int i;
+
+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+}")
+
+;; ------------------------------------------------------------------------
+;; Misc insns
+;; ------------------------------------------------------------------------
+
+(define_insn "dect"
+ [(set (reg:SI 18)
+ (eq:SI (match_operand:SI 0 "arith_reg_operand" "+r") (const_int 1)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))]
+ "TARGET_SH2"
+ "dt %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop")
+
+;; Load address of a label. This is only generated by the casesi expand,
+;; and by machine_dependent_reorg (fixing up fp moves).
+;; This must use an unspec, because it only works for labels that are
+;; within range.
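+;; (mova encodes the label as an 8 bit displacement scaled by 4, so the
+;; target must lie within about 1k bytes after the instruction.)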
+
+(define_insn "mova"
+ [(set (reg:SI 0)
+ (unspec [(label_ref (match_operand 0 "" ""))] 1))]
+ ""
+ "mova %O0,r0"
+ [(set_attr "in_delay_slot" "no")
+ (set_attr "type" "arith")])
+
+;; case instruction for switch statements.
+
+;; Operand 0 is the index;
+;; operand 1 is the minimum bound;
+;; operand 2 is the maximum bound - minimum bound + 1;
+;; operand 3 is the CODE_LABEL for the table;
+;; operand 4 is the CODE_LABEL to go to if the index is out of range.
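+;;
+;; A rough sketch of the expansion (illustrative only; the registers
+;; below are pseudos chosen by the expander):
+;;   reg = index - minimum                  (casesi_0)
+;;   T = reg > range; branch out of range   (casesi_0)
+;;   r0 = table address                     (mova, via casesi_worker)
+;;   reg2 = table entry at r0 + scaled reg  (casesi_worker)
+;;   jmp @reg2 or braf reg2                 (casesi_jump_1 / casesi_jump_2)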
+
+(define_expand "casesi"
+ [(match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")
+ (match_operand 3 "" "") (match_operand 4 "" "")]
+ ""
+ "
+{
+ rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (SImode);
+ operands[1] = copy_to_mode_reg (SImode, operands[1]);
+ operands[2] = copy_to_mode_reg (SImode, operands[2]);
+  /* If optimizing, casesi_worker depends on the mode of the instruction
+     before the label it 'uses' - operands[3].  */
+ emit_insn (gen_casesi_0 (operands[0], operands[1], operands[2], operands[4],
+ reg));
+ emit_insn (gen_casesi_worker_0 (reg2, reg, operands[3]));
+ if (TARGET_SH2)
+ emit_jump_insn (gen_casesi_jump_2 (reg2, gen_label_rtx (), operands[3]));
+ else
+ emit_jump_insn (gen_casesi_jump_1 (reg2, operands[3]));
+  /* For SH2 and newer, the ADDR_DIFF_VEC is not actually relative to
+     operands[3], but to the label generated by gen_label_rtx above.  We
+     will fix this up in machine_dependent_reorg.  */
+ emit_barrier ();
+ DONE;
+}")
+
+(define_expand "casesi_0"
+ [(set (match_operand:SI 4 "" "") (match_operand:SI 0 "arith_reg_operand" ""))
+ (set (match_dup 4) (minus:SI (match_dup 4)
+ (match_operand:SI 1 "arith_operand" "")))
+ (set (reg:SI 18)
+ (gtu:SI (match_dup 4)
+ (match_operand:SI 2 "arith_reg_operand" "")))
+ (set (pc)
+ (if_then_else (ne (reg:SI 18)
+ (const_int 0))
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "")
+
+;; ??? reload might clobber r0 if we use it explicitly in the RTL before
+;; reload; using a R0_REGS pseudo reg is likely to give poor code.
+;; So we keep the use of r0 hidden in a R0_REGS clobber until after reload.
+
+(define_insn "casesi_worker_0"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec [(match_operand 1 "register_operand" "0,r")
+ (label_ref (match_operand 2 "" ""))] 2))
+ (clobber (match_scratch:SI 3 "=X,1"))
+ (clobber (match_scratch:SI 4 "=&z,z"))]
+ ""
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec [(match_operand 1 "register_operand" "")
+ (label_ref (match_operand 2 "" ""))] 2))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "! TARGET_SH2 && reload_completed"
+ [(set (reg:SI 0) (unspec [(label_ref (match_dup 2))] 1))
+ (parallel [(set (match_dup 0)
+ (unspec [(reg:SI 0) (match_dup 1) (label_ref (match_dup 2))] 2))
+ (clobber (match_dup 3))])
+ (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI 0)))]
+ "LABEL_NUSES (operands[2])++;")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec [(match_operand 1 "register_operand" "")
+ (label_ref (match_operand 2 "" ""))] 2))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_SH2 && reload_completed"
+ [(set (reg:SI 0) (unspec [(label_ref (match_dup 2))] 1))
+ (parallel [(set (match_dup 0)
+ (unspec [(reg:SI 0) (match_dup 1) (label_ref (match_dup 2))] 2))
+ (clobber (match_dup 3))])]
+ "LABEL_NUSES (operands[2])++;")
+
+(define_insn "*casesi_worker"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec [(reg:SI 0) (match_operand 1 "register_operand" "0,r")
+ (label_ref (match_operand 2 "" ""))] 2))
+ (clobber (match_scratch:SI 3 "=X,1"))]
+ ""
+ "*
+{
+ rtx diff_vec = PATTERN (next_real_insn (operands[2]));
+
+ if (GET_CODE (diff_vec) != ADDR_DIFF_VEC)
+ abort ();
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ return \"shll2 %1\;mov.l @(r0,%1),%0\";
+ case HImode:
+ return \"add %1,%1\;mov.w @(r0,%1),%0\";
+ case QImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ return \"mov.b @(r0,%1),%0\;extu.b %0,%0\";
+ return \"mov.b @(r0,%1),%0\";
+ default:
+ abort ();
+ }
+}"
+ [(set_attr "length" "4")])
+
+;; ??? This is not the proper place to invoke another compiler pass;
+;; alas, there is no proper place to put it.
+;; ??? This is also an odd place for the call to emit_fpscr_use. It
+;; would be all right if it were for a define_expand for return, but
+;; that doesn't mix with emitting a prologue.
+(define_insn "return"
+ [(return)]
+ "emit_fpscr_use (),
+ remove_dead_before_cse (),
+ reload_completed"
+ "%@ %#"
+ [(set_attr "type" "return")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "sh_expand_prologue (); DONE;")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "sh_expand_epilogue ();")
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] 0)]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; ------------------------------------------------------------------------
+;; Scc instructions
+;; ------------------------------------------------------------------------
+
+(define_insn "movt"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (eq:SI (reg:SI 18) (const_int 1)))]
+ ""
+ "movt %0"
+ [(set_attr "type" "arith")])
+
+(define_expand "seq"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (match_dup 1))]
+ ""
+ "operands[1] = prepare_scc_operands (EQ);")
+
+(define_expand "slt"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (match_dup 1))]
+ ""
+ "operands[1] = prepare_scc_operands (LT);")
+
+(define_expand "sle"
+ [(match_operand:SI 0 "arith_reg_operand" "")]
+ ""
+ "
+{
+ rtx tmp = sh_compare_op0;
+ sh_compare_op0 = sh_compare_op1;
+ sh_compare_op1 = tmp;
+ emit_insn (gen_sge (operands[0]));
+ DONE;
+}")
+
+(define_expand "sgt"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (match_dup 1))]
+ ""
+ "operands[1] = prepare_scc_operands (GT);")
+
+(define_expand "sge"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (match_dup 1))]
+ ""
+ "
+{
+ if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
+ {
+      if (TARGET_IEEE)
+	{
+	  rtx lab = gen_label_rtx ();
+	  prepare_scc_operands (EQ);
+	  emit_jump_insn (gen_branch_true (lab));
+	  prepare_scc_operands (GT);
+	  emit_label (lab);
+	  emit_insn (gen_movt (operands[0]));
+	}
+ else
+ emit_insn (gen_movnegt (operands[0], prepare_scc_operands (LT)));
+ DONE;
+ }
+ operands[1] = prepare_scc_operands (GE);
+}")
+
+(define_expand "sgtu"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (match_dup 1))]
+ ""
+ "operands[1] = prepare_scc_operands (GTU);")
+
+(define_expand "sltu"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (match_dup 1))]
+ ""
+ "operands[1] = prepare_scc_operands (LTU);")
+
+(define_expand "sleu"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (match_dup 1))]
+ ""
+ "operands[1] = prepare_scc_operands (LEU);")
+
+(define_expand "sgeu"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (match_dup 1))]
+ ""
+ "operands[1] = prepare_scc_operands (GEU);")
+
+;; sne moves the complement of the T reg to DEST like this:
+;; cmp/eq ...
+;; mov #-1,temp
+;; negc temp,dest
+;; This is better than xoring the compare result with 1, because it does
+;; not require r0, and further, the -1 may be CSEd or hoisted out of a
+;; loop.
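+;; (negc computes dest = 0 - temp - T, so with temp == -1 the result is
+;; 1 - T, i.e. the complement of T.)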
+
+(define_expand "sne"
+ [(set (match_dup 2) (const_int -1))
+ (parallel [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (neg:SI (plus:SI (match_dup 1)
+ (match_dup 2))))
+ (set (reg:SI 18)
+ (ne:SI (ior:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))])]
+ ""
+ "
+{
+ operands[1] = prepare_scc_operands (EQ);
+ operands[2] = gen_reg_rtx (SImode);
+}")
+
+;; Use the same trick for FP sle / sge
+(define_expand "movnegt"
+ [(set (match_dup 2) (const_int -1))
+ (parallel [(set (match_operand 0 "" "")
+ (neg:SI (plus:SI (match_dup 1)
+ (match_dup 2))))
+ (set (reg:SI 18)
+ (ne:SI (ior:SI (match_operand 1 "" "") (match_dup 2))
+ (const_int 0)))])]
+ ""
+ "operands[2] = gen_reg_rtx (SImode);")
+
+;; Recognize mov #-1/negc/neg sequence, and change it to movt/add #-1.
+;; This prevents a regression that occurred when we switched from xor to
+;; mov/neg for sne.
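+;; (T + (-1) is simply T - 1, so movt followed by add #-1 computes the
+;; same value without needing the -1 in a temporary register.)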
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (plus:SI (reg:SI 18)
+ (const_int -1)))]
+ ""
+ [(set (match_dup 0) (eq:SI (reg:SI 18) (const_int 1)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))]
+ "")
+
+;; -------------------------------------------------------------------------
+;; Instructions to cope with inline literal tables
+;; -------------------------------------------------------------------------
+
+; 2 byte integer in line
+
+(define_insn "consttable_2"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")] 2)]
+ ""
+ "*
+{
+ assemble_integer (operands[0], 2, 1);
+ return \"\";
+}"
+ [(set_attr "length" "2")
+ (set_attr "in_delay_slot" "no")])
+
+; 4 byte integer in line
+
+(define_insn "consttable_4"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")] 4)]
+ ""
+ "*
+{
+ assemble_integer (operands[0], 4, 1);
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "in_delay_slot" "no")])
+
+; 8 byte integer in line
+
+(define_insn "consttable_8"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")] 6)]
+ ""
+ "*
+{
+ assemble_integer (operands[0], 8, 1);
+ return \"\";
+}"
+ [(set_attr "length" "8")
+ (set_attr "in_delay_slot" "no")])
+
+; 4 byte floating point
+
+(define_insn "consttable_sf"
+ [(unspec_volatile [(match_operand:SF 0 "general_operand" "=g")] 4)]
+ ""
+ "*
+{
+ union real_extract u;
+ bcopy ((char *) &CONST_DOUBLE_LOW (operands[0]), (char *) &u, sizeof u);
+ assemble_real (u.d, SFmode);
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "in_delay_slot" "no")])
+
+; 8 byte floating point
+
+(define_insn "consttable_df"
+ [(unspec_volatile [(match_operand:DF 0 "general_operand" "=g")] 6)]
+ ""
+ "*
+{
+ union real_extract u;
+ bcopy ((char *) &CONST_DOUBLE_LOW (operands[0]), (char *) &u, sizeof u);
+ assemble_real (u.d, DFmode);
+ return \"\";
+}"
+ [(set_attr "length" "8")
+ (set_attr "in_delay_slot" "no")])
+
+;; Alignment is needed for some constant tables; it may also be added for
+;; instructions at the start of loops, or after unconditional branches.
+;; ??? We would get more accurate lengths if we did instruction
+;; alignment based on the value of INSN_CURRENT_ADDRESS; the approach used
+;; here is too conservative.
+
+; align to a two byte boundary
+
+(define_expand "align_2"
+ [(unspec_volatile [(const_int 1)] 1)]
+ ""
+ "")
+
+; align to a four byte boundary
+;; align_4 and align_log are instructions for the starts of loops, or
+;; after unconditional branches, which may take up extra room.
+
+(define_expand "align_4"
+ [(unspec_volatile [(const_int 2)] 1)]
+ ""
+ "")
+
+; align to a cache line boundary
+
+(define_insn "align_log"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "")] 1)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "in_delay_slot" "no")])
+
+; emitted at the end of the literal table, used to emit the
+; 32bit branch labels if needed.
+
+(define_insn "consttable_end"
+ [(unspec_volatile [(const_int 0)] 11)]
+ ""
+ "* return output_jump_label_table ();"
+ [(set_attr "in_delay_slot" "no")])
+
+;; -------------------------------------------------------------------------
+;; Misc
+;; -------------------------------------------------------------------------
+
+;; String/block move insn.
+
+(define_expand "movstrsi"
+ [(parallel [(set (mem:BLK (match_operand:BLK 0 "" ""))
+ (mem:BLK (match_operand:BLK 1 "" "")))
+ (use (match_operand:SI 2 "nonmemory_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 4))
+ (clobber (reg:SI 5))
+ (clobber (reg:SI 0))])]
+ ""
+ "
+{
+  if (expand_block_move (operands))
+    DONE;
+  else
+    FAIL;
+}")
+
+(define_insn "block_move_real"
+ [(parallel [(set (mem:BLK (reg:SI 4))
+ (mem:BLK (reg:SI 5)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 0))])]
+ "! TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "block_lump_real"
+ [(parallel [(set (mem:BLK (reg:SI 4))
+ (mem:BLK (reg:SI 5)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (reg:SI 6))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 4))
+ (clobber (reg:SI 5))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 0))])]
+ "! TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "block_move_real_i4"
+ [(parallel [(set (mem:BLK (reg:SI 4))
+ (mem:BLK (reg:SI 5)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))])]
+ "TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "block_lump_real_i4"
+ [(parallel [(set (mem:BLK (reg:SI 4))
+ (mem:BLK (reg:SI 5)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (reg:SI 6))
+ (clobber (reg:SI 17))
+ (clobber (reg:SI 4))
+ (clobber (reg:SI 5))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))])]
+ "TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+;; -------------------------------------------------------------------------
+;; Floating point instructions.
+;; -------------------------------------------------------------------------
+
+;; ??? All patterns should have a type attribute.
+
+(define_expand "fpu_switch0"
+ [(set (match_operand:SI 0 "" "") (symbol_ref "__fpscr_values"))
+ (set (match_dup 2) (match_dup 1))]
+ ""
+ "
+{
+ operands[1] = gen_rtx (MEM, PSImode, operands[0]);
+ RTX_UNCHANGING_P (operands[1]) = 1;
+ operands[2] = get_fpscr_rtx ();
+}")
+
+(define_expand "fpu_switch1"
+ [(set (match_operand:SI 0 "" "") (symbol_ref "__fpscr_values"))
+ (set (match_dup 1) (plus:SI (match_dup 0) (const_int 4)))
+ (set (match_dup 3) (match_dup 2))]
+ ""
+ "
+{
+ operands[1] = gen_reg_rtx (SImode);
+ operands[2] = gen_rtx (MEM, PSImode, operands[1]);
+ RTX_UNCHANGING_P (operands[2]) = 1;
+ operands[3] = get_fpscr_rtx ();
+}")
+
+(define_expand "movpsi"
+ [(set (match_operand:PSI 0 "register_operand" "")
+ (match_operand:PSI 1 "general_movsrc_operand" ""))]
+ ""
+ "")
+
+;; The c / m alternative is a fake to guide reload to load directly into
+;; fpscr, since reload doesn't know how to use post-increment.
+;; GO_IF_LEGITIMATE_ADDRESS guards against bogus addresses before reload;
+;; SECONDARY_INPUT_RELOAD_CLASS does this during reload, and the insn's
+;; predicate does it after reload.
+;; The gp_fpul type for r/!c might look a bit odd, but it actually schedules
+;; like a gpr <-> fpul move.
+(define_insn "fpu_switch"
+ [(set (match_operand:PSI 0 "register_operand" "c,c,r,c,c,r,m,r")
+ (match_operand:PSI 1 "general_movsrc_operand" "c,>,m,m,r,r,r,!c"))]
+ "! reload_completed
+ || true_regnum (operands[0]) != FPSCR_REG || GET_CODE (operands[1]) != MEM
+ || GET_CODE (XEXP (operands[1], 0)) != PLUS"
+ "@
+ ! precision stays the same
+ lds.l %1,fpscr
+ mov.l %1,%0
+ #
+ lds %1,fpscr
+ mov %1,%0
+ mov.l %1,%0
+ sts fpscr,%0"
+ [(set_attr "length" "0,2,2,4,2,2,2,2")
+ (set_attr "type" "dfp_conv,dfp_conv,load,dfp_conv,dfp_conv,move,store,gp_fpul")])
+
+(define_split
+ [(set (reg:PSI 48) (mem:PSI (match_operand:SI 0 "register_operand" "r")))]
+ "find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
+ [(set (match_dup 0) (match_dup 0))]
+ "
+{
+ rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (),
+ gen_rtx (MEM, PSImode,
+ gen_rtx (POST_INC, Pmode,
+ operands[0]))));
+ REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[0], NULL_RTX);
+}")
+
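+;; If the address register does not die, load via post-increment anyway
+;; and then subtract the 4 again, leaving the register with its original
+;; value.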
+(define_split
+ [(set (reg:PSI 48) (mem:PSI (match_operand:SI 0 "register_operand" "r")))]
+ ""
+ [(set (match_dup 0) (plus:SI (match_dup 0) (const_int -4)))]
+ "
+{
+ rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (),
+ gen_rtx (MEM, PSImode,
+ gen_rtx (POST_INC, Pmode,
+ operands[0]))));
+ REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[0], NULL_RTX);
+}")
+
+;; ??? This uses the fp unit, but has no type indicating that.
+;; If we did that, this would either give a bogus latency or introduce
+;; a bogus FIFO constraint.
+;; Since this insn is currently only used for prologues/epilogues,
+;; it is probably best to claim no function unit, which matches the
+;; current setting.
+(define_insn "toggle_sz"
+ [(set (reg:PSI 48) (xor:PSI (reg:PSI 48) (const_int 1048576)))]
+ "TARGET_SH4"
+ "fschg")
+
+(define_expand "addsf3"
+ [(match_operand:SF 0 "arith_reg_operand" "")
+ (match_operand:SF 1 "arith_reg_operand" "")
+ (match_operand:SF 2 "arith_reg_operand" "")]
+ "TARGET_SH3E"
+ "{ expand_sf_binop (&gen_addsf3_i, operands); DONE; }")
+
+(define_insn "addsf3_i"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (plus:SF (match_operand:SF 1 "arith_reg_operand" "%0")
+ (match_operand:SF 2 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH3E"
+ "fadd %2,%0"
+ [(set_attr "type" "fp")])
+
+(define_expand "subsf3"
+ [(match_operand:SF 0 "arith_reg_operand" "")
+ (match_operand:SF 1 "arith_reg_operand" "")
+ (match_operand:SF 2 "arith_reg_operand" "")]
+ "TARGET_SH3E"
+ "{ expand_sf_binop (&gen_subsf3_i, operands); DONE; }")
+
+(define_insn "subsf3_i"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (minus:SF (match_operand:SF 1 "arith_reg_operand" "0")
+ (match_operand:SF 2 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH3E"
+ "fsub %2,%0"
+ [(set_attr "type" "fp")])
+
+;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR
+;; register in feeding fp instructions. Thus, we cannot generate fmac for
+;; mixed-precision SH4 targets. To allow it still to be generated for
+;; SH3E, we use a separate insn for the SH3E mulsf3.
+
+(define_expand "mulsf3"
+ [(match_operand:SF 0 "arith_reg_operand" "")
+ (match_operand:SF 1 "arith_reg_operand" "")
+ (match_operand:SF 2 "arith_reg_operand" "")]
+ "TARGET_SH3E"
+ "
+{
+ if (TARGET_SH4)
+ expand_sf_binop (&gen_mulsf3_i4, operands);
+ else
+ emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "mulsf3_i4"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (mult:SF (match_operand:SF 1 "arith_reg_operand" "%0")
+ (match_operand:SF 2 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH3E"
+ "fmul %2,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "mulsf3_ie"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (mult:SF (match_operand:SF 1 "arith_reg_operand" "%0")
+ (match_operand:SF 2 "arith_reg_operand" "f")))]
+ "TARGET_SH3E && ! TARGET_SH4"
+ "fmul %2,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "*macsf3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "arith_reg_operand" "%w")
+ (match_operand:SF 2 "arith_reg_operand" "f"))
+ (match_operand:SF 3 "arith_reg_operand" "0")))
+ (use (match_operand:PSI 4 "fpscr_operand" "c"))]
+ "TARGET_SH3E && ! TARGET_SH4"
+ "fmac fr0,%2,%0"
+ [(set_attr "type" "fp")])
+
+(define_expand "divsf3"
+ [(match_operand:SF 0 "arith_reg_operand" "")
+ (match_operand:SF 1 "arith_reg_operand" "")
+ (match_operand:SF 2 "arith_reg_operand" "")]
+ "TARGET_SH3E"
+ "{ expand_sf_binop (&gen_divsf3_i, operands); DONE; }")
+
+(define_insn "divsf3_i"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (div:SF (match_operand:SF 1 "arith_reg_operand" "0")
+ (match_operand:SF 2 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH3E"
+ "fdiv %2,%0"
+ [(set_attr "type" "fdiv")])
+
+(define_expand "floatsisf2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "arith_reg_operand" ""))
+ (parallel [(set (match_operand:SF 0 "arith_reg_operand" "")
+ (float:SF (reg:SI 22)))
+ (use (match_dup 2))])]
+ "TARGET_SH3E"
+ "
+{
+ if (TARGET_SH4)
+ {
+ emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 22),
+ operands[1]));
+ emit_sf_insn (gen_floatsisf2_i4 (operands[0], get_fpscr_rtx ()));
+ DONE;
+ }
+ operands[2] = get_fpscr_rtx ();
+}")
+
+(define_insn "floatsisf2_i4"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (float:SF (reg:SI 22)))
+ (use (match_operand:PSI 1 "fpscr_operand" "c"))]
+ "TARGET_SH3E"
+ "float fpul,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "*floatsisf2_ie"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (float:SF (reg:SI 22)))]
+ "TARGET_SH3E && ! TARGET_SH4"
+ "float fpul,%0"
+ [(set_attr "type" "fp")])
+
+(define_expand "fix_truncsfsi2"
+ [(set (reg:SI 22)
+ (fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
+ (set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (reg:SI 22))]
+ "TARGET_SH3E"
+ "
+{
+ if (TARGET_SH4)
+ {
+ emit_sf_insn (gen_fix_truncsfsi2_i4 (operands[1], get_fpscr_rtx ()));
+ emit_insn (gen_rtx (SET, VOIDmode, operands[0],
+ gen_rtx (REG, SImode, 22)));
+ DONE;
+ }
+}")
+
+(define_insn "fix_truncsfsi2_i4"
+ [(set (reg:SI 22)
+ (fix:SI (match_operand:SF 0 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 1 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "ftrc %0,fpul"
+ [(set_attr "type" "fp")])
+
+(define_insn "fix_truncsfsi2_i4_2"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
+ (use (reg:SI 48))
+ (clobber (reg:SI 22))]
+ "TARGET_SH4"
+ "#"
+ [(set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (reg:SI 22))]
+ "TARGET_SH4"
+ [(parallel [(set (reg:SI 22) (fix:SI (match_dup 1)))
+ (use (match_dup 2))])
+ (set (match_dup 0) (reg:SI 22))])
+
+(define_insn "*fixsfsi"
+ [(set (reg:SI 22)
+ (fix:SI (match_operand:SF 0 "arith_reg_operand" "f")))]
+ "TARGET_SH3E && ! TARGET_SH4"
+ "ftrc %0,fpul"
+ [(set_attr "type" "fp")])
+
+(define_insn "cmpgtsf_t"
+ [(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f")
+ (match_operand:SF 1 "arith_reg_operand" "f")))]
+ "TARGET_SH3E && ! TARGET_SH4"
+ "fcmp/gt %1,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "cmpeqsf_t"
+ [(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f")
+ (match_operand:SF 1 "arith_reg_operand" "f")))]
+ "TARGET_SH3E && ! TARGET_SH4"
+ "fcmp/eq %1,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "ieee_ccmpeqsf_t"
+ [(set (reg:SI 18) (ior:SI (reg:SI 18)
+ (eq:SI (match_operand:SF 0 "arith_reg_operand" "f")
+ (match_operand:SF 1 "arith_reg_operand" "f"))))]
+ "TARGET_SH3E && TARGET_IEEE && ! TARGET_SH4"
+ "* return output_ieee_ccmpeq (insn, operands);"
+ [(set_attr "length" "4")])
+
+
+(define_insn "cmpgtsf_t_i4"
+ [(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f")
+ (match_operand:SF 1 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fcmp/gt %1,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "cmpeqsf_t_i4"
+ [(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f")
+ (match_operand:SF 1 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fcmp/eq %1,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "*ieee_ccmpeqsf_t_4"
+ [(set (reg:SI 18) (ior:SI (reg:SI 18)
+ (eq:SI (match_operand:SF 0 "arith_reg_operand" "f")
+ (match_operand:SF 1 "arith_reg_operand" "f"))))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_IEEE && TARGET_SH4"
+ "* return output_ieee_ccmpeq (insn, operands);"
+ [(set_attr "length" "4")])
+
+(define_expand "cmpsf"
+ [(set (reg:SI 18) (compare (match_operand:SF 0 "arith_operand" "")
+ (match_operand:SF 1 "arith_operand" "")))]
+ "TARGET_SH3E"
+ "
+{
+ sh_compare_op0 = operands[0];
+ sh_compare_op1 = operands[1];
+ DONE;
+}")
+
+(define_expand "negsf2"
+ [(match_operand:SF 0 "arith_reg_operand" "")
+ (match_operand:SF 1 "arith_reg_operand" "")]
+ "TARGET_SH3E"
+ "{ expand_sf_unop (&gen_negsf2_i, operands); DONE; }")
+
+(define_insn "negsf2_i"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (neg:SF (match_operand:SF 1 "arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH3E"
+ "fneg %0"
+ [(set_attr "type" "fmove")])
+
+(define_expand "sqrtsf2"
+ [(match_operand:SF 0 "arith_reg_operand" "")
+ (match_operand:SF 1 "arith_reg_operand" "")]
+ "TARGET_SH3E"
+ "{ expand_sf_unop (&gen_sqrtsf2_i, operands); DONE; }")
+
+(define_insn "sqrtsf2_i"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH3E"
+ "fsqrt %0"
+ [(set_attr "type" "fdiv")])
+
+(define_expand "abssf2"
+ [(match_operand:SF 0 "arith_reg_operand" "")
+ (match_operand:SF 1 "arith_reg_operand" "")]
+ "TARGET_SH3E"
+ "{ expand_sf_unop (&gen_abssf2_i, operands); DONE; }")
+
+(define_insn "abssf2_i"
+ [(set (match_operand:SF 0 "arith_reg_operand" "=f")
+ (abs:SF (match_operand:SF 1 "arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH3E"
+ "fabs %0"
+ [(set_attr "type" "fmove")])
+
+(define_expand "adddf3"
+ [(match_operand:DF 0 "arith_reg_operand" "")
+ (match_operand:DF 1 "arith_reg_operand" "")
+ (match_operand:DF 2 "arith_reg_operand" "")]
+ "TARGET_SH4"
+ "{ expand_df_binop (&gen_adddf3_i, operands); DONE; }")
+
+(define_insn "adddf3_i"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=f")
+ (plus:DF (match_operand:DF 1 "arith_reg_operand" "%0")
+ (match_operand:DF 2 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fadd %2,%0"
+ [(set_attr "type" "dfp_arith")])
+
+(define_expand "subdf3"
+ [(match_operand:DF 0 "arith_reg_operand" "")
+ (match_operand:DF 1 "arith_reg_operand" "")
+ (match_operand:DF 2 "arith_reg_operand" "")]
+ "TARGET_SH4"
+ "{ expand_df_binop (&gen_subdf3_i, operands); DONE; }")
+
+(define_insn "subdf3_i"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=f")
+ (minus:DF (match_operand:DF 1 "arith_reg_operand" "0")
+ (match_operand:DF 2 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fsub %2,%0"
+ [(set_attr "type" "dfp_arith")])
+
+(define_expand "muldf3"
+ [(match_operand:DF 0 "arith_reg_operand" "")
+ (match_operand:DF 1 "arith_reg_operand" "")
+ (match_operand:DF 2 "arith_reg_operand" "")]
+ "TARGET_SH4"
+ "{ expand_df_binop (&gen_muldf3_i, operands); DONE; }")
+
+(define_insn "muldf3_i"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=f")
+ (mult:DF (match_operand:DF 1 "arith_reg_operand" "%0")
+ (match_operand:DF 2 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fmul %2,%0"
+ [(set_attr "type" "dfp_arith")])
+
+(define_expand "divdf3"
+ [(match_operand:DF 0 "arith_reg_operand" "")
+ (match_operand:DF 1 "arith_reg_operand" "")
+ (match_operand:DF 2 "arith_reg_operand" "")]
+ "TARGET_SH4"
+ "{ expand_df_binop (&gen_divdf3_i, operands); DONE; }")
+
+(define_insn "divdf3_i"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=f")
+ (div:DF (match_operand:DF 1 "arith_reg_operand" "0")
+ (match_operand:DF 2 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fdiv %2,%0"
+ [(set_attr "type" "dfdiv")])
+
+(define_expand "floatsidf2"
+ [(match_operand:DF 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_operand" "")]
+ "TARGET_SH4"
+ "
+{
+ emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 22), operands[1]));
+ emit_df_insn (gen_floatsidf2_i (operands[0], get_fpscr_rtx ()));
+ DONE;
+}")
+
+(define_insn "floatsidf2_i"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=f")
+ (float:DF (reg:SI 22)))
+ (use (match_operand:PSI 1 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "float fpul,%0"
+ [(set_attr "type" "dfp_conv")])
+
+(define_expand "fix_truncdfsi2"
+ [(match_operand:SI 0 "arith_reg_operand" "=r")
+ (match_operand:DF 1 "arith_reg_operand" "f")]
+ "TARGET_SH4"
+ "
+{
+ emit_df_insn (gen_fix_truncdfsi2_i (operands[1], get_fpscr_rtx ()));
+ emit_insn (gen_rtx (SET, VOIDmode, operands[0], gen_rtx (REG, SImode, 22)));
+ DONE;
+}")
+
+(define_insn "fix_truncdfsi2_i"
+ [(set (reg:SI 22)
+ (fix:SI (match_operand:DF 0 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 1 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "ftrc %0,fpul"
+ [(set_attr "type" "dfp_conv")])
+
+(define_insn "fix_truncdfsi2_i4"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (fix:SI (match_operand:DF 1 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (reg:SI 22))]
+ "TARGET_SH4"
+ "#"
+ [(set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (fix:SI (match_operand:DF 1 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))
+ (clobber (reg:SI 22))]
+ "TARGET_SH4"
+ [(parallel [(set (reg:SI 22) (fix:SI (match_dup 1)))
+ (use (match_dup 2))])
+ (set (match_dup 0) (reg:SI 22))])
+
+(define_insn "cmpgtdf_t"
+ [(set (reg:SI 18) (gt:SI (match_operand:DF 0 "arith_reg_operand" "f")
+ (match_operand:DF 1 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fcmp/gt %1,%0"
+ [(set_attr "type" "dfp_cmp")])
+
+(define_insn "cmpeqdf_t"
+ [(set (reg:SI 18) (eq:SI (match_operand:DF 0 "arith_reg_operand" "f")
+ (match_operand:DF 1 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fcmp/eq %1,%0"
+ [(set_attr "type" "dfp_cmp")])
+
+(define_insn "*ieee_ccmpeqdf_t"
+ [(set (reg:SI 18) (ior:SI (reg:SI 18)
+ (eq:SI (match_operand:DF 0 "arith_reg_operand" "f")
+ (match_operand:DF 1 "arith_reg_operand" "f"))))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_IEEE && TARGET_SH4"
+ "* return output_ieee_ccmpeq (insn, operands);"
+ [(set_attr "length" "4")])
+
+(define_expand "cmpdf"
+ [(set (reg:SI 18) (compare (match_operand:DF 0 "arith_operand" "")
+ (match_operand:DF 1 "arith_operand" "")))]
+ "TARGET_SH4"
+ "
+{
+ sh_compare_op0 = operands[0];
+ sh_compare_op1 = operands[1];
+ DONE;
+}")
+
+(define_expand "negdf2"
+ [(match_operand:DF 0 "arith_reg_operand" "")
+ (match_operand:DF 1 "arith_reg_operand" "")]
+ "TARGET_SH4"
+ "{ expand_df_unop (&gen_negdf2_i, operands); DONE; }")
+
+(define_insn "negdf2_i"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=f")
+ (neg:DF (match_operand:DF 1 "arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fneg %0"
+ [(set_attr "type" "fmove")])
+
+(define_expand "sqrtdf2"
+ [(match_operand:DF 0 "arith_reg_operand" "")
+ (match_operand:DF 1 "arith_reg_operand" "")]
+ "TARGET_SH4"
+ "{ expand_df_unop (&gen_sqrtdf2_i, operands); DONE; }")
+
+(define_insn "sqrtdf2_i"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=f")
+ (sqrt:DF (match_operand:DF 1 "arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fsqrt %0"
+ [(set_attr "type" "dfdiv")])
+
+(define_expand "absdf2"
+ [(match_operand:DF 0 "arith_reg_operand" "")
+ (match_operand:DF 1 "arith_reg_operand" "")]
+ "TARGET_SH4"
+ "{ expand_df_unop (&gen_absdf2_i, operands); DONE; }")
+
+(define_insn "absdf2_i"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=f")
+ (abs:DF (match_operand:DF 1 "arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fabs %0"
+ [(set_attr "type" "fmove")])
+
+(define_expand "extendsfdf2"
+ [(match_operand:DF 0 "arith_reg_operand" "")
+ (match_operand:SF 1 "arith_reg_operand" "")]
+ "TARGET_SH4"
+ "
+{
+ emit_sf_insn (gen_movsf_ie (gen_rtx (REG, SFmode, 22), operands[1],
+ get_fpscr_rtx ()));
+ emit_df_insn (gen_extendsfdf2_i4 (operands[0], get_fpscr_rtx ()));
+ DONE;
+}")
+
+(define_insn "extendsfdf2_i4"
+ [(set (match_operand:DF 0 "arith_reg_operand" "=f")
+ (float_extend:DF (reg:SF 22)))
+ (use (match_operand:PSI 1 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fcnvsd fpul,%0"
+ [(set_attr "type" "fp")])
+
+(define_expand "truncdfsf2"
+ [(match_operand:SF 0 "arith_reg_operand" "")
+ (match_operand:DF 1 "arith_reg_operand" "")]
+ "TARGET_SH4"
+ "
+{
+ emit_df_insn (gen_truncdfsf2_i4 (operands[1], get_fpscr_rtx ()));
+ emit_sf_insn (gen_movsf_ie (operands[0], gen_rtx (REG, SFmode, 22),
+ get_fpscr_rtx ()));
+ DONE;
+}")
+
+(define_insn "truncdfsf2_i4"
+ [(set (reg:SF 22)
+ (float_truncate:SF (match_operand:DF 0 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 1 "fpscr_operand" "c"))]
+ "TARGET_SH4"
+ "fcnvds %0,fpul"
+ [(set_attr "type" "fp")])
+
+;; Bit field insert patterns. These give better code for packed bitfields,
+;; because they allow auto-increment addresses to be generated.
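+;;
+;; A rough sketch of the expansion for a byte aligned 24 bit field
+;; (big endian; register numbers are illustrative only):
+;;   add    #2,r1      ! point at the last byte of the field
+;;   mov.b  r2,@r1     ! store the low byte of the value
+;;   shlr8  r2
+;;   add    #-1,r1
+;;   mov.b  r2,@r1     ! and so on for the remaining bytes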
+
+(define_expand "insv"
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "")
+ (match_operand:SI 1 "immediate_operand" "")
+ (match_operand:SI 2 "immediate_operand" ""))
+ (match_operand:SI 3 "general_operand" ""))]
+ "! TARGET_LITTLE_ENDIAN"
+ "
+{
+ rtx addr_target, orig_address, shift_reg;
+ HOST_WIDE_INT size;
+
+ /* ??? expmed doesn't care for non-register predicates. */
+ if (! memory_operand (operands[0], VOIDmode)
+ || ! immediate_operand (operands[1], VOIDmode)
+ || ! immediate_operand (operands[2], VOIDmode)
+ || ! general_operand (operands[3], VOIDmode))
+ FAIL;
+ /* If this isn't a 16 / 24 / 32 bit field, or if
+ it doesn't start on a byte boundary, then fail. */
+ size = INTVAL (operands[1]);
+ if (size < 16 || size > 32 || size % 8 != 0
+ || (INTVAL (operands[2]) % 8) != 0)
+ FAIL;
+
+ size /= 8;
+ orig_address = XEXP (operands[0], 0);
+ addr_target = gen_reg_rtx (SImode);
+ shift_reg = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (shift_reg, operands[3]));
+ emit_insn (gen_addsi3 (addr_target, orig_address, GEN_INT (size - 1)));
+
+ operands[0] = change_address (operands[0], QImode, addr_target);
+ emit_insn (gen_movqi (operands[0], gen_rtx (SUBREG, QImode, shift_reg, 0)));
+
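+  /* Store the remaining bytes, working down towards the start of the
+     field; each pass shifts the value right by 8 and steps the address
+     back by one.  */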
+ while (size -= 1)
+ {
+ emit_insn (gen_lshrsi3_k (shift_reg, shift_reg, GEN_INT (8)));
+ emit_insn (gen_addsi3 (addr_target, addr_target, GEN_INT (-1)));
+ emit_insn (gen_movqi (operands[0],
+ gen_rtx (SUBREG, QImode, shift_reg, 0)));
+ }
+
+ DONE;
+}")
+
+;; -------------------------------------------------------------------------
+;; Peepholes
+;; -------------------------------------------------------------------------
+
+;; This matches cases where a stack pointer increment at the start of the
+;; epilogue combines with a stack slot read loading the return value.
+
+(define_peephole
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (mem:SI (match_operand:SI 1 "arith_reg_operand" "")))
+ (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))]
+ "REGNO (operands[1]) != REGNO (operands[0])"
+ "mov.l @%1+,%0")
+
+;; See the comment on the dt combiner pattern above.
+
+(define_peephole
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (set (reg:SI 18)
+ (eq:SI (match_dup 0)
+ (const_int 0)))]
+ "TARGET_SH2"
+ "dt %0")
+
+;; These convert sequences such as `mov #k,r0; add r15,r0; mov.l @r0,rn'
+;; to `mov #k,r0; mov.l @(r0,r15),rn'. These sequences are generated by
+;; reload when the constant is too large for a reg+offset address.
+
+;; ??? We would get much better code if this was done in reload. This would
+;; require modifying find_reloads_address to recognize that if the constant
+;; is out-of-range for an immediate add, then we get better code by reloading
+;; the constant into a register than by reloading the sum into a register,
+;; since the former is one instruction shorter if the address does not need
+;; to be offsettable. Unfortunately this does not work, because there is
+;; only one register, r0, that can be used as an index register. This register
+;; is also the function return value register. So, if we try to force reload
+;; to use double-reg addresses, then we end up with some instructions that
+;; need to use r0 twice. The only way to fix this is to change the calling
+;; convention so that r0 is not used to return values.
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 2 "general_movsrc_operand" ""))]
+ "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.l %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (match_operand:SI 2 "general_movdst_operand" "")
+ (mem:SI (match_dup 0)))]
+ "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.l @(%0,%1),%2")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:HI (match_dup 0))
+ (match_operand:HI 2 "general_movsrc_operand" ""))]
+ "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.w %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (match_operand:HI 2 "general_movdst_operand" "")
+ (mem:HI (match_dup 0)))]
+ "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.w @(%0,%1),%2")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QI (match_dup 0))
+ (match_operand:QI 2 "general_movsrc_operand" ""))]
+ "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.b %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (match_operand:QI 2 "general_movdst_operand" "")
+ (mem:QI (match_dup 0)))]
+ "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.b @(%0,%1),%2")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 2 "general_movsrc_operand" ""))]
+ "REGNO (operands[0]) == 0
+ && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) < 16)
+ || (GET_CODE (operands[2]) == SUBREG
+ && REGNO (SUBREG_REG (operands[2])) < 16))
+ && reg_unused_after (operands[0], insn)"
+ "mov.l %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+   (set (match_operand:SF 2 "general_movdst_operand" "")
+	(mem:SF (match_dup 0)))]
+ "REGNO (operands[0]) == 0
+ && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) < 16)
+ || (GET_CODE (operands[2]) == SUBREG
+ && REGNO (SUBREG_REG (operands[2])) < 16))
+ && reg_unused_after (operands[0], insn)"
+ "mov.l @(%0,%1),%2")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 2 "general_movsrc_operand" ""))]
+ "REGNO (operands[0]) == 0
+ && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) >= FIRST_FP_REG)
+ || (GET_CODE (operands[2]) == SUBREG
+ && REGNO (SUBREG_REG (operands[2])) >= FIRST_FP_REG))
+ && reg_unused_after (operands[0], insn)"
+ "fmov{.s|} %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+   (set (match_operand:SF 2 "general_movdst_operand" "")
+	(mem:SF (match_dup 0)))]
+ "REGNO (operands[0]) == 0
+ && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) >= FIRST_FP_REG)
+ || (GET_CODE (operands[2]) == SUBREG
+ && REGNO (SUBREG_REG (operands[2])) >= FIRST_FP_REG))
+ && reg_unused_after (operands[0], insn)"
+ "fmov{.s|} @(%0,%1),%2")
+
+;; Switch to a new stack with its address in sp_switch (a SYMBOL_REF).
+(define_insn "sp_switch_1"
+ [(const_int 1)]
+ ""
+ "*
+{
+ rtx xoperands[1];
+
+ xoperands[0] = sp_switch;
+ output_asm_insn (\"mov.l r0,@-r15\;mov.l %0,r0\", xoperands);
+ output_asm_insn (\"mov.l @r0,r0\;mov.l r15,@-r0\", xoperands);
+ return \"mov r0,r15\";
+}"
+ [(set_attr "length" "10")])
+
+;; Switch back to the original stack for interrupt functions with the
+;; sp_switch attribute.
+(define_insn "sp_switch_2"
+ [(const_int 2)]
+ ""
+ "mov.l @r15+,r15\;mov.l @r15+,r0"
+ [(set_attr "length" "4")])
diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh
new file mode 100755
index 0000000..bfbf45e
--- /dev/null
+++ b/gcc/config/sh/t-sh
@@ -0,0 +1,29 @@
+CROSS_LIBGCC1 = libgcc1-asm.a
+LIB1ASMSRC = sh/lib1funcs.asm
+LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movstr \
+ _movstr_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr
+
+# These are really part of libgcc1, but this will cause them to be
+# built correctly, so...
+
+LIB2FUNCS_EXTRA = fp-bit.c dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c
+	echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c
+	echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+MULTILIB_OPTIONS= ml m2/m3e/m4-single-only/m4-single/m4
+MULTILIB_DIRNAMES=
+MULTILIB_MATCHES = m2=m3
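+
+# With MULTILIB_DIRNAMES left empty, the multilib directory names default
+# to the option names; MULTILIB_MATCHES makes -m3 reuse the -m2 libraries.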
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sh/xm-sh.h b/gcc/config/sh/xm-sh.h
new file mode 100755
index 0000000..f51b787
--- /dev/null
+++ b/gcc/config/sh/xm-sh.h
@@ -0,0 +1,42 @@
+/* Configuration for GNU C-compiler for Hitachi SH.
+ Copyright (C) 1993, 1997 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* #defines that need visibility everywhere. */
+#define FALSE 0
+#define TRUE 1
+
+/* This describes the machine the compiler is hosted on. */
+#define HOST_BITS_PER_CHAR 8
+#define HOST_BITS_PER_SHORT 16
+#define HOST_BITS_PER_INT 32
+#define HOST_BITS_PER_LONG 32
+
+/* If compiled with GNU C, use the built-in alloca. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#endif
+
+/* Target machine dependencies.
+   tm.h is a symbolic link to the actual target-specific file.  */
+#include "tm.h"
+
+/* Arguments to use with `exit'. */
+#define SUCCESS_EXIT_CODE 0
+#define FATAL_EXIT_CODE 33