Diffstat (limited to 'gcc/config/sh')
-rwxr-xr-x | gcc/config/sh/elf.h          |  123
-rwxr-xr-x | gcc/config/sh/lib1funcs.asm  | 1206
-rwxr-xr-x | gcc/config/sh/rtems.h        |   35
-rwxr-xr-x | gcc/config/sh/rtemself.h     |   33
-rwxr-xr-x | gcc/config/sh/sh.c           | 4786
-rwxr-xr-x | gcc/config/sh/sh.h           | 2232
-rwxr-xr-x | gcc/config/sh/sh.md          | 4654
-rwxr-xr-x | gcc/config/sh/t-sh           |   29
-rwxr-xr-x | gcc/config/sh/xm-sh.h        |   42
9 files changed, 13140 insertions, 0 deletions
diff --git a/gcc/config/sh/elf.h b/gcc/config/sh/elf.h new file mode 100755 index 0000000..68cc691 --- /dev/null +++ b/gcc/config/sh/elf.h @@ -0,0 +1,123 @@ +/* Definitions of target machine for gcc for Hitachi Super-H using ELF. + Copyright (C) 1996, 1997 Free Software Foundation, Inc. + Contributed by Ian Lance Taylor <ian@cygnus.com>. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Mostly like the regular SH configuration. */ +#include "sh/sh.h" + +/* No SDB debugging info. */ +#undef SDB_DEBUGGING_INFO + +/* Undefine some macros defined in both sh.h and svr4.h. */ +#undef IDENT_ASM_OP +#undef ASM_FILE_END +#undef ASM_OUTPUT_SOURCE_LINE +#undef DBX_OUTPUT_MAIN_SOURCE_FILE_END +#undef CTORS_SECTION_ASM_OP +#undef DTORS_SECTION_ASM_OP +#undef ASM_OUTPUT_SECTION_NAME +#undef ASM_OUTPUT_CONSTRUCTOR +#undef ASM_OUTPUT_DESTRUCTOR +#undef ASM_DECLARE_FUNCTION_NAME +#undef PREFERRED_DEBUGGING_TYPE +#undef MAX_OFILE_ALIGNMENT + +/* Be ELF-like. */ +#include "svr4.h" + +/* The prefix to add to user-visible assembler symbols. + Note that svr4.h redefined it from the original value (that we want) + in sh.h */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +#undef ASM_FILE_START +#define ASM_FILE_START(FILE) do { \ + output_file_directive ((FILE), main_input_filename); \ + if (TARGET_LITTLE_ENDIAN) \ + fprintf ((FILE), "\t.little\n"); \ +} while (0) + + + +/* Let code know that this is ELF. */ +#define CPP_PREDEFINES "-D__sh__ -D__ELF__ -Acpu(sh) -Amachine(sh)" + +/* Pass -ml and -mrelax to the assembler and linker. */ +#undef ASM_SPEC +#define ASM_SPEC "%{ml:-little} %{mrelax:-relax}" + +#undef LINK_SPEC +#define LINK_SPEC "%{ml:-m shlelf} %{mrelax:-relax}" + +/* svr4.h undefined DBX_REGISTER_NUMBER, so we need to define it + again. */ +#define DBX_REGISTER_NUMBER(REGNO) \ + (((REGNO) >= 22 && (REGNO) <= 39) ? ((REGNO) + 1) : (REGNO)) + +/* SH ELF, unlike most ELF implementations, uses underscores before + symbol names. 
*/ +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM,NAME) \ + asm_fprintf (STREAM, "%U%s", NAME) + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \ + sprintf ((STRING), "*%s%s%d", LOCAL_LABEL_PREFIX, (PREFIX), (NUM)) + +#undef ASM_OUTPUT_INTERNAL_LABEL +#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ + asm_fprintf ((FILE), "%L%s%d:\n", (PREFIX), (NUM)) + +#undef ASM_OUTPUT_SOURCE_LINE +#define ASM_OUTPUT_SOURCE_LINE(file, line) \ +do \ + { \ + static int sym_lineno = 1; \ + asm_fprintf ((file), ".stabn 68,0,%d,%LLM%d-", \ + (line), sym_lineno); \ + assemble_name ((file), \ + XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));\ + asm_fprintf ((file), "\n%LLM%d:\n", sym_lineno); \ + sym_lineno += 1; \ + } \ +while (0) + +#undef DBX_OUTPUT_MAIN_SOURCE_FILE_END +#define DBX_OUTPUT_MAIN_SOURCE_FILE_END(FILE, FILENAME) \ +do { \ + text_section (); \ + fprintf ((FILE), "\t.stabs \"\",%d,0,0,Letext\nLetext:\n", N_SO); \ +} while (0) + +/* Arrange to call __main, rather than using crtbegin.o and crtend.o + and relying on .init and .fini being executed at appropriate times. */ +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP +#undef STARTFILE_SPEC +#undef ENDFILE_SPEC + +/* HANDLE_SYSV_PRAGMA (defined by svr4.h) takes precedence over HANDLE_PRAGMA. + We want to use the HANDLE_PRAGMA from sh.h. */ +#undef HANDLE_SYSV_PRAGMA diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm new file mode 100755 index 0000000..bf9ea9a --- /dev/null +++ b/gcc/config/sh/lib1funcs.asm @@ -0,0 +1,1206 @@ +/* Copyright (C) 1994, 1995, 1997, 1998 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file with other programs, and to distribute +those programs without any restriction coming from the use of this +file. (The General Public License restrictions do apply in other +respects; for example, they cover modification of the file, and +distribution when not linked into another program.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you link this library with other files, + some of which are compiled with GCC, to produce an executable, + this library does not by itself cause the resulting executable + to be covered by the GNU General Public License. + This exception does not however invalidate any other reasons why + the executable file might be covered by the GNU General Public License. */ + + +!! libgcc1 routines for the Hitachi SH cpu. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! 
tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +#ifdef __ELF__ +#define LOCAL(X) .L_##X +#else +#define LOCAL(X) L_##X +#endif + +#ifdef L_ashiftrt + .global ___ashiftrt_r4_0 + .global ___ashiftrt_r4_1 + .global ___ashiftrt_r4_2 + .global ___ashiftrt_r4_3 + .global ___ashiftrt_r4_4 + .global ___ashiftrt_r4_5 + .global ___ashiftrt_r4_6 + .global ___ashiftrt_r4_7 + .global ___ashiftrt_r4_8 + .global ___ashiftrt_r4_9 + .global ___ashiftrt_r4_10 + .global ___ashiftrt_r4_11 + .global ___ashiftrt_r4_12 + .global ___ashiftrt_r4_13 + .global ___ashiftrt_r4_14 + .global ___ashiftrt_r4_15 + .global ___ashiftrt_r4_16 + .global ___ashiftrt_r4_17 + .global ___ashiftrt_r4_18 + .global ___ashiftrt_r4_19 + .global ___ashiftrt_r4_20 + .global ___ashiftrt_r4_21 + .global ___ashiftrt_r4_22 + .global ___ashiftrt_r4_23 + .global ___ashiftrt_r4_24 + .global ___ashiftrt_r4_25 + .global ___ashiftrt_r4_26 + .global ___ashiftrt_r4_27 + .global ___ashiftrt_r4_28 + .global ___ashiftrt_r4_29 + .global ___ashiftrt_r4_30 + .global ___ashiftrt_r4_31 + .global ___ashiftrt_r4_32 + + .align 1 +___ashiftrt_r4_32: +___ashiftrt_r4_31: + rotcl r4 + rts + subc r4,r4 + +___ashiftrt_r4_30: + shar r4 +___ashiftrt_r4_29: + shar r4 +___ashiftrt_r4_28: + shar r4 +___ashiftrt_r4_27: + shar r4 +___ashiftrt_r4_26: + shar r4 +___ashiftrt_r4_25: + shar r4 +___ashiftrt_r4_24: + shlr16 r4 + shlr8 r4 + rts + exts.b r4,r4 + +___ashiftrt_r4_23: + shar r4 +___ashiftrt_r4_22: + shar r4 +___ashiftrt_r4_21: + shar r4 +___ashiftrt_r4_20: + shar r4 +___ashiftrt_r4_19: + shar r4 +___ashiftrt_r4_18: + shar r4 +___ashiftrt_r4_17: + shar r4 +___ashiftrt_r4_16: + shlr16 r4 + rts + exts.w r4,r4 + +___ashiftrt_r4_15: + shar r4 +___ashiftrt_r4_14: + shar r4 +___ashiftrt_r4_13: + shar r4 +___ashiftrt_r4_12: + shar r4 +___ashiftrt_r4_11: + shar r4 +___ashiftrt_r4_10: + shar r4 +___ashiftrt_r4_9: + shar r4 +___ashiftrt_r4_8: + shar r4 +___ashiftrt_r4_7: + shar r4 +___ashiftrt_r4_6: + shar r4 +___ashiftrt_r4_5: + shar r4 +___ashiftrt_r4_4: + shar r4 +___ashiftrt_r4_3: + shar r4 +___ashiftrt_r4_2: + shar r4 +___ashiftrt_r4_1: + rts + shar r4 + +___ashiftrt_r4_0: + rts + nop +#endif + +#ifdef L_ashiftrt_n + +! +! ___ashrsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ + .global ___ashrsi3 + .align 2 +___ashrsi3: + mov #31,r0 + and r0,r5 + mova LOCAL(ashrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashrsi3_table): + .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) + +LOCAL(ashrsi3_31): + rotcl r0 + rts + subc r0,r0 + +LOCAL(ashrsi3_30): + shar r0 +LOCAL(ashrsi3_29): + shar r0 +LOCAL(ashrsi3_28): + shar r0 +LOCAL(ashrsi3_27): + shar r0 +LOCAL(ashrsi3_26): + shar r0 +LOCAL(ashrsi3_25): + shar r0 +LOCAL(ashrsi3_24): + shlr16 r0 + shlr8 r0 + rts + exts.b r0,r0 + +LOCAL(ashrsi3_23): + shar r0 +LOCAL(ashrsi3_22): + shar r0 +LOCAL(ashrsi3_21): + shar r0 +LOCAL(ashrsi3_20): + shar r0 +LOCAL(ashrsi3_19): + shar r0 +LOCAL(ashrsi3_18): + shar r0 +LOCAL(ashrsi3_17): + shar r0 +LOCAL(ashrsi3_16): + shlr16 r0 + rts + exts.w r0,r0 + +LOCAL(ashrsi3_15): + shar r0 +LOCAL(ashrsi3_14): + shar r0 +LOCAL(ashrsi3_13): + shar r0 +LOCAL(ashrsi3_12): + shar r0 +LOCAL(ashrsi3_11): + shar r0 +LOCAL(ashrsi3_10): + shar r0 +LOCAL(ashrsi3_9): + shar r0 +LOCAL(ashrsi3_8): + shar r0 +LOCAL(ashrsi3_7): + shar r0 +LOCAL(ashrsi3_6): + shar r0 +LOCAL(ashrsi3_5): + shar r0 +LOCAL(ashrsi3_4): + shar r0 +LOCAL(ashrsi3_3): + shar r0 +LOCAL(ashrsi3_2): + shar r0 +LOCAL(ashrsi3_1): + rts + shar r0 + +LOCAL(ashrsi3_0): + rts + nop + +#endif + +#ifdef L_ashiftlt + +! +! ___ashlsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ .global ___ashlsi3 + .align 2 +___ashlsi3: + mov #31,r0 + and r0,r5 + mova LOCAL(ashlsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashlsi3_table): + .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) + .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) + +LOCAL(ashlsi3_6): + shll2 r0 +LOCAL(ashlsi3_4): + shll2 r0 +LOCAL(ashlsi3_2): + rts + shll2 r0 + +LOCAL(ashlsi3_7): + shll2 r0 +LOCAL(ashlsi3_5): + shll2 r0 +LOCAL(ashlsi3_3): + shll2 r0 +LOCAL(ashlsi3_1): + rts + shll r0 + +LOCAL(ashlsi3_14): + shll2 r0 +LOCAL(ashlsi3_12): + shll2 r0 +LOCAL(ashlsi3_10): + shll2 r0 +LOCAL(ashlsi3_8): + rts + shll8 r0 + +LOCAL(ashlsi3_15): + shll2 r0 +LOCAL(ashlsi3_13): + shll2 r0 +LOCAL(ashlsi3_11): + shll2 r0 +LOCAL(ashlsi3_9): + shll8 r0 + rts + shll r0 + +LOCAL(ashlsi3_22): + shll2 r0 +LOCAL(ashlsi3_20): + shll2 r0 +LOCAL(ashlsi3_18): + shll2 r0 +LOCAL(ashlsi3_16): + rts + shll16 r0 + +LOCAL(ashlsi3_23): + shll2 r0 +LOCAL(ashlsi3_21): + shll2 r0 +LOCAL(ashlsi3_19): + shll2 r0 +LOCAL(ashlsi3_17): + shll16 r0 + rts + shll r0 + +LOCAL(ashlsi3_30): + shll2 r0 +LOCAL(ashlsi3_28): + shll2 r0 +LOCAL(ashlsi3_26): + shll2 r0 +LOCAL(ashlsi3_24): + shll16 r0 + rts + shll8 r0 + +LOCAL(ashlsi3_31): + shll2 r0 +LOCAL(ashlsi3_29): + shll2 r0 +LOCAL(ashlsi3_27): + shll2 r0 +LOCAL(ashlsi3_25): + shll16 r0 + shll8 r0 + rts + shll r0 + +LOCAL(ashlsi3_0): + rts + nop + +#endif + +#ifdef L_lshiftrt + +! +! ___lshrsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ .global ___lshrsi3 + .align 2 +___lshrsi3: + mov #31,r0 + and r0,r5 + mova LOCAL(lshrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(lshrsi3_table): + .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) + .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) + +LOCAL(lshrsi3_6): + shlr2 r0 +LOCAL(lshrsi3_4): + shlr2 r0 +LOCAL(lshrsi3_2): + rts + shlr2 r0 + +LOCAL(lshrsi3_7): + shlr2 r0 +LOCAL(lshrsi3_5): + shlr2 r0 +LOCAL(lshrsi3_3): + shlr2 r0 +LOCAL(lshrsi3_1): + rts + shlr r0 + +LOCAL(lshrsi3_14): + shlr2 r0 +LOCAL(lshrsi3_12): + shlr2 r0 +LOCAL(lshrsi3_10): + shlr2 r0 +LOCAL(lshrsi3_8): + rts + shlr8 r0 + +LOCAL(lshrsi3_15): + shlr2 r0 +LOCAL(lshrsi3_13): + shlr2 r0 +LOCAL(lshrsi3_11): + shlr2 r0 +LOCAL(lshrsi3_9): + shlr8 r0 + rts + shlr r0 + +LOCAL(lshrsi3_22): + shlr2 r0 +LOCAL(lshrsi3_20): + shlr2 r0 +LOCAL(lshrsi3_18): + shlr2 r0 +LOCAL(lshrsi3_16): + rts + shlr16 r0 + +LOCAL(lshrsi3_23): + shlr2 r0 +LOCAL(lshrsi3_21): + shlr2 r0 +LOCAL(lshrsi3_19): + shlr2 r0 +LOCAL(lshrsi3_17): + shlr16 r0 + rts + shlr r0 + +LOCAL(lshrsi3_30): + shlr2 r0 +LOCAL(lshrsi3_28): + shlr2 r0 +LOCAL(lshrsi3_26): + shlr2 r0 +LOCAL(lshrsi3_24): + shlr16 r0 + rts + shlr8 r0 + +LOCAL(lshrsi3_31): + shlr2 r0 +LOCAL(lshrsi3_29): + shlr2 r0 +LOCAL(lshrsi3_27): + shlr2 r0 +LOCAL(lshrsi3_25): + shlr16 r0 + shlr8 r0 + rts + shlr r0 + +LOCAL(lshrsi3_0): + rts + nop + +#endif + +#ifdef L_movstr + .text +! done all the large groups, do the remainder + +! 
jump to movstr+ +done: + add #64,r5 + mova ___movstrSI0,r0 + shll2 r6 + add r6,r0 + jmp @r0 + add #64,r4 + .align 4 + .global ___movstrSI64 +___movstrSI64: + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + .global ___movstrSI60 +___movstrSI60: + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + .global ___movstrSI56 +___movstrSI56: + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + .global ___movstrSI52 +___movstrSI52: + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + .global ___movstrSI48 +___movstrSI48: + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + .global ___movstrSI44 +___movstrSI44: + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + .global ___movstrSI40 +___movstrSI40: + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + .global ___movstrSI36 +___movstrSI36: + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + .global ___movstrSI32 +___movstrSI32: + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + .global ___movstrSI28 +___movstrSI28: + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + .global ___movstrSI24 +___movstrSI24: + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + .global ___movstrSI20 +___movstrSI20: + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + .global ___movstrSI16 +___movstrSI16: + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + .global ___movstrSI12 +___movstrSI12: + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + .global ___movstrSI8 +___movstrSI8: + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + .global ___movstrSI4 +___movstrSI4: + mov.l @(0,r5),r0 + mov.l r0,@(0,r4) +___movstrSI0: + rts + or r0,r0,r0 + + .align 4 + + .global ___movstr +___movstr: + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + + mov.l @(0,r5),r0 + mov.l r0,@(0,r4) + + add #-16,r6 + cmp/pl r6 + bf done + + add #64,r5 + bra ___movstr + add #64,r4 +#endif + +#ifdef L_movstr_i4 +#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) + .text + .global ___movstr_i4_even + .global ___movstr_i4_odd + .global ___movstrSI12_i4 + + .p2align 5 +L_movstr_2mod4_end: + mov.l r0,@(16,r4) + rts + mov.l r1,@(20,r4) + + .p2align 2 + +___movstr_i4_odd: + mov.l @r5+,r1 + add #-4,r4 + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r1,@(4,r4) + mov.l r2,@(8,r4) + +L_movstr_loop: + mov.l r3,@(12,r4) + dt r6 + mov.l @r5+,r0 + bt/s L_movstr_2mod4_end + mov.l @r5+,r1 + add #16,r4 +L_movstr_start_even: + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r0,@r4 + dt r6 + mov.l r1,@(4,r4) + bf/s L_movstr_loop + mov.l r2,@(8,r4) + rts + mov.l r3,@(12,r4) + +___movstr_i4_even: + mov.l @r5+,r0 + bra L_movstr_start_even + mov.l @r5+,r1 + + .p2align 4 +___movstrSI12_i4: + mov.l @r5,r0 + mov.l @(4,r5),r1 + mov.l @(8,r5),r2 + mov.l r0,@r4 + mov.l r1,@(4,r4) + rts + mov.l r2,@(8,r4) +#endif /* ! __SH4__ */ +#endif + +#ifdef L_mulsi3 + + + .global ___mulsi3 + +! r4 = aabb +! r5 = ccdd +! r0 = aabb*ccdd via partial products +! +! if aa == 0 and cc = 0 +! r0 = bb*dd +! +! else +! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536) +! + +___mulsi3: + mulu r4,r5 ! multiply the lsws macl=bb*dd + mov r5,r3 ! r3 = ccdd + swap.w r4,r2 ! r2 = bbaa + xtrct r2,r3 ! r3 = aacc + tst r3,r3 ! msws zero ? 
+ bf hiset + rts ! yes - then we have the answer + sts macl,r0 + +hiset: sts macl,r0 ! r0 = bb*dd + mulu r2,r5 | brewing macl = aa*dd + sts macl,r1 + mulu r3,r4 | brewing macl = cc*bb + sts macl,r2 + add r1,r2 + shll16 r2 + rts + add r2,r0 + + +#endif +#ifdef L_sdivsi3_i4 + .title "SH DIVIDE" +!! 4 byte integer Divide code for the Hitachi SH +#ifdef __SH4__ +!! args in r4 and r5, result in fpul, clobber dr0, dr2 + + .global ___sdivsi3_i4 +___sdivsi3_i4: + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + +#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) +!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 + + .global ___sdivsi3_i4 +___sdivsi3_i4: + sts.l fpscr,@-r15 + mov #8,r2 + swap.w r2,r2 + lds r2,fpscr + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + +#endif /* ! __SH4__ */ +#endif + +#ifdef L_sdivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh3e code. */ +#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__) +!! +!! Steve Chamberlain +!! sac@cygnus.com +!! +!! + +!! args in r4 and r5, result in r0 clobber r1,r2,r3 + + .global ___sdivsi3 +___sdivsi3: + mov r4,r1 + mov r5,r0 + + tst r0,r0 + bt div0 + mov #0,r2 + div0s r2,r1 + subc r3,r3 + subc r2,r1 + div0s r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + addc r2,r1 + rts + mov r1,r0 + + +div0: rts + mov #0,r0 + +#endif /* ! __SH4__ */ +#endif +#ifdef L_udivsi3_i4 + + .title "SH DIVIDE" +!! 4 byte integer Divide code for the Hitachi SH +#ifdef __SH4__ +!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 + + .global ___udivsi3_i4 +___udivsi3_i4: + mov #1,r1 + cmp/hi r1,r5 + bf trivial + rotr r1 + xor r1,r4 + lds r4,fpul + mova L1,r0 +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else +#ifdef __LITTLE_ENDIAN__ + fmov.s @r0+,fr5 + fmov.s @r0,fr4 +#else + fmov.s @r0+,fr4 + fmov.s @r0,fr5 +#endif +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + +trivial: + rts + lds r4,fpul + + .align 2 +L1: + .double 2147483648 + +#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) +!! 
args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 + + .global ___udivsi3_i4 +___udivsi3_i4: + mov #1,r1 + cmp/hi r1,r5 + bf trivial + sts.l fpscr,@-r15 + mova L1,r0 + lds.l @r0+,fpscr + rotr r1 + xor r1,r4 + lds r4,fpul +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else +#ifdef __LITTLE_ENDIAN__ + fmov.s @r0+,fr5 + fmov.s @r0,fr4 +#else + fmov.s @r0+,fr4 + fmov.s @r0,fr5 +#endif +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + +trivial: + rts + lds r4,fpul + + .align 2 +L1: +#if defined (__LITTLE_ENDIAN__) || ! defined (FMOVD_WORKS) + .long 0x80000 +#else + .long 0x180000 +#endif + .double 2147483648 + +#endif /* ! __SH4__ */ +#endif + +#ifdef L_udivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh3e code. */ +#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__) +!! +!! Steve Chamberlain +!! sac@cygnus.com +!! +!! + +!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit + .global ___udivsi3 + +___udivsi3: +longway: + mov #0,r0 + div0u + ! get one bit from the msb of the numerator into the T + ! bit and divide it by whats in r5. Put the answer bit + ! into the T bit so it can come out again at the bottom + + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 +shortway: + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + +vshortway: + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 ; div1 r5,r0 + rotcl r4 +ret: rts + mov r4,r0 + +#endif /* __SH4__ */ +#endif +#ifdef L_set_fpscr +#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) + .global ___set_fpscr +___set_fpscr: + lds r4,fpscr + mov.l ___set_fpscr_L1,r1 + swap.w r4,r0 + or #24,r0 +#ifndef FMOVD_WORKS + xor #16,r0 +#endif +#if defined(__SH4__) + swap.w r0,r3 + mov.l r3,@(4,r1) +#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r2 + mov.l r2,@r1 +#endif +#ifndef FMOVD_WORKS + xor #8,r0 +#else + xor #24,r0 +#endif +#if defined(__SH4__) + swap.w r0,r2 + rts + mov.l r2,@r1 +#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r3 + rts + mov.l r3,@(4,r1) +#endif + .align 2 +___set_fpscr_L1: + .long ___fpscr_values +#ifdef __ELF__ + .comm ___fpscr_values,8,4 +#else + .comm ___fpscr_values,8 +#endif /* ELF */ +#endif /* SH3E / SH4 */ +#endif /* L_set_fpscr */ diff --git a/gcc/config/sh/rtems.h b/gcc/config/sh/rtems.h new file mode 100755 index 0000000..3e3fc7b --- /dev/null +++ b/gcc/config/sh/rtems.h @@ -0,0 +1,35 @@ +/* Definitions for rtems targeting a SH using COFF. + Copyright (C) 1997 Free Software Foundation, Inc. + Contributed by Joel Sherrill (joel@OARcorp.com). + +This file is part of GNU CC. 
+ +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include "sh/sh.h" + +/* Specify predefined symbols in preprocessor. */ + +#undef CPP_PREDEFINES +#define CPP_PREDEFINES "-D__sh__ -Drtems -D__rtems__ \ + -Asystem(rtems) -Acpu(sh) -Amachine(sh)" + +/* Generate calls to memcpy, memcmp and memset. */ +#ifndef TARGET_MEM_FUNCTIONS +#define TARGET_MEM_FUNCTIONS +#endif + +/* end of sh/rtems.h */ diff --git a/gcc/config/sh/rtemself.h b/gcc/config/sh/rtemself.h new file mode 100755 index 0000000..8000a3a --- /dev/null +++ b/gcc/config/sh/rtemself.h @@ -0,0 +1,33 @@ +/* Definitions for rtems targeting a SH using elf. + Copyright (C) 1997 Free Software Foundation, Inc. + Contributed by Joel Sherrill (joel@OARcorp.com). + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include "sh/elf.h" + +/* Specify predefined symbols in preprocessor. */ + +#undef CPP_PREDEFINES +#define CPP_PREDEFINES "-D__sh__ -D__ELF__ -Drtems -D__rtems__ \ + -Asystem(rtems) -Acpu(sh) -Amachine(sh)" + +/* Generate calls to memcpy, memcmp and memset. */ +#ifndef TARGET_MEM_FUNCTIONS +#define TARGET_MEM_FUNCTIONS +#endif diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c new file mode 100755 index 0000000..4d4b5cd --- /dev/null +++ b/gcc/config/sh/sh.c @@ -0,0 +1,4786 @@ +/* Output routines for GCC for Hitachi Super-H. + Copyright (C) 1993-1998 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Contributed by Steve Chamberlain (sac@cygnus.com). + Improved by Jim Wilson (wilson@cygnus.com). 
*/ + +#include "config.h" + +#include <stdio.h> + +#include "rtl.h" +#include "tree.h" +#include "flags.h" +#include "insn-flags.h" +#include "expr.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "output.h" +#include "insn-attr.h" + +int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch; + +#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0) +#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1) + +/* ??? The pragma interrupt support will not work for SH3. */ +/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to + output code for the next function appropriate for an interrupt handler. */ +int pragma_interrupt; + +/* This is set by the trap_exit attribute for functions. It specifies + a trap number to be used in a trapa instruction at function exit + (instead of an rte instruction). */ +int trap_exit; + +/* This is used by the sp_switch attribute for functions. It specifies + a variable holding the address of the stack the interrupt function + should switch to/from at entry/exit. */ +rtx sp_switch; + +/* This is set by #pragma trapa, and is similar to the above, except that + the compiler doesn't emit code to preserve all registers. */ +static int pragma_trapa; + +/* This is set by #pragma nosave_low_regs. This is useful on the SH3, + which has a separate set of low regs for User and Supervisor modes. + This should only be used for the lowest level of interrupts. Higher levels + of interrupts must save the registers in case they themselves are + interrupted. */ +int pragma_nosave_low_regs; + +/* This is used for communication between SETUP_INCOMING_VARARGS and + sh_expand_prologue. */ +int current_function_anonymous_args; + +/* Global variables from toplev.c and final.c that are used within, but + not declared in any header file. */ +extern char *version_string; +extern int *insn_addresses; + +/* Global variables for machine-dependent things. */ + +/* Which cpu are we scheduling for. */ +enum processor_type sh_cpu; + +/* Saved operands from the last compare to use when we generate an scc + or bcc insn. */ + +rtx sh_compare_op0; +rtx sh_compare_op1; + +enum machine_mode sh_addr_diff_vec_mode; + +/* Provides the class number of the smallest class containing + reg number. */ + +int regno_reg_class[FIRST_PSEUDO_REGISTER] = +{ + R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, + GENERAL_REGS, PR_REGS, T_REGS, NO_REGS, + MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS, + FP0_REGS,FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + DF_REGS, DF_REGS, DF_REGS, DF_REGS, + DF_REGS, DF_REGS, DF_REGS, DF_REGS, + FPSCR_REGS, +}; + +char fp_reg_names[][5] = +{ + "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", + "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", + "fpul", + "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14", +}; + +/* Provide reg_class from a letter such as appears in the machine + description. 
*/ + +enum reg_class reg_class_from_letter[] = +{ + /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS, + /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS, + /* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS, + /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS, + /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS, + /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS, + /* y */ FPUL_REGS, /* z */ R0_REGS +}; + +int assembler_dialect; + +rtx get_fpscr_rtx (); +void emit_sf_insn (); +void emit_df_insn (); + +static void split_branches PROTO ((rtx)); + +/* Print the operand address in x to the stream. */ + +void +print_operand_address (stream, x) + FILE *stream; + rtx x; +{ + switch (GET_CODE (x)) + { + case REG: + case SUBREG: + fprintf (stream, "@%s", reg_names[true_regnum (x)]); + break; + + case PLUS: + { + rtx base = XEXP (x, 0); + rtx index = XEXP (x, 1); + + switch (GET_CODE (index)) + { + case CONST_INT: + fprintf (stream, "@(%d,%s)", INTVAL (index), + reg_names[true_regnum (base)]); + break; + + case REG: + case SUBREG: + { + int base_num = true_regnum (base); + int index_num = true_regnum (index); + + fprintf (stream, "@(r0,%s)", + reg_names[MAX (base_num, index_num)]); + break; + } + + default: + debug_rtx (x); + abort (); + } + } + break; + + case PRE_DEC: + fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]); + break; + + case POST_INC: + fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]); + break; + + default: + output_addr_const (stream, x); + break; + } +} + +/* Print operand x (an rtx) in assembler syntax to file stream + according to modifier code. + + '.' print a .s if insn needs delay slot + ',' print LOCAL_LABEL_PREFIX + '@' print trap, rte or rts depending upon pragma interruptness + '#' output a nop if there is nothing to put in the delay slot + 'O' print a constant without the # + 'R' print the LSW of a dp value - changes if in little endian + 'S' print the MSW of a dp value - changes if in little endian + 'T' print the next word of a dp value - same as 'R' in big endian mode. + 'o' output an operator. */ + +void +print_operand (stream, x, code) + FILE *stream; + rtx x; + int code; +{ + switch (code) + { + case '.': + if (final_sequence + && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) + fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s"); + break; + case ',': + fprintf (stream, "%s", LOCAL_LABEL_PREFIX); + break; + case '@': + { + int interrupt_handler; + + if ((lookup_attribute + ("interrupt_handler", + DECL_MACHINE_ATTRIBUTES (current_function_decl))) + != NULL_TREE) + interrupt_handler = 1; + else + interrupt_handler = 0; + + if (trap_exit) + fprintf (stream, "trapa #%d", trap_exit); + else if (interrupt_handler) + fprintf (stream, "rte"); + else + fprintf (stream, "rts"); + break; + } + case '#': + /* Output a nop if there's nothing in the delay slot. */ + if (dbr_sequence_length () == 0) + fprintf (stream, "\n\tnop"); + break; + case 'O': + output_addr_const (stream, x); + break; + case 'R': + fputs (reg_names[REGNO (x) + LSW], (stream)); + break; + case 'S': + fputs (reg_names[REGNO (x) + MSW], (stream)); + break; + case 'T': + /* Next word of a double. 
*/ + switch (GET_CODE (x)) + { + case REG: + fputs (reg_names[REGNO (x) + 1], (stream)); + break; + case MEM: + if (GET_CODE (XEXP (x, 0)) != PRE_DEC + && GET_CODE (XEXP (x, 0)) != POST_INC) + x = adj_offsettable_operand (x, 4); + print_operand_address (stream, XEXP (x, 0)); + break; + } + break; + case 'o': + switch (GET_CODE (x)) + { + case PLUS: fputs ("add", stream); break; + case MINUS: fputs ("sub", stream); break; + case MULT: fputs ("mul", stream); break; + case DIV: fputs ("div", stream); break; + } + break; + default: + switch (GET_CODE (x)) + { + case REG: + if (REGNO (x) >= FIRST_FP_REG && REGNO (x) <= LAST_FP_REG + && GET_MODE_SIZE (GET_MODE (x)) > 4) + fprintf ((stream), "d%s", reg_names[REGNO (x)]+1); + else + fputs (reg_names[REGNO (x)], (stream)); + break; + case MEM: + output_address (XEXP (x, 0)); + break; + default: + fputc ('#', stream); + output_addr_const (stream, x); + break; + } + break; + } +} + +/* Emit code to perform a block move. Choose the best method. + + OPERANDS[0] is the destination. + OPERANDS[1] is the source. + OPERANDS[2] is the size. + OPERANDS[3] is the alignment safe to use. */ + +int +expand_block_move (operands) + rtx *operands; +{ + int align = INTVAL (operands[3]); + int constp = (GET_CODE (operands[2]) == CONST_INT); + int bytes = (constp ? INTVAL (operands[2]) : 0); + + /* If it isn't a constant number of bytes, or if it doesn't have 4 byte + alignment, or if it isn't a multiple of 4 bytes, then fail. */ + if (! constp || align < 4 || (bytes % 4 != 0)) + return 0; + + if (TARGET_HARD_SH4) + { + if (bytes < 12) + return 0; + else if (bytes == 12) + { + tree entry_name; + rtx func_addr_rtx; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + + entry_name = get_identifier ("__movstrSI12_i4"); + + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + emit_insn (gen_block_move_real_i4 (func_addr_rtx)); + return 1; + } + else if (! TARGET_SMALLCODE) + { + tree entry_name; + rtx func_addr_rtx; + int dwords; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + rtx r6 = gen_rtx (REG, SImode, 6); + + entry_name = get_identifier (bytes & 4 + ? "__movstr_i4_odd" + : "__movstr_i4_even"); + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + + dwords = bytes >> 3; + emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1))); + emit_insn (gen_block_lump_real_i4 (func_addr_rtx)); + return 1; + } + else + return 0; + } + if (bytes < 64) + { + char entry[30]; + tree entry_name; + rtx func_addr_rtx; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + + sprintf (entry, "__movstrSI%d", bytes); + entry_name = get_identifier (entry); + + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx (SYMBOL_REF, Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + emit_insn (gen_block_move_real (func_addr_rtx)); + return 1; + } + + /* This is the same number of bytes as a memcpy call, but to a different + less common function name, so this will occasionally use more space. */ + if (! 
TARGET_SMALLCODE) + { + tree entry_name; + rtx func_addr_rtx; + int final_switch, while_loop; + rtx r4 = gen_rtx (REG, SImode, 4); + rtx r5 = gen_rtx (REG, SImode, 5); + rtx r6 = gen_rtx (REG, SImode, 6); + + entry_name = get_identifier ("__movstr"); + func_addr_rtx + = copy_to_mode_reg (Pmode, + gen_rtx (SYMBOL_REF, Pmode, + IDENTIFIER_POINTER (entry_name))); + emit_insn (gen_move_insn (r4, XEXP (operands[0], 0))); + emit_insn (gen_move_insn (r5, XEXP (operands[1], 0))); + + /* r6 controls the size of the move. 16 is decremented from it + for each 64 bytes moved. Then the negative bit left over is used + as an index into a list of move instructions. e.g., a 72 byte move + would be set up with size(r6) = 14, for one iteration through the + big while loop, and a switch of -2 for the last part. */ + + final_switch = 16 - ((bytes / 4) % 16); + while_loop = ((bytes / 4) / 16 - 1) * 16; + emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch))); + emit_insn (gen_block_lump_real (func_addr_rtx)); + return 1; + } + + return 0; +} + +/* Prepare operands for a move define_expand; specifically, one of the + operands must be in a register. */ + +int +prepare_move_operands (operands, mode) + rtx operands[]; + enum machine_mode mode; +{ + if (! reload_in_progress && ! reload_completed) + { + /* Copy the source to a register if both operands aren't registers. */ + if (! register_operand (operands[0], mode) + && ! register_operand (operands[1], mode)) + operands[1] = copy_to_mode_reg (mode, operands[1]); + + /* This case can happen while generating code to move the result + of a library call to the target. Reject `st r0,@(rX,rY)' because + reload will fail to find a spill register for rX, since r0 is already + being used for the source. */ + else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0 + && GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG) + operands[1] = copy_to_mode_reg (mode, operands[1]); + } + + return 0; +} + +/* Prepare the operands for an scc instruction; make sure that the + compare has been done. */ +rtx +prepare_scc_operands (code) + enum rtx_code code; +{ + rtx t_reg = gen_rtx (REG, SImode, T_REG); + enum rtx_code oldcode = code; + enum machine_mode mode; + + /* First need a compare insn. */ + switch (code) + { + case NE: + /* It isn't possible to handle this case. */ + abort (); + case LT: + code = GT; + break; + case LE: + code = GE; + break; + case LTU: + code = GTU; + break; + case LEU: + code = GEU; + break; + } + if (code != oldcode) + { + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + } + + mode = GET_MODE (sh_compare_op0); + if (mode == VOIDmode) + mode = GET_MODE (sh_compare_op1); + + sh_compare_op0 = force_reg (mode, sh_compare_op0); + if ((code != EQ && code != NE + && (sh_compare_op1 != const0_rtx + || code == GTU || code == GEU || code == LTU || code == LEU)) + || TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT) + sh_compare_op1 = force_reg (mode, sh_compare_op1); + + if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) + (mode == SFmode ? 
emit_sf_insn : emit_df_insn) + (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, + gen_rtx (SET, VOIDmode, t_reg, + gen_rtx (code, SImode, + sh_compare_op0, sh_compare_op1)), + gen_rtx (USE, VOIDmode, get_fpscr_rtx ())))); + else + emit_insn (gen_rtx (SET, VOIDmode, t_reg, + gen_rtx (code, SImode, sh_compare_op0, + sh_compare_op1))); + + return t_reg; +} + +/* Called from the md file, set up the operands of a compare instruction. */ + +void +from_compare (operands, code) + rtx *operands; + int code; +{ + enum machine_mode mode = GET_MODE (sh_compare_op0); + rtx insn; + if (mode == VOIDmode) + mode = GET_MODE (sh_compare_op1); + if (code != EQ + || mode == DImode + || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)) + { + /* Force args into regs, since we can't use constants here. */ + sh_compare_op0 = force_reg (mode, sh_compare_op0); + if (sh_compare_op1 != const0_rtx + || code == GTU || code == GEU + || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)) + sh_compare_op1 = force_reg (mode, sh_compare_op1); + } + if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE) + { + from_compare (operands, GT); + insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1); + } + else + insn = gen_rtx (SET, VOIDmode, + gen_rtx (REG, SImode, 18), + gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1)); + if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + insn = gen_rtx (PARALLEL, VOIDmode, + gen_rtvec (2, insn, + gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))); + (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn); + } + else + emit_insn (insn); +} + +/* Functions to output assembly code. */ + +/* Return a sequence of instructions to perform DI or DF move. + + Since the SH cannot move a DI or DF in one instruction, we have + to take care when we see overlapping source and dest registers. */ + +char * +output_movedouble (insn, operands, mode) + rtx insn; + rtx operands[]; + enum machine_mode mode; +{ + rtx dst = operands[0]; + rtx src = operands[1]; + + if (GET_CODE (dst) == MEM + && GET_CODE (XEXP (dst, 0)) == PRE_DEC) + return "mov.l %T1,%0\n\tmov.l %1,%0"; + + if (register_operand (dst, mode) + && register_operand (src, mode)) + { + if (REGNO (src) == MACH_REG) + return "sts mach,%S0\n\tsts macl,%R0"; + + /* When mov.d r1,r2 do r2->r3 then r1->r2; + when mov.d r1,r0 do r1->r0 then r2->r1. */ + + if (REGNO (src) + 1 == REGNO (dst)) + return "mov %T1,%T0\n\tmov %1,%0"; + else + return "mov %1,%0\n\tmov %T1,%T0"; + } + else if (GET_CODE (src) == CONST_INT) + { + if (INTVAL (src) < 0) + output_asm_insn ("mov #-1,%S0", operands); + else + output_asm_insn ("mov #0,%S0", operands); + + return "mov %1,%R0"; + } + else if (GET_CODE (src) == MEM) + { + int ptrreg = -1; + int dreg = REGNO (dst); + rtx inside = XEXP (src, 0); + + if (GET_CODE (inside) == REG) + ptrreg = REGNO (inside); + else if (GET_CODE (inside) == SUBREG) + ptrreg = REGNO (SUBREG_REG (inside)) + SUBREG_WORD (inside); + else if (GET_CODE (inside) == PLUS) + { + ptrreg = REGNO (XEXP (inside, 0)); + /* ??? A r0+REG address shouldn't be possible here, because it isn't + an offsettable address. Unfortunately, offsettable addresses use + QImode to check the offset, and a QImode offsettable address + requires r0 for the other operand, which is not currently + supported, so we can't use the 'o' constraint. + Thus we must check for and handle r0+REG addresses here. + We punt for now, since this is likely very rare. 
*/ + if (GET_CODE (XEXP (inside, 1)) == REG) + abort (); + } + else if (GET_CODE (inside) == LABEL_REF) + return "mov.l %1,%0\n\tmov.l %1+4,%T0"; + else if (GET_CODE (inside) == POST_INC) + return "mov.l %1,%0\n\tmov.l %1,%T0"; + else + abort (); + + /* Work out the safe way to copy. Copy into the second half first. */ + if (dreg == ptrreg) + return "mov.l %T1,%T0\n\tmov.l %1,%0"; + } + + return "mov.l %1,%0\n\tmov.l %T1,%T0"; +} + +/* Print an instruction which would have gone into a delay slot after + another instruction, but couldn't because the other instruction expanded + into a sequence where putting the slot insn at the end wouldn't work. */ + +static void +print_slot (insn) + rtx insn; +{ + final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1); + + INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1; +} + +char * +output_far_jump (insn, op) + rtx insn; + rtx op; +{ + struct { rtx lab, reg, op; } this; + char *jump; + int far; + int offset = branch_dest (insn) - insn_addresses[INSN_UID (insn)]; + + this.lab = gen_label_rtx (); + + if (TARGET_SH2 + && offset >= -32764 + && offset - get_attr_length (insn) <= 32766) + { + far = 0; + jump = "mov.w %O0,%1;braf %1"; + } + else + { + far = 1; + jump = "mov.l %O0,%1;jmp @%1"; + } + /* If we have a scratch register available, use it. */ + if (GET_CODE (PREV_INSN (insn)) == INSN + && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch) + { + this.reg = SET_DEST (PATTERN (PREV_INSN (insn))); + output_asm_insn (jump, &this.lab); + if (dbr_sequence_length ()) + print_slot (final_sequence); + else + output_asm_insn ("nop", 0); + } + else + { + /* Output the delay slot insn first if any. */ + if (dbr_sequence_length ()) + print_slot (final_sequence); + + this.reg = gen_rtx (REG, SImode, 13); + output_asm_insn ("mov.l r13,@-r15", 0); + output_asm_insn (jump, &this.lab); + output_asm_insn ("mov.l @r15+,r13", 0); + } + if (far) + output_asm_insn (".align 2", 0); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab)); + this.op = op; + output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab); + return ""; +} + +/* Local label counter, used for constants in the pool and inside + pattern branches. */ + +static int lf = 100; + +/* Output code for ordinary branches. */ + +char * +output_branch (logic, insn, operands) + int logic; + rtx insn; + rtx *operands; +{ + switch (get_attr_length (insn)) + { + case 6: + /* This can happen if filling the delay slot has caused a forward + branch to exceed its range (we could reverse it, but only + when we know we won't overextend other branches; this should + best be handled by relaxation). + It can also happen when other condbranches hoist delay slot insn + from their destination, thus leading to code size increase. + But the branch will still be in the range -4092..+4098 bytes. */ + + if (! TARGET_RELAX) + { + int label = lf++; + /* The call to print_slot will clobber the operands. */ + rtx op0 = operands[0]; + + /* If the instruction in the delay slot is annulled (true), then + there is no delay slot where we can put it now. The only safe + place for it is after the label. final will do that by default. */ + + if (final_sequence + && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) + { + asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", + ASSEMBLER_DIALECT ? "/" : ".", label); + print_slot (final_sequence); + } + else + asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? 
"f" : "t", label); + + output_asm_insn ("bra\t%l0", &op0); + fprintf (asm_out_file, "\tnop\n"); + ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label); + + return ""; + } + /* When relaxing, handle this like a short branch. The linker + will fix it up if it still doesn't fit after relaxation. */ + case 2: + return logic ? "bt%.\t%l0" : "bf%.\t%l0"; + default: + abort (); + } +} + +char * +output_branchy_insn (code, template, insn, operands) + char *template; + enum rtx_code code; + rtx insn; + rtx *operands; +{ + rtx next_insn = NEXT_INSN (insn); + int label_nr; + + if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn)) + { + rtx src = SET_SRC (PATTERN (next_insn)); + if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) + { + /* Following branch not taken */ + operands[9] = gen_label_rtx (); + emit_label_after (operands[9], next_insn); + return template; + } + else + { + int offset = (branch_dest (next_insn) + - insn_addresses[INSN_UID (next_insn)] + 4); + if (offset >= -252 && offset <= 258) + { + if (GET_CODE (src) == IF_THEN_ELSE) + /* branch_true */ + src = XEXP (src, 1); + operands[9] = src; + return template; + } + } + } + operands[9] = gen_label_rtx (); + emit_label_after (operands[9], insn); + return template; +} + +char * +output_ieee_ccmpeq (insn, operands) + rtx insn, operands; +{ + output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands); +} + +/* Output to FILE the start of the assembler file. */ + +void +output_file_start (file) + FILE *file; +{ + register int pos; + + output_file_directive (file, main_input_filename); + + /* Switch to the data section so that the coffsem symbol and the + gcc2_compiled. symbol aren't in the text section. */ + data_section (); + + if (TARGET_LITTLE_ENDIAN) + fprintf (file, "\t.little\n"); +} + +/* Actual number of instructions used to make a shift by N. */ +static char ashiftrt_insns[] = + { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2}; + +/* Left shift and logical right shift are the same. */ +static char shift_insns[] = + { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; + +/* Individual shift amounts needed to get the above length sequences. + One bit right shifts clobber the T bit, so when possible, put one bit + shifts in the middle of the sequence, so the ends are eligible for + branch delay slots. */ +static short shift_amounts[32][5] = { + {0}, {1}, {2}, {2, 1}, + {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2}, + {8}, {8, 1}, {8, 2}, {8, 1, 2}, + {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8}, + {16}, {16, 1}, {16, 2}, {16, 1, 2}, + {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, + {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, + {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; + +/* Likewise, but for shift amounts < 16, up to three highmost bits + might be clobbered. This is typically used when combined with some + kind of sign or zero extension. 
*/ + +static char ext_shift_insns[] = + { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; + +static short ext_shift_amounts[32][4] = { + {0}, {1}, {2}, {2, 1}, + {2, 2}, {2, 1, 2}, {8, -2}, {8, -1}, + {8}, {8, 1}, {8, 2}, {8, 1, 2}, + {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1}, + {16}, {16, 1}, {16, 2}, {16, 1, 2}, + {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, + {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, + {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; + +/* Assuming we have a value that has been sign-extended by at least one bit, + can we use the ext_shift_amounts with the last shift turned to an arithmetic shift + to shift it by N without data loss, and quicker than by other means? */ +#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15) + +/* This is used in length attributes in sh.md to help compute the length + of arbitrary constant shift instructions. */ + +int +shift_insns_rtx (insn) + rtx insn; +{ + rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + int shift_count = INTVAL (XEXP (set_src, 1)); + enum rtx_code shift_code = GET_CODE (set_src); + + switch (shift_code) + { + case ASHIFTRT: + return ashiftrt_insns[shift_count]; + case LSHIFTRT: + case ASHIFT: + return shift_insns[shift_count]; + default: + abort(); + } +} + +/* Return the cost of a shift. */ + +int +shiftcosts (x) + rtx x; +{ + int value = INTVAL (XEXP (x, 1)); + + /* If shift by a non constant, then this will be expensive. */ + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return SH_DYNAMIC_SHIFT_COST; + + /* Otherwise, return the true cost in instructions. */ + if (GET_CODE (x) == ASHIFTRT) + { + int cost = ashiftrt_insns[value]; + /* If SH3, then we put the constant in a reg and use shad. */ + if (cost > 1 + SH_DYNAMIC_SHIFT_COST) + cost = 1 + SH_DYNAMIC_SHIFT_COST; + return cost; + } + else + return shift_insns[value]; +} + +/* Return the cost of an AND operation. */ + +int +andcosts (x) + rtx x; +{ + int i; + + /* Anding with a register is a single cycle and instruction. */ + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return 1; + + i = INTVAL (XEXP (x, 1)); + /* These constants are single cycle extu.[bw] instructions. */ + if (i == 0xff || i == 0xffff) + return 1; + /* Constants that can be used in an and immediate instruction is a single + cycle, but this requires r0, so make it a little more expensive. */ + if (CONST_OK_FOR_L (i)) + return 2; + /* Constants that can be loaded with a mov immediate and an and. + This case is probably unnecessary. */ + if (CONST_OK_FOR_I (i)) + return 2; + /* Any other constants requires a 2 cycle pc-relative load plus an and. + This case is probably unnecessary. */ + return 3; +} + +/* Return the cost of a multiply. */ +int +multcosts (x) + rtx x; +{ + if (TARGET_SH2) + { + /* We have a mul insn, so we can never take more than the mul and the + read of the mac reg, but count more because of the latency and extra + reg usage. */ + if (TARGET_SMALLCODE) + return 2; + return 3; + } + + /* If we're aiming at small code, then just count the number of + insns in a multiply call sequence. */ + if (TARGET_SMALLCODE) + return 5; + + /* Otherwise count all the insns in the routine we'd be calling too. */ + return 20; +} + +/* Code to expand a shift. */ + +void +gen_ashift (type, n, reg) + int type; + int n; + rtx reg; +{ + /* Negative values here come from the shift_amounts array. 
*/ + if (n < 0) + { + if (type == ASHIFT) + type = LSHIFTRT; + else + type = ASHIFT; + n = -n; + } + + switch (type) + { + case ASHIFTRT: + emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n))); + break; + case LSHIFTRT: + if (n == 1) + emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n))); + else + emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n))); + break; + case ASHIFT: + emit_insn (gen_ashlsi3_k (reg, reg, GEN_INT (n))); + break; + } +} + +/* Same for HImode */ + +void +gen_ashift_hi (type, n, reg) + int type; + int n; + rtx reg; +{ + /* Negative values here come from the shift_amounts array. */ + if (n < 0) + { + if (type == ASHIFT) + type = LSHIFTRT; + else + type = ASHIFT; + n = -n; + } + + switch (type) + { + case ASHIFTRT: + case LSHIFTRT: + /* We don't have HImode right shift operations because using the + ordinary 32 bit shift instructions for that doesn't generate proper + zero/sign extension. + gen_ashift_hi is only called in contexts where we know that the + sign extension works out correctly. */ + { + int word = 0; + if (GET_CODE (reg) == SUBREG) + { + word = SUBREG_WORD (reg); + reg = SUBREG_REG (reg); + } + gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word)); + break; + } + case ASHIFT: + emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n))); + break; + } +} + +/* Output RTL to split a constant shift into its component SH constant + shift instructions. */ + +int +gen_shifty_op (code, operands) + int code; + rtx *operands; +{ + int value = INTVAL (operands[2]); + int max, i; + + /* Truncate the shift count in case it is out of bounds. */ + value = value & 0x1f; + + if (value == 31) + { + if (code == LSHIFTRT) + { + emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); + emit_insn (gen_movt (operands[0])); + return; + } + else if (code == ASHIFT) + { + /* There is a two instruction sequence for 31 bit left shifts, + but it requires r0. */ + if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0) + { + emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); + emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); + return; + } + } + } + else if (value == 0) + { + /* This can happen when not optimizing. We must output something here + to prevent the compiler from aborting in final.c after the try_split + call. */ + emit_insn (gen_nop ()); + return; + } + + max = shift_insns[value]; + for (i = 0; i < max; i++) + gen_ashift (code, shift_amounts[value][i], operands[0]); +} + +/* Same as above, but optimized for values where the topmost bits don't + matter. */ + +int +gen_shifty_hi_op (code, operands) + int code; + rtx *operands; +{ + int value = INTVAL (operands[2]); + int max, i; + void (*gen_fun)(); + + /* This operation is used by and_shl for SImode values with a few + high bits known to be cleared. */ + value &= 31; + if (value == 0) + { + emit_insn (gen_nop ()); + return; + } + + gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift; + if (code == ASHIFT) + { + max = ext_shift_insns[value]; + for (i = 0; i < max; i++) + gen_fun (code, ext_shift_amounts[value][i], operands[0]); + } + else + /* When shifting right, emit the shifts in reverse order, so that + solitary negative values come first. */ + for (i = ext_shift_insns[value] - 1; i >= 0; i--) + gen_fun (code, ext_shift_amounts[value][i], operands[0]); +} + +/* Output RTL for an arithmetic right shift. */ + +/* ??? Rewrite to use super-optimizer sequences. 
*/ + +int +expand_ashiftrt (operands) + rtx *operands; +{ + rtx wrk; + char func[18]; + tree func_name; + int value; + + if (TARGET_SH3) + { + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx count = copy_to_mode_reg (SImode, operands[2]); + emit_insn (gen_negsi2 (count, count)); + emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); + return 1; + } + else if (ashiftrt_insns[INTVAL (operands[2]) & 31] + > 1 + SH_DYNAMIC_SHIFT_COST) + { + rtx count + = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31))); + emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); + return 1; + } + } + if (GET_CODE (operands[2]) != CONST_INT) + return 0; + + value = INTVAL (operands[2]) & 31; + + if (value == 31) + { + emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); + return 1; + } + else if (value >= 16 && value <= 19) + { + wrk = gen_reg_rtx (SImode); + emit_insn (gen_ashrsi2_16 (wrk, operands[1])); + value -= 16; + while (value--) + gen_ashift (ASHIFTRT, 1, wrk); + emit_move_insn (operands[0], wrk); + return 1; + } + /* Expand a short sequence inline, longer call a magic routine. */ + else if (value <= 5) + { + wrk = gen_reg_rtx (SImode); + emit_move_insn (wrk, operands[1]); + while (value--) + gen_ashift (ASHIFTRT, 1, wrk); + emit_move_insn (operands[0], wrk); + return 1; + } + + wrk = gen_reg_rtx (Pmode); + + /* Load the value into an arg reg and call a helper. */ + emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]); + sprintf (func, "__ashiftrt_r4_%d", value); + func_name = get_identifier (func); + emit_move_insn (wrk, gen_rtx (SYMBOL_REF, Pmode, + IDENTIFIER_POINTER (func_name))); + emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk)); + emit_move_insn (operands[0], gen_rtx (REG, SImode, 4)); + return 1; +} + +int sh_dynamicalize_shift_p (count) + rtx count; +{ + return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST; +} + +/* Try to find a good way to implement the combiner pattern + [(set (match_operand:SI 0 "register_operand" "r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) . + LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3. + return 0 for simple right / left or left/right shift combination. + return 1 for a combination of shifts with zero_extend. + return 2 for a combination of shifts with an AND that needs r0. + return 3 for a combination of shifts with an AND that needs an extra + scratch register, when the three highmost bits of the AND mask are clear. + return 4 for a combination of shifts with an AND that needs an extra + scratch register, when any of the three highmost bits of the AND mask + is set. + If ATTRP is set, store an initial right shift width in ATTRP[0], + and the instruction length in ATTRP[1] . These values are not valid + when returning 0. + When ATTRP is set and returning 1, ATTRP[2] gets set to the index into + shift_amounts for the last shift value that is to be used before the + sign extend. 
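+   As a concrete example, with LEFT_RTX == 2 and MASK_RTX == 0x3fc the
+   mask reduces to 0xff once the left shift is taken into account, so the
+   cheapest implementation is a zero extension of the low byte followed by
+   a two bit left shift (extu.b plus shll2), and this function returns 1.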
*/ +int +shl_and_kind (left_rtx, mask_rtx, attrp) + rtx left_rtx, mask_rtx; + int *attrp; +{ + unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2; + int left = INTVAL (left_rtx), right; + int best = 0; + int cost, best_cost = 10000; + int best_right = 0, best_len = 0; + int i; + int can_ext; + + if (left < 0 || left > 31) + return 0; + if (GET_CODE (mask_rtx) == CONST_INT) + mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left; + else + mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left; + /* Can this be expressed as a right shift / left shift pair ? */ + lsb = ((mask ^ (mask - 1)) >> 1) + 1; + right = exact_log2 (lsb); + mask2 = ~(mask + lsb - 1); + lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1; + /* mask has no zeroes but trailing zeroes <==> ! mask2 */ + if (! mask2) + best_cost = shift_insns[right] + shift_insns[right + left]; + /* mask has no trailing zeroes <==> ! right */ + else if (! right && mask2 == ~(lsb2 - 1)) + { + int late_right = exact_log2 (lsb2); + best_cost = shift_insns[left + late_right] + shift_insns[late_right]; + } + /* Try to use zero extend */ + if (mask2 == ~(lsb2 - 1)) + { + int width, first; + + for (width = 8; width <= 16; width += 8) + { + /* Can we zero-extend right away? */ + if (lsb2 == (HOST_WIDE_INT)1 << width) + { + cost + = 1 + ext_shift_insns[right] + ext_shift_insns[left + right]; + if (cost < best_cost) + { + best = 1; + best_cost = cost; + best_right = right; + best_len = cost; + if (attrp) + attrp[2] = -1; + } + continue; + } + /* ??? Could try to put zero extend into initial right shift, + or even shift a bit left before the right shift. */ + /* Determine value of first part of left shift, to get to the + zero extend cut-off point. */ + first = width - exact_log2 (lsb2) + right; + if (first >= 0 && right + left - first >= 0) + { + cost = ext_shift_insns[right] + ext_shift_insns[first] + 1 + + ext_shift_insns[right + left - first]; + if (cost < best_cost) + { + best = 1; + best_cost = cost; + best_right = right; + best_len = cost; + if (attrp) + attrp[2] = first; + } + } + } + } + /* Try to use r0 AND pattern */ + for (i = 0; i <= 2; i++) + { + if (i > right) + break; + if (! CONST_OK_FOR_L (mask >> i)) + continue; + cost = (i != 0) + 2 + ext_shift_insns[left + i]; + if (cost < best_cost) + { + best = 2; + best_cost = cost; + best_right = i; + best_len = cost - 1; + } + } + /* Try to use a scratch register to hold the AND operand. */ + can_ext = ((mask << left) & 0xe0000000) == 0; + for (i = 0; i <= 2; i++) + { + if (i > right) + break; + cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3) + + (can_ext ? ext_shift_insns : shift_insns)[left + i]; + if (cost < best_cost) + { + best = 4 - can_ext; + best_cost = cost; + best_right = i; + best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i); + } + } + + if (attrp) + { + attrp[0] = best_right; + attrp[1] = best_len; + } + return best; +} + +/* This is used in length attributes of the unnamed instructions + corresponding to shl_and_kind return values of 1 and 2. */ +int +shl_and_length (insn) + rtx insn; +{ + rtx set_src, left_rtx, mask_rtx; + int attributes[3]; + + set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + left_rtx = XEXP (XEXP (set_src, 0), 1); + mask_rtx = XEXP (set_src, 1); + shl_and_kind (left_rtx, mask_rtx, attributes); + return attributes[1]; +} + +/* This is used in length attribute of the and_shl_scratch instruction. 
*/ + +int +shl_and_scr_length (insn) + rtx insn; +{ + rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + int len = shift_insns[INTVAL (XEXP (set_src, 1))]; + rtx op = XEXP (set_src, 0); + len += shift_insns[INTVAL (XEXP (op, 1))] + 1; + op = XEXP (XEXP (op, 0), 0); + return len + shift_insns[INTVAL (XEXP (op, 1))]; +} + +/* Generating rtl? */ +extern int rtx_equal_function_value_matters; + +/* Generate rtl for instructions for which shl_and_kind advised a particular + method of generating them, i.e. returned zero. */ + +int +gen_shl_and (dest, left_rtx, mask_rtx, source) + rtx dest, left_rtx, mask_rtx, source; +{ + int attributes[3]; + unsigned HOST_WIDE_INT mask; + int kind = shl_and_kind (left_rtx, mask_rtx, attributes); + int right, total_shift; + int (*shift_gen_fun) PROTO((int, rtx*)) = gen_shifty_hi_op; + + right = attributes[0]; + total_shift = INTVAL (left_rtx) + right; + mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift; + switch (kind) + { + default: + return -1; + case 1: + { + int first = attributes[2]; + rtx operands[3]; + + if (first < 0) + { + emit_insn ((mask << right) <= 0xff + ? gen_zero_extendqisi2(dest, + gen_lowpart (QImode, source)) + : gen_zero_extendhisi2(dest, + gen_lowpart (HImode, source))); + source = dest; + } + if (source != dest) + emit_insn (gen_movsi (dest, source)); + operands[0] = dest; + if (right) + { + operands[2] = GEN_INT (right); + gen_shifty_hi_op (LSHIFTRT, operands); + } + if (first > 0) + { + operands[2] = GEN_INT (first); + gen_shifty_hi_op (ASHIFT, operands); + total_shift -= first; + mask <<= first; + } + if (first >= 0) + emit_insn (mask <= 0xff + ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest)) + : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest))); + if (total_shift > 0) + { + operands[2] = GEN_INT (total_shift); + gen_shifty_hi_op (ASHIFT, operands); + } + break; + } + case 4: + shift_gen_fun = gen_shifty_op; + case 3: + /* If the topmost bit that matters is set, set the topmost bits + that don't matter. This way, we might be able to get a shorter + signed constant. */ + if (mask & ((HOST_WIDE_INT)1 << 31 - total_shift)) + mask |= (HOST_WIDE_INT)~0 << (31 - total_shift); + case 2: + /* Don't expand fine-grained when combining, because that will + make the pattern fail. 
*/ + if (rtx_equal_function_value_matters + || reload_in_progress || reload_completed) + { + rtx operands[3]; + + /* Cases 3 and 4 should be handled by this split + only while combining */ + if (kind > 2) + abort (); + if (right) + { + emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right))); + source = dest; + } + emit_insn (gen_andsi3 (dest, source, GEN_INT (mask))); + if (total_shift) + { + operands[0] = dest; + operands[1] = dest; + operands[2] = GEN_INT (total_shift); + shift_gen_fun (ASHIFT, operands); + } + break; + } + else + { + int neg = 0; + if (kind != 4 && total_shift < 16) + { + neg = -ext_shift_amounts[total_shift][1]; + if (neg > 0) + neg -= ext_shift_amounts[total_shift][2]; + else + neg = 0; + } + emit_insn (gen_and_shl_scratch (dest, source, + GEN_INT (right), + GEN_INT (mask), + GEN_INT (total_shift + neg), + GEN_INT (neg))); + emit_insn (gen_movsi (dest, dest)); + break; + } + } + return 0; +} + +/* Try to find a good way to implement the combiner pattern + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI 18))] + LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3. + return 0 for simple left / right shift combination. + return 1 for left shift / 8 bit sign extend / left shift. + return 2 for left shift / 16 bit sign extend / left shift. + return 3 for left shift / 8 bit sign extend / shift / sign extend. + return 4 for left shift / 16 bit sign extend / shift / sign extend. + return 5 for left shift / 16 bit sign extend / right shift + return 6 for < 8 bit sign extend / left shift. + return 7 for < 8 bit sign extend / left shift / single right shift. + If COSTP is nonzero, assign the calculated cost to *COSTP. */ + +int +shl_sext_kind (left_rtx, size_rtx, costp) + rtx left_rtx, size_rtx; + int *costp; +{ + int left, size, insize, ext; + int cost, best_cost; + int kind; + + left = INTVAL (left_rtx); + size = INTVAL (size_rtx); + insize = size - left; + if (insize <= 0) + abort (); + /* Default to left / right shift. */ + kind = 0; + best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size]; + if (size <= 16) + { + /* 16 bit shift / sign extend / 16 bit shift */ + cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size]; + /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden + below, by alternative 3 or something even better. */ + if (cost < best_cost) + { + kind = 5; + best_cost = cost; + } + } + /* Try a plain sign extend between two shifts. */ + for (ext = 16; ext >= insize; ext -= 8) + { + if (ext <= size) + { + cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext]; + if (cost < best_cost) + { + kind = ext / 8U; + best_cost = cost; + } + } + /* Check if we can do a sloppy shift with a final signed shift + restoring the sign. */ + if (EXT_SHIFT_SIGNED (size - ext)) + cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1; + /* If not, maybe it's still cheaper to do the second shift sloppy, + and do a final sign extend? */ + else if (size <= 16) + cost = ext_shift_insns[ext - insize] + 1 + + ext_shift_insns[size > ext ? 
size - ext : ext - size] + 1; + else + continue; + if (cost < best_cost) + { + kind = ext / 8U + 2; + best_cost = cost; + } + } + /* Check if we can sign extend in r0 */ + if (insize < 8) + { + cost = 3 + shift_insns[left]; + if (cost < best_cost) + { + kind = 6; + best_cost = cost; + } + /* Try the same with a final signed shift. */ + if (left < 31) + { + cost = 3 + ext_shift_insns[left + 1] + 1; + if (cost < best_cost) + { + kind = 7; + best_cost = cost; + } + } + } + if (TARGET_SH3) + { + /* Try to use a dynamic shift. */ + cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST; + if (cost < best_cost) + { + kind = 0; + best_cost = cost; + } + } + if (costp) + *costp = cost; + return kind; +} + +/* Function to be used in the length attribute of the instructions + implementing this pattern. */ + +int +shl_sext_length (insn) + rtx insn; +{ + rtx set_src, left_rtx, size_rtx; + int cost; + + set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + left_rtx = XEXP (XEXP (set_src, 0), 1); + size_rtx = XEXP (set_src, 1); + shl_sext_kind (left_rtx, size_rtx, &cost); + return cost; +} + +/* Generate rtl for this pattern */ + +int +gen_shl_sext (dest, left_rtx, size_rtx, source) + rtx dest, left_rtx, size_rtx, source; +{ + int kind; + int left, size, insize, cost; + rtx operands[3]; + + kind = shl_sext_kind (left_rtx, size_rtx, &cost); + left = INTVAL (left_rtx); + size = INTVAL (size_rtx); + insize = size - left; + switch (kind) + { + case 1: + case 2: + case 3: + case 4: + { + int ext = kind & 1 ? 8 : 16; + int shift2 = size - ext; + + /* Don't expand fine-grained when combining, because that will + make the pattern fail. */ + if (! rtx_equal_function_value_matters + && ! reload_in_progress && ! reload_completed) + { + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + emit_insn (gen_movsi (dest, source)); + break; + } + if (dest != source) + emit_insn (gen_movsi (dest, source)); + operands[0] = dest; + if (ext - insize) + { + operands[2] = GEN_INT (ext - insize); + gen_shifty_hi_op (ASHIFT, operands); + } + emit_insn (kind & 1 + ? gen_extendqisi2(dest, gen_lowpart (QImode, dest)) + : gen_extendhisi2(dest, gen_lowpart (HImode, dest))); + if (kind <= 2) + { + if (shift2) + { + operands[2] = GEN_INT (shift2); + gen_shifty_op (ASHIFT, operands); + } + } + else + { + if (shift2 > 0) + { + if (EXT_SHIFT_SIGNED (shift2)) + { + operands[2] = GEN_INT (shift2 + 1); + gen_shifty_op (ASHIFT, operands); + operands[2] = GEN_INT (1); + gen_shifty_op (ASHIFTRT, operands); + break; + } + operands[2] = GEN_INT (shift2); + gen_shifty_hi_op (ASHIFT, operands); + } + else if (shift2) + { + operands[2] = GEN_INT (-shift2); + gen_shifty_hi_op (LSHIFTRT, operands); + } + emit_insn (size <= 8 + ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) + : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); + } + break; + } + case 5: + { + int i = 16 - size; + if (! rtx_equal_function_value_matters + && ! reload_in_progress && ! reload_completed) + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + else + { + operands[0] = dest; + operands[2] = GEN_INT (16 - insize); + gen_shifty_hi_op (ASHIFT, operands); + emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); + } + /* Don't use gen_ashrsi3 because it generates new pseudos. */ + while (--i >= 0) + gen_ashift (ASHIFTRT, 1, dest); + break; + } + case 6: + case 7: + /* Don't expand fine-grained when combining, because that will + make the pattern fail. */ + if (! rtx_equal_function_value_matters + && ! reload_in_progress && ! 
reload_completed) + { + emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); + emit_insn (gen_movsi (dest, source)); + break; + } + emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1))); + emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1)))); + emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1)))); + operands[0] = dest; + operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx; + gen_shifty_op (ASHIFT, operands); + if (kind == 7) + emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1))); + break; + default: + return -1; + } + return 0; +} + +/* The SH cannot load a large constant into a register, constants have to + come from a pc relative load. The reference of a pc relative load + instruction must be less than 1k infront of the instruction. This + means that we often have to dump a constant inside a function, and + generate code to branch around it. + + It is important to minimize this, since the branches will slow things + down and make things bigger. + + Worst case code looks like: + + mov.l L1,rn + bra L2 + nop + align + L1: .long value + L2: + .. + + mov.l L3,rn + bra L4 + nop + align + L3: .long value + L4: + .. + + We fix this by performing a scan before scheduling, which notices which + instructions need to have their operands fetched from the constant table + and builds the table. + + The algorithm is: + + scan, find an instruction which needs a pcrel move. Look forward, find the + last barrier which is within MAX_COUNT bytes of the requirement. + If there isn't one, make one. Process all the instructions between + the find and the barrier. + + In the above example, we can tell that L3 is within 1k of L1, so + the first move can be shrunk from the 3 insn+constant sequence into + just 1 insn, and the constant moved to L3 to make: + + mov.l L1,rn + .. + mov.l L3,rn + bra L4 + nop + align + L3:.long value + L4:.long value + + Then the second move becomes the target for the shortening process. */ + +typedef struct +{ + rtx value; /* Value in table. */ + rtx label; /* Label of value. */ + enum machine_mode mode; /* Mode of value. */ +} pool_node; + +/* The maximum number of constants that can fit into one pool, since + the pc relative range is 0...1020 bytes and constants are at least 4 + bytes long. */ + +#define MAX_POOL_SIZE (1020/4) +static pool_node pool_vector[MAX_POOL_SIZE]; +static int pool_size; + +/* ??? If we need a constant in HImode which is the truncated value of a + constant we need in SImode, we could combine the two entries thus saving + two bytes. Is this common enough to be worth the effort of implementing + it? */ + +/* ??? This stuff should be done at the same time that we shorten branches. + As it is now, we must assume that all branches are the maximum size, and + this causes us to almost always output constant pools sooner than + necessary. */ + +/* Add a constant to the pool and return its label. */ + +static rtx +add_constant (x, mode, last_value) + rtx last_value; + rtx x; + enum machine_mode mode; +{ + int i; + rtx lab; + + /* First see if we've already got it. */ + for (i = 0; i < pool_size; i++) + { + if (x->code == pool_vector[i].value->code + && mode == pool_vector[i].mode) + { + if (x->code == CODE_LABEL) + { + if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) + continue; + } + if (rtx_equal_p (x, pool_vector[i].value)) + { + lab = 0; + if (! last_value + || ! i + || ! rtx_equal_p (last_value, pool_vector[i-1].value)) + { + lab = pool_vector[i].label; + if (! 
lab) + pool_vector[i].label = lab = gen_label_rtx (); + } + return lab; + } + } + } + + /* Need a new one. */ + pool_vector[pool_size].value = x; + if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value)) + lab = 0; + else + lab = gen_label_rtx (); + pool_vector[pool_size].mode = mode; + pool_vector[pool_size].label = lab; + pool_size++; + return lab; +} + +/* Output the literal table. */ + +static void +dump_table (scan) + rtx scan; +{ + int i; + int need_align = 1; + + /* Do two passes, first time dump out the HI sized constants. */ + + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + if (p->mode == HImode) + { + if (need_align) + { + scan = emit_insn_after (gen_align_2 (), scan); + need_align = 0; + } + scan = emit_label_after (p->label, scan); + scan = emit_insn_after (gen_consttable_2 (p->value), scan); + } + } + + need_align = 1; + + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + switch (p->mode) + { + case HImode: + break; + case SImode: + case SFmode: + if (need_align) + { + need_align = 0; + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (gen_align_4 (), scan); + } + if (p->label) + scan = emit_label_after (p->label, scan); + scan = emit_insn_after (gen_consttable_4 (p->value), scan); + break; + case DFmode: + case DImode: + if (need_align) + { + need_align = 0; + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (gen_align_4 (), scan); + } + if (p->label) + scan = emit_label_after (p->label, scan); + scan = emit_insn_after (gen_consttable_8 (p->value), scan); + break; + default: + abort (); + break; + } + } + + scan = emit_insn_after (gen_consttable_end (), scan); + scan = emit_barrier_after (scan); + pool_size = 0; +} + +/* Return non-zero if constant would be an ok source for a + mov.w instead of a mov.l. */ + +static int +hi_const (src) + rtx src; +{ + return (GET_CODE (src) == CONST_INT + && INTVAL (src) >= -32768 + && INTVAL (src) <= 32767); +} + +/* Non-zero if the insn is a move instruction which needs to be fixed. */ + +/* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the + CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't + need to fix it if the input value is CONST_OK_FOR_I. */ + +static int +broken_move (insn) + rtx insn; +{ + if (GET_CODE (insn) == INSN) + { + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET + /* We can load any 8 bit value if we don't care what the high + order bits end up as. */ + && GET_MODE (SET_DEST (pat)) != QImode + && CONSTANT_P (SET_SRC (pat)) + && ! (TARGET_SH3E + && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE + && (fp_zero_operand (SET_SRC (pat)) + || fp_one_operand (SET_SRC (pat))) + && GET_CODE (SET_DEST (pat)) == REG + && REGNO (SET_DEST (pat)) >= FIRST_FP_REG + && REGNO (SET_DEST (pat)) <= LAST_FP_REG) + && (GET_CODE (SET_SRC (pat)) != CONST_INT + || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat))))) + return 1; + } + + return 0; +} + +static int +mova_p (insn) + rtx insn; +{ + return (GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) == SET + && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC + && XINT (SET_SRC (PATTERN (insn)), 1) == 1); +} + +/* Find the last barrier from insn FROM which is close enough to hold the + constant pool. If we can't find one, then create one near the end of + the range. 
*/ + +static rtx +find_barrier (num_mova, mova, from) + int num_mova; + rtx mova, from; +{ + int count_si = 0; + int count_hi = 0; + int found_hi = 0; + int found_si = 0; + int hi_align = 2; + int si_align = 2; + int leading_mova = num_mova; + rtx barrier_before_mova, found_barrier = 0, good_barrier = 0; + int si_limit; + int hi_limit; + + /* For HImode: range is 510, add 4 because pc counts from address of + second instruction after this one, subtract 2 for the jump instruction + that we may need to emit before the table, subtract 2 for the instruction + that fills the jump delay slot (in very rare cases, reorg will take an + instruction from after the constant pool or will leave the delay slot + empty). This gives 510. + For SImode: range is 1020, add 4 because pc counts from address of + second instruction after this one, subtract 2 in case pc is 2 byte + aligned, subtract 2 for the jump instruction that we may need to emit + before the table, subtract 2 for the instruction that fills the jump + delay slot. This gives 1018. */ + + /* The branch will always be shortened now that the reference address for + forward branches is the successor address, thus we need no longer make + adjustments to the [sh]i_limit for -O0. */ + + si_limit = 1018; + hi_limit = 510; + + while (from && count_si < si_limit && count_hi < hi_limit) + { + int inc = get_attr_length (from); + int new_align = 1; + + if (GET_CODE (from) == CODE_LABEL) + { + if (optimize) + new_align = 1 << label_to_alignment (from); + else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER) + new_align = 1 << barrier_align (from); + else + new_align = 1; + inc = 0; + } + + if (GET_CODE (from) == BARRIER) + { + + found_barrier = from; + + /* If we are at the end of the function, or in front of an alignment + instruction, we need not insert an extra alignment. We prefer + this kind of barrier. */ + if (barrier_align (from) > 2) + good_barrier = from; + } + + if (broken_move (from)) + { + rtx pat, src, dst; + enum machine_mode mode; + + pat = PATTERN (from); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + src = SET_SRC (pat); + dst = SET_DEST (pat); + mode = GET_MODE (dst); + + /* We must explicitly check the mode, because sometimes the + front end will generate code to load unsigned constants into + HImode targets without properly sign extending them. */ + if (mode == HImode + || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG)) + { + found_hi += 2; + /* We put the short constants before the long constants, so + we must count the length of short constants in the range + for the long constants. */ + /* ??? This isn't optimal, but is easy to do. */ + si_limit -= 2; + } + else + { + while (si_align > 2 && found_si + si_align - 2 > count_si) + si_align >>= 1; + if (found_si > count_si) + count_si = found_si; + found_si += GET_MODE_SIZE (mode); + if (num_mova) + si_limit -= GET_MODE_SIZE (mode); + } + } + + if (mova_p (from)) + { + if (! num_mova++) + { + leading_mova = 0; + mova = from; + barrier_before_mova = good_barrier ? good_barrier : found_barrier; + } + if (found_si > count_si) + count_si = found_si; + } + else if (GET_CODE (from) == JUMP_INSN + && (GET_CODE (PATTERN (from)) == ADDR_VEC + || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)) + { + if (num_mova) + num_mova--; + if (barrier_align (next_real_insn (from)) == CACHE_LOG) + { + /* We have just passed the barrier in front of the + ADDR_DIFF_VEC, which is stored in found_barrier. 
Since + the ADDR_DIFF_VEC is accessed as data, just like our pool + constants, this is a good opportunity to accommodate what + we have gathered so far. + If we waited any longer, we could end up at a barrier in + front of code, which gives worse cache usage for separated + instruction / data caches. */ + good_barrier = found_barrier; + break; + } + else + { + rtx body = PATTERN (from); + inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body)); + } + } + + if (found_si) + { + if (new_align > si_align) + { + si_limit -= count_si - 1 & new_align - si_align; + si_align = new_align; + } + count_si = count_si + new_align - 1 & -new_align; + count_si += inc; + } + if (found_hi) + { + if (new_align > hi_align) + { + hi_limit -= count_hi - 1 & new_align - hi_align; + hi_align = new_align; + } + count_hi = count_hi + new_align - 1 & -new_align; + count_hi += inc; + } + from = NEXT_INSN (from); + } + + if (num_mova) + if (leading_mova) + { + /* Try as we might, the leading mova is out of range. Change + it into a load (which will become a pcload) and retry. */ + SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); + INSN_CODE (mova) = -1; + return find_barrier (0, 0, mova); + } + else + { + /* Insert the constant pool table before the mova instruction, + to prevent the mova label reference from going out of range. */ + from = mova; + good_barrier = found_barrier = barrier_before_mova; + } + + if (found_barrier) + { + if (good_barrier && next_real_insn (found_barrier)) + found_barrier = good_barrier; + } + else + { + /* We didn't find a barrier in time to dump our stuff, + so we'll make one. */ + rtx label = gen_label_rtx (); + + /* If we exceeded the range, then we must back up over the last + instruction we looked at. Otherwise, we just need to undo the + NEXT_INSN at the end of the loop. */ + if (count_hi > hi_limit || count_si > si_limit) + from = PREV_INSN (PREV_INSN (from)); + else + from = PREV_INSN (from); + + /* Walk back to be just before any jump or label. + Putting it before a label reduces the number of times the branch + around the constant pool table will be hit. Putting it before + a jump makes it more likely that the bra delay slot will be + filled. */ + while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE + || GET_CODE (from) == CODE_LABEL) + from = PREV_INSN (from); + + from = emit_jump_insn_after (gen_jump (label), from); + JUMP_LABEL (from) = label; + LABEL_NUSES (label) = 1; + found_barrier = emit_barrier_after (from); + emit_label_after (label, found_barrier); + } + + return found_barrier; +} + +/* If the instruction INSN is implemented by a special function, and we can + positively find the register that is used to call the sfunc, and this + register is not used anywhere else in this instruction - except as the + destination of a set, return this register; else, return 0. */ +rtx +sfunc_uses_reg (insn) + rtx insn; +{ + int i; + rtx pattern, part, reg_part, reg; + + if (GET_CODE (insn) != INSN) + return 0; + pattern = PATTERN (insn); + if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC) + return 0; + + for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--) + { + part = XVECEXP (pattern, 0, i); + if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode) + reg_part = part; + } + if (! 
reg_part) + return 0; + reg = XEXP (reg_part, 0); + for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--) + { + part = XVECEXP (pattern, 0, i); + if (part == reg_part || GET_CODE (part) == CLOBBER) + continue; + if (reg_mentioned_p (reg, ((GET_CODE (part) == SET + && GET_CODE (SET_DEST (part)) == REG) + ? SET_SRC (part) : part))) + return 0; + } + return reg; +} + +/* See if the only way in which INSN uses REG is by calling it, or by + setting it while calling it. Set *SET to a SET rtx if the register + is set by INSN. */ + +static int +noncall_uses_reg (reg, insn, set) + rtx reg; + rtx insn; + rtx *set; +{ + rtx pattern, reg2; + + *set = NULL_RTX; + + reg2 = sfunc_uses_reg (insn); + if (reg2 && REGNO (reg2) == REGNO (reg)) + { + pattern = single_set (insn); + if (pattern + && GET_CODE (SET_DEST (pattern)) == REG + && REGNO (reg) == REGNO (SET_DEST (pattern))) + *set = pattern; + return 0; + } + if (GET_CODE (insn) != CALL_INSN) + { + /* We don't use rtx_equal_p because we don't care if the mode is + different. */ + pattern = single_set (insn); + if (pattern + && GET_CODE (SET_DEST (pattern)) == REG + && REGNO (reg) == REGNO (SET_DEST (pattern))) + { + rtx par, part; + int i; + + *set = pattern; + par = PATTERN (insn); + if (GET_CODE (par) == PARALLEL) + for (i = XVECLEN (par, 0) - 1; i >= 0; i--) + { + part = XVECEXP (par, 0, i); + if (GET_CODE (part) != SET && reg_mentioned_p (reg, part)) + return 1; + } + return reg_mentioned_p (reg, SET_SRC (pattern)); + } + + return 1; + } + + pattern = PATTERN (insn); + + if (GET_CODE (pattern) == PARALLEL) + { + int i; + + for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--) + if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i))) + return 1; + pattern = XVECEXP (pattern, 0, 0); + } + + if (GET_CODE (pattern) == SET) + { + if (reg_mentioned_p (reg, SET_DEST (pattern))) + { + /* We don't use rtx_equal_p, because we don't care if the + mode is different. */ + if (GET_CODE (SET_DEST (pattern)) != REG + || REGNO (reg) != REGNO (SET_DEST (pattern))) + return 1; + + *set = pattern; + } + + pattern = SET_SRC (pattern); + } + + if (GET_CODE (pattern) != CALL + || GET_CODE (XEXP (pattern, 0)) != MEM + || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0))) + return 1; + + return 0; +} + +/* Given a X, a pattern of an insn or a part of it, return a mask of used + general registers. Bits 0..15 mean that the respective registers + are used as inputs in the instruction. Bits 16..31 mean that the + registers 0..15, respectively, are used as outputs, or are clobbered. + IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */ +int +regs_used (x, is_dest) + rtx x; int is_dest; +{ + enum rtx_code code; + char *fmt; + int i, used = 0; + + if (! x) + return used; + code = GET_CODE (x); + switch (code) + { + case REG: + if (REGNO (x) < 16) + return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) + << (REGNO (x) + is_dest)); + return 0; + case SUBREG: + { + rtx y = SUBREG_REG (x); + + if (GET_CODE (y) != REG) + break; + if (REGNO (y) < 16) + return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) + << (REGNO (y) + SUBREG_WORD (x) + is_dest)); + return 0; + } + case SET: + return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16); + case RETURN: + /* If there was a return value, it must have been indicated with USE. 
*/ + return 0x00ffff00; + case CLOBBER: + is_dest = 1; + break; + case MEM: + is_dest = 0; + break; + case CALL: + used |= 0x00ff00f0; + break; + } + + fmt = GET_RTX_FORMAT (code); + + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + used |= regs_used (XVECEXP (x, i, j), is_dest); + } + else if (fmt[i] == 'e') + used |= regs_used (XEXP (x, i), is_dest); + } + return used; +} + +/* Create an instruction that prevents redirection of a conditional branch + to the destination of the JUMP with address ADDR. + If the branch needs to be implemented as an indirect jump, try to find + a scratch register for it. + If NEED_BLOCK is 0, don't do anything unless we need a scratch register. + If any preceding insn that doesn't fit into a delay slot is good enough, + pass 1. Pass 2 if a definite blocking insn is needed. + -1 is used internally to avoid deep recursion. + If a blocking instruction is made or recognized, return it. */ + +static rtx +gen_block_redirect (jump, addr, need_block) + rtx jump; + int addr, need_block; +{ + int dead = 0; + rtx prev = prev_nonnote_insn (jump); + rtx dest; + + /* First, check if we already have an instruction that satisfies our need. */ + if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev)) + { + if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) + return prev; + if (GET_CODE (PATTERN (prev)) == USE + || GET_CODE (PATTERN (prev)) == CLOBBER + || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) + prev = jump; + else if ((need_block &= ~1) < 0) + return prev; + else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect) + need_block = 0; + } + /* We can't use JUMP_LABEL here because it might be undefined + when not optimizing. */ + dest = XEXP (SET_SRC (PATTERN (jump)), 0); + /* If the branch is out of range, try to find a scratch register for it. */ + if (optimize + && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)) + { + rtx scan; + /* Don't look for the stack pointer as a scratch register, + it would cause trouble if an interrupt occurred. */ + unsigned try = 0x7fff, used; + int jump_left = flag_expensive_optimizations + 1; + + /* It is likely that the most recent eligible instruction is wanted for + the delay slot. Therefore, find out which registers it uses, and + try to avoid using them. */ + + for (scan = jump; scan = PREV_INSN (scan); ) + { + enum rtx_code code; + + if (INSN_DELETED_P (scan)) + continue; + code = GET_CODE (scan); + if (code == CODE_LABEL || code == JUMP_INSN) + break; + if (code == INSN + && GET_CODE (PATTERN (scan)) != USE + && GET_CODE (PATTERN (scan)) != CLOBBER + && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES) + { + try &= ~regs_used (PATTERN (scan), 0); + break; + } + } + for (used = dead = 0, scan = JUMP_LABEL (jump); scan = NEXT_INSN (scan); ) + { + enum rtx_code code; + + if (INSN_DELETED_P (scan)) + continue; + code = GET_CODE (scan); + if (GET_RTX_CLASS (code) == 'i') + { + used |= regs_used (PATTERN (scan), 0); + if (code == CALL_INSN) + used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0); + dead |= (used >> 16) & ~used; + if (dead & try) + { + dead &= try; + break; + } + if (code == JUMP_INSN) + if (jump_left-- && simplejump_p (scan)) + scan = JUMP_LABEL (scan); + else + break; + } + } + /* Mask out the stack pointer again, in case it was + the only 'free' register we have found. 
*/ + dead &= 0x7fff; + } + /* If the immediate destination is still in range, check for possible + threading with a jump beyond the delay slot insn. + Don't check if we are called recursively; the jump has been or will be + checked in a different invocation then. */ + + else if (optimize && need_block >= 0) + { + rtx next = next_active_insn (next_active_insn (dest)); + if (next && GET_CODE (next) == JUMP_INSN + && GET_CODE (PATTERN (next)) == SET + && recog_memoized (next) == CODE_FOR_jump) + { + dest = JUMP_LABEL (next); + if (dest + && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098) + gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1); + } + } + + if (dead) + { + rtx reg = gen_rtx (REG, SImode, exact_log2 (dead & -dead)); + + /* It would be nice if we could convert the jump into an indirect + jump / far branch right now, and thus exposing all constituent + instructions to further optimization. However, reorg uses + simplejump_p to determine if there is an unconditional jump where + it should try to schedule instructions from the target of the + branch; simplejump_p fails for indirect jumps even if they have + a JUMP_LABEL. */ + rtx insn = emit_insn_before (gen_indirect_jump_scratch + (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump)))) + , jump); + INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch; + return insn; + } + else if (need_block) + /* We can't use JUMP_LABEL here because it might be undefined + when not optimizing. */ + return emit_insn_before (gen_block_branch_redirect + (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0)))) + , jump); + return prev; +} + +#define CONDJUMP_MIN -252 +#define CONDJUMP_MAX 262 +struct far_branch +{ + /* A label (to be placed) in front of the jump + that jumps to our ultimate destination. */ + rtx near_label; + /* Where we are going to insert it if we cannot move the jump any farther, + or the jump itself if we have picked up an existing jump. */ + rtx insert_place; + /* The ultimate destination. */ + rtx far_label; + struct far_branch *prev; + /* If the branch has already been created, its address; + else the address of its first prospective user. */ + int address; +}; + +enum mdep_reorg_phase_e mdep_reorg_phase; +void +gen_far_branch (bp) + struct far_branch *bp; +{ + rtx insn = bp->insert_place; + rtx jump; + rtx label = gen_label_rtx (); + + emit_label_after (label, insn); + if (bp->far_label) + { + jump = emit_jump_insn_after (gen_jump (bp->far_label), insn); + LABEL_NUSES (bp->far_label)++; + } + else + jump = emit_jump_insn_after (gen_return (), insn); + /* Emit a barrier so that reorg knows that any following instructions + are not reachable via a fall-through path. + But don't do this when not optimizing, since we wouldn't supress the + alignment for the barrier then, and could end up with out-of-range + pc-relative loads. */ + if (optimize) + emit_barrier_after (jump); + emit_label_after (bp->near_label, insn); + JUMP_LABEL (jump) = bp->far_label; + if (! invert_jump (insn, label)) + abort (); + /* Prevent reorg from undoing our splits. */ + gen_block_redirect (jump, bp->address += 2, 2); +} + +/* Fix up ADDR_DIFF_VECs. */ +void +fixup_addr_diff_vecs (first) + rtx first; +{ + rtx insn; + + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx vec_lab, pat, prev, prevpat, x, braf_label; + + if (GET_CODE (insn) != JUMP_INSN + || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) + continue; + pat = PATTERN (insn); + vec_lab = XEXP (XEXP (pat, 0), 0); + + /* Search the matching casesi_jump_2. 
*/ + for (prev = vec_lab; ; prev = PREV_INSN (prev)) + { + if (GET_CODE (prev) != JUMP_INSN) + continue; + prevpat = PATTERN (prev); + if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2) + continue; + x = XVECEXP (prevpat, 0, 1); + if (GET_CODE (x) != USE) + continue; + x = XEXP (x, 0); + if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab) + break; + } + + /* Emit the reference label of the braf where it belongs, right after + the casesi_jump_2 (i.e. braf). */ + braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0); + emit_label_after (braf_label, prev); + + /* Fix up the ADDR_DIF_VEC to be relative + to the reference address of the braf. */ + XEXP (XEXP (pat, 0), 0) = braf_label; + } +} + +/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following + a barrier. Return the base 2 logarithm of the desired alignment. */ +int +barrier_align (barrier_or_label) + rtx barrier_or_label; +{ + rtx next = next_real_insn (barrier_or_label), pat, prev; + int slot, credit; + + if (! next) + return 0; + + pat = PATTERN (next); + + if (GET_CODE (pat) == ADDR_DIFF_VEC) + return 2; + + if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1) + /* This is a barrier in front of a constant table. */ + return 0; + + prev = prev_real_insn (barrier_or_label); + if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC) + { + pat = PATTERN (prev); + /* If this is a very small table, we want to keep the alignment after + the table to the minimum for proper code alignment. */ + return ((TARGET_SMALLCODE + || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat)) + <= 1 << (CACHE_LOG - 2))) + ? 1 : CACHE_LOG); + } + + if (TARGET_SMALLCODE) + return 0; + + if (! TARGET_SH3 || ! optimize) + return CACHE_LOG; + + /* When fixing up pcloads, a constant table might be inserted just before + the basic block that ends with the barrier. Thus, we can't trust the + instruction lengths before that. */ + if (mdep_reorg_phase > SH_FIXUP_PCLOAD) + { + /* Check if there is an immediately preceding branch to the insn beyond + the barrier. We must weight the cost of discarding useful information + from the current cache line when executing this branch and there is + an alignment, against that of fetching unneeded insn in front of the + branch target when there is no alignment. */ + + /* PREV is presumed to be the JUMP_INSN for the barrier under + investigation. Skip to the insn before it. */ + prev = prev_real_insn (prev); + + for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2; + credit >= 0 && prev && GET_CODE (prev) == INSN; + prev = prev_real_insn (prev)) + { + if (GET_CODE (PATTERN (prev)) == USE + || GET_CODE (PATTERN (prev)) == CLOBBER) + continue; + if (GET_CODE (PATTERN (prev)) == SEQUENCE) + prev = XVECEXP (PATTERN (prev), 0, 1); + if (slot && + get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) + slot = 0; + credit -= get_attr_length (prev); + } + if (prev + && GET_CODE (prev) == JUMP_INSN + && JUMP_LABEL (prev) + && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label) + && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0))) + return 0; + } + + return CACHE_LOG; +} + +/* If we are inside a phony loop, lmost any kind of label can turn up as the + first one in the loop. Aligning a braf label causes incorrect switch + destination addresses; we can detect braf labels because they are + followed by a BARRIER. + Applying loop alignment to small constant or switch tables is a waste + of space, so we suppress this too. 
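+   In the remaining cases an ordinary loop head gets aligned to a four
+   byte boundary; like barrier_align, this function returns the base 2
+   logarithm of the desired alignment, i.e. 2.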
*/ +int +sh_loop_align (label) + rtx label; +{ + rtx next = label; + + do + next = next_nonnote_insn (next); + while (next && GET_CODE (next) == CODE_LABEL); + + if (! next + || GET_RTX_CLASS (GET_CODE (next)) != 'i' + || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC + || recog_memoized (next) == CODE_FOR_consttable_2) + return 0; + return 2; +} + +/* Exported to toplev.c. + + Do a final pass over the function, just before delayed branch + scheduling. */ + +void +machine_dependent_reorg (first) + rtx first; +{ + rtx insn, mova; + int num_mova; + rtx r0_rtx = gen_rtx (REG, Pmode, 0); + rtx r0_inc_rtx = gen_rtx (POST_INC, Pmode, r0_rtx); + + /* If relaxing, generate pseudo-ops to associate function calls with + the symbols they call. It does no harm to not generate these + pseudo-ops. However, when we can generate them, it enables to + linker to potentially relax the jsr to a bsr, and eliminate the + register load and, possibly, the constant pool entry. */ + + mdep_reorg_phase = SH_INSERT_USES_LABELS; + if (TARGET_RELAX) + { + /* Remove all REG_LABEL notes. We want to use them for our own + purposes. This works because none of the remaining passes + need to look at them. + + ??? But it may break in the future. We should use a machine + dependent REG_NOTE, or some other approach entirely. */ + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + rtx note; + + while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0) + remove_note (insn, note); + } + } + + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx pattern, reg, link, set, scan, dies, label; + int rescan = 0, foundinsn = 0; + + if (GET_CODE (insn) == CALL_INSN) + { + pattern = PATTERN (insn); + + if (GET_CODE (pattern) == PARALLEL) + pattern = XVECEXP (pattern, 0, 0); + if (GET_CODE (pattern) == SET) + pattern = SET_SRC (pattern); + + if (GET_CODE (pattern) != CALL + || GET_CODE (XEXP (pattern, 0)) != MEM) + continue; + + reg = XEXP (XEXP (pattern, 0), 0); + } + else + { + reg = sfunc_uses_reg (insn); + if (! reg) + continue; + } + + if (GET_CODE (reg) != REG) + continue; + + /* This is a function call via REG. If the only uses of REG + between the time that it is set and the time that it dies + are in function calls, then we can associate all the + function calls with the setting of REG. */ + + for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) + { + if (REG_NOTE_KIND (link) != 0) + continue; + set = single_set (XEXP (link, 0)); + if (set && rtx_equal_p (reg, SET_DEST (set))) + { + link = XEXP (link, 0); + break; + } + } + + if (! link) + { + /* ??? Sometimes global register allocation will have + deleted the insn pointed to by LOG_LINKS. Try + scanning backward to find where the register is set. */ + for (scan = PREV_INSN (insn); + scan && GET_CODE (scan) != CODE_LABEL; + scan = PREV_INSN (scan)) + { + if (GET_RTX_CLASS (GET_CODE (scan)) != 'i') + continue; + + if (! reg_mentioned_p (reg, scan)) + continue; + + if (noncall_uses_reg (reg, scan, &set)) + break; + + if (set) + { + link = scan; + break; + } + } + } + + if (! link) + continue; + + /* The register is set at LINK. */ + + /* We can only optimize the function call if the register is + being set to a symbol. In theory, we could sometimes + optimize calls to a constant location, but the assembler + and linker do not support that at present. 
*/ + if (GET_CODE (SET_SRC (set)) != SYMBOL_REF + && GET_CODE (SET_SRC (set)) != LABEL_REF) + continue; + + /* Scan forward from LINK to the place where REG dies, and + make sure that the only insns which use REG are + themselves function calls. */ + + /* ??? This doesn't work for call targets that were allocated + by reload, since there may not be a REG_DEAD note for the + register. */ + + dies = NULL_RTX; + for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan)) + { + rtx scanset; + + /* Don't try to trace forward past a CODE_LABEL if we haven't + seen INSN yet. Ordinarily, we will only find the setting insn + in LOG_LINKS if it is in the same basic block. However, + cross-jumping can insert code labels in between the load and + the call, and can result in situations where a single call + insn may have two targets depending on where we came from. */ + + if (GET_CODE (scan) == CODE_LABEL && ! foundinsn) + break; + + if (GET_RTX_CLASS (GET_CODE (scan)) != 'i') + continue; + + /* Don't try to trace forward past a JUMP. To optimize + safely, we would have to check that all the + instructions at the jump destination did not use REG. */ + + if (GET_CODE (scan) == JUMP_INSN) + break; + + if (! reg_mentioned_p (reg, scan)) + continue; + + if (noncall_uses_reg (reg, scan, &scanset)) + break; + + if (scan == insn) + foundinsn = 1; + + if (scan != insn + && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan))) + { + /* There is a function call to this register other + than the one we are checking. If we optimize + this call, we need to rescan again below. */ + rescan = 1; + } + + /* ??? We shouldn't have to worry about SCANSET here. + We should just be able to check for a REG_DEAD note + on a function call. However, the REG_DEAD notes are + apparently not dependable around libcalls; c-torture + execute/920501-2 is a test case. If SCANSET is set, + then this insn sets the register, so it must have + died earlier. Unfortunately, this will only handle + the cases in which the register is, in fact, set in a + later insn. */ + + /* ??? We shouldn't have to use FOUNDINSN here. + However, the LOG_LINKS fields are apparently not + entirely reliable around libcalls; + newlib/libm/math/e_pow.c is a test case. Sometimes + an insn will appear in LOG_LINKS even though it is + not the most recent insn which sets the register. */ + + if (foundinsn + && (scanset + || find_reg_note (scan, REG_DEAD, reg))) + { + dies = scan; + break; + } + } + + if (! dies) + { + /* Either there was a branch, or some insn used REG + other than as a function call address. */ + continue; + } + + /* Create a code label, and put it in a REG_LABEL note on + the insn which sets the register, and on each call insn + which uses the register. In final_prescan_insn we look + for the REG_LABEL notes, and output the appropriate label + or pseudo-op. 
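+	     Roughly speaking, the emitted label and pseudo-op let the linker
+	     turn a call sequence such as
+
+		mov.l	L1,r1	! address of _foo, from the constant pool
+		jsr	@r1
+		nop
+
+	     into a direct bsr _foo (plus delay slot) when the target turns
+	     out to be in range, dropping the register load and, possibly,
+	     the constant pool entry.  The register, label and function
+	     names above are only illustrative.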
*/ + + label = gen_label_rtx (); + REG_NOTES (link) = gen_rtx (EXPR_LIST, REG_LABEL, label, + REG_NOTES (link)); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, label, + REG_NOTES (insn)); + if (rescan) + { + scan = link; + do + { + rtx reg2; + + scan = NEXT_INSN (scan); + if (scan != insn + && ((GET_CODE (scan) == CALL_INSN + && reg_mentioned_p (reg, scan)) + || ((reg2 = sfunc_uses_reg (scan)) + && REGNO (reg2) == REGNO (reg)))) + REG_NOTES (scan) = gen_rtx (EXPR_LIST, REG_LABEL, + label, REG_NOTES (scan)); + } + while (scan != dies); + } + } + } + + if (TARGET_SH2) + fixup_addr_diff_vecs (first); + + if (optimize) + { + mdep_reorg_phase = SH_SHORTEN_BRANCHES0; + shorten_branches (first); + } + /* Scan the function looking for move instructions which have to be + changed to pc-relative loads and insert the literal tables. */ + + mdep_reorg_phase = SH_FIXUP_PCLOAD; + for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn)) + { + if (mova_p (insn)) + { + if (! num_mova++) + mova = insn; + } + else if (GET_CODE (insn) == JUMP_INSN + && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC + && num_mova) + { + rtx scan; + int total; + + num_mova--; + + /* Some code might have been inserted between the mova and + its ADDR_DIFF_VEC. Check if the mova is still in range. */ + for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan)) + total += get_attr_length (scan); + + /* range of mova is 1020, add 4 because pc counts from address of + second instruction after this one, subtract 2 in case pc is 2 + byte aligned. Possible alignment needed for the ADDR_DIFF_VEC + cancels out with alignment effects of the mova itself. */ + if (total > 1022) + { + /* Change the mova into a load, and restart scanning + there. broken_move will then return true for mova. */ + SET_SRC (PATTERN (mova)) + = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); + INSN_CODE (mova) = -1; + insn = mova; + } + } + if (broken_move (insn)) + { + rtx scan; + /* Scan ahead looking for a barrier to stick the constant table + behind. */ + rtx barrier = find_barrier (num_mova, mova, insn); + rtx last_float_move, last_float = 0, *last_float_addr; + + if (num_mova && ! mova_p (mova)) + { + /* find_barrier had to change the first mova into a + pcload; thus, we have to start with this new pcload. */ + insn = mova; + num_mova = 0; + } + /* Now find all the moves between the points and modify them. */ + for (scan = insn; scan != barrier; scan = NEXT_INSN (scan)) + { + if (GET_CODE (scan) == CODE_LABEL) + last_float = 0; + if (broken_move (scan)) + { + rtx *patp = &PATTERN (scan), pat = *patp; + rtx src, dst; + rtx lab; + rtx newinsn; + rtx newsrc; + enum machine_mode mode; + + if (GET_CODE (pat) == PARALLEL) + patp = &XVECEXP (pat, 0, 0), pat = *patp; + src = SET_SRC (pat); + dst = SET_DEST (pat); + mode = GET_MODE (dst); + + if (mode == SImode && hi_const (src) + && REGNO (dst) != FPUL_REG) + { + int offset = 0; + + mode = HImode; + while (GET_CODE (dst) == SUBREG) + { + offset += SUBREG_WORD (dst); + dst = SUBREG_REG (dst); + } + dst = gen_rtx (REG, HImode, REGNO (dst) + offset); + } + + if (GET_CODE (dst) == REG + && ((REGNO (dst) >= FIRST_FP_REG + && REGNO (dst) <= LAST_XD_REG) + || REGNO (dst) == FPUL_REG)) + { + if (last_float + && reg_set_between_p (r0_rtx, last_float_move, scan)) + last_float = 0; + lab = add_constant (src, mode, last_float); + if (lab) + emit_insn_before (gen_mova (lab), scan); + else + *last_float_addr = r0_inc_rtx; + last_float_move = scan; + last_float = src; + newsrc = gen_rtx (MEM, mode, + ((TARGET_SH4 && ! 
TARGET_FMOVD + || REGNO (dst) == FPUL_REG) + ? r0_inc_rtx + : r0_rtx)); + last_float_addr = &XEXP (newsrc, 0); + } + else + { + lab = add_constant (src, mode, 0); + newsrc = gen_rtx (MEM, mode, + gen_rtx (LABEL_REF, VOIDmode, lab)); + } + RTX_UNCHANGING_P (newsrc) = 1; + *patp = gen_rtx (SET, VOIDmode, dst, newsrc); + INSN_CODE (scan) = -1; + } + } + dump_table (barrier); + insn = barrier; + } + } + + mdep_reorg_phase = SH_SHORTEN_BRANCHES1; + insn_addresses = 0; + split_branches (first); + + /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it + also has an effect on the register that holds the addres of the sfunc. + Insert an extra dummy insn in front of each sfunc that pretends to + use this register. */ + if (flag_delayed_branch) + { + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + rtx reg = sfunc_uses_reg (insn); + + if (! reg) + continue; + emit_insn_before (gen_use_sfunc_addr (reg), insn); + } + } +#if 0 + /* fpscr is not actually a user variable, but we pretend it is for the + sake of the previous optimization passes, since we want it handled like + one. However, we don't have eny debugging information for it, so turn + it into a non-user variable now. */ + if (TARGET_SH4) + REG_USERVAR_P (get_fpscr_rtx ()) = 0; +#endif + if (optimize) + sh_flag_remove_dead_before_cse = 1; + mdep_reorg_phase = SH_AFTER_MDEP_REORG; +} + +int +get_dest_uid (label, max_uid) + rtx label; + int max_uid; +{ + rtx dest = next_real_insn (label); + int dest_uid; + if (! dest) + /* This can happen for an undefined label. */ + return 0; + dest_uid = INSN_UID (dest); + /* If this is a newly created branch redirection blocking instruction, + we cannot index the branch_uid or insn_addresses arrays with its + uid. But then, we won't need to, because the actual destination is + the following branch. */ + while (dest_uid >= max_uid) + { + dest = NEXT_INSN (dest); + dest_uid = INSN_UID (dest); + } + if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN) + return 0; + return dest_uid; +} + +/* Split condbranches that are out of range. Also add clobbers for + scratch registers that are needed in far jumps. + We do this before delay slot scheduling, so that it can take our + newly created instructions into account. It also allows us to + find branches with common targets more easily. */ + +static void +split_branches (first) + rtx first; +{ + rtx insn; + struct far_branch **uid_branch, *far_branch_list = 0; + int max_uid = get_max_uid (); + + /* Find out which branches are out of range. */ + shorten_branches (first); + + uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch); + bzero ((char *) uid_branch, max_uid * sizeof *uid_branch); + + for (insn = first; insn; insn = NEXT_INSN (insn)) + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + continue; + else if (INSN_DELETED_P (insn)) + { + /* Shorten_branches would split this instruction again, + so transform it into a note. 
*/ + PUT_CODE (insn, NOTE); + NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (insn) = 0; + } + else if (GET_CODE (insn) == JUMP_INSN + /* Don't mess with ADDR_DIFF_VEC */ + && (GET_CODE (PATTERN (insn)) == SET + || GET_CODE (PATTERN (insn)) == RETURN)) + { + enum attr_type type = get_attr_type (insn); + if (type == TYPE_CBRANCH) + { + rtx next, beyond; + + if (get_attr_length (insn) > 4) + { + rtx src = SET_SRC (PATTERN (insn)); + rtx cond = XEXP (src, 0); + rtx olabel = XEXP (XEXP (src, 1), 0); + rtx jump; + int addr = insn_addresses[INSN_UID (insn)]; + rtx label = 0; + int dest_uid = get_dest_uid (olabel, max_uid); + struct far_branch *bp = uid_branch[dest_uid]; + + /* redirect_jump needs a valid JUMP_LABEL, and it might delete + the label if the LABEL_NUSES count drops to zero. There is + always a jump_optimize pass that sets these values, but it + proceeds to delete unreferenced code, and then if not + optimizing, to un-delete the deleted instructions, thus + leaving labels with too low uses counts. */ + if (! optimize) + { + JUMP_LABEL (insn) = olabel; + LABEL_NUSES (olabel)++; + } + if (! bp) + { + bp = (struct far_branch *) alloca (sizeof *bp); + uid_branch[dest_uid] = bp; + bp->prev = far_branch_list; + far_branch_list = bp; + bp->far_label + = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0); + LABEL_NUSES (bp->far_label)++; + } + else + { + label = bp->near_label; + if (! label && bp->address - addr >= CONDJUMP_MIN) + { + rtx block = bp->insert_place; + + if (GET_CODE (PATTERN (block)) == RETURN) + block = PREV_INSN (block); + else + block = gen_block_redirect (block, + bp->address, 2); + label = emit_label_after (gen_label_rtx (), + PREV_INSN (block)); + bp->near_label = label; + } + else if (label && ! NEXT_INSN (label)) + if (addr + 2 - bp->address <= CONDJUMP_MAX) + bp->insert_place = insn; + else + gen_far_branch (bp); + } + if (! label + || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN) + { + bp->near_label = label = gen_label_rtx (); + bp->insert_place = insn; + bp->address = addr; + } + if (! redirect_jump (insn, label)) + abort (); + } + else + { + /* get_attr_length (insn) == 2 */ + /* Check if we have a pattern where reorg wants to redirect + the branch to a label from an unconditional branch that + is too far away. */ + /* We can't use JUMP_LABEL here because it might be undefined + when not optimizing. */ + /* A syntax error might cause beyond to be NULL_RTX. 
*/ + beyond + = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), + 0)); + + if (beyond + && (GET_CODE (beyond) == JUMP_INSN + || (GET_CODE (beyond = next_active_insn (beyond)) + == JUMP_INSN)) + && GET_CODE (PATTERN (beyond)) == SET + && recog_memoized (beyond) == CODE_FOR_jump + && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))] + - insn_addresses[INSN_UID (insn)] + 252U) + > 252 + 258 + 2)) + gen_block_redirect (beyond, + insn_addresses[INSN_UID (beyond)], 1); + } + + next = next_active_insn (insn); + + if ((GET_CODE (next) == JUMP_INSN + || GET_CODE (next = next_active_insn (next)) == JUMP_INSN) + && GET_CODE (PATTERN (next)) == SET + && recog_memoized (next) == CODE_FOR_jump + && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))] + - insn_addresses[INSN_UID (insn)] + 252U) + > 252 + 258 + 2)) + gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1); + } + else if (type == TYPE_JUMP || type == TYPE_RETURN) + { + int addr = insn_addresses[INSN_UID (insn)]; + rtx far_label = 0; + int dest_uid = 0; + struct far_branch *bp; + + if (type == TYPE_JUMP) + { + far_label = XEXP (SET_SRC (PATTERN (insn)), 0); + dest_uid = get_dest_uid (far_label, max_uid); + if (! dest_uid) + { + /* Parse errors can lead to labels outside + the insn stream. */ + if (! NEXT_INSN (far_label)) + continue; + + if (! optimize) + { + JUMP_LABEL (insn) = far_label; + LABEL_NUSES (far_label)++; + } + redirect_jump (insn, NULL_RTX); + far_label = 0; + } + } + bp = uid_branch[dest_uid]; + if (! bp) + { + bp = (struct far_branch *) alloca (sizeof *bp); + uid_branch[dest_uid] = bp; + bp->prev = far_branch_list; + far_branch_list = bp; + bp->near_label = 0; + bp->far_label = far_label; + if (far_label) + LABEL_NUSES (far_label)++; + } + else if (bp->near_label && ! NEXT_INSN (bp->near_label)) + if (addr - bp->address <= CONDJUMP_MAX) + emit_label_after (bp->near_label, PREV_INSN (insn)); + else + { + gen_far_branch (bp); + bp->near_label = 0; + } + else + bp->near_label = 0; + bp->address = addr; + bp->insert_place = insn; + if (! far_label) + emit_insn_before (gen_block_branch_redirect (const0_rtx), insn); + else + gen_block_redirect (insn, addr, bp->near_label ? 2 : 0); + } + } + /* Generate all pending far branches, + and free our references to the far labels. */ + while (far_branch_list) + { + if (far_branch_list->near_label + && ! NEXT_INSN (far_branch_list->near_label)) + gen_far_branch (far_branch_list); + if (optimize + && far_branch_list->far_label + && ! --LABEL_NUSES (far_branch_list->far_label)) + delete_insn (far_branch_list->far_label); + far_branch_list = far_branch_list->prev; + } + + /* Instruction length information is no longer valid due to the new + instructions that have been generated. */ + init_insn_lengths (); +} + +/* Dump out instruction addresses, which is useful for debugging the + constant pool table stuff. + + If relaxing, output the label and pseudo-ops used to link together + calls and the instruction which set the registers. */ + +/* ??? This is unnecessary, and probably should be deleted. This makes + the insn_addresses declaration above unnecessary. */ + +/* ??? The addresses printed by this routine for insns are nonsense for + insns which are inside of a sequence where none of the inner insns have + variable length. This is because the second pass of shorten_branches + does not bother to update them. 
*/ + +void +final_prescan_insn (insn, opvec, noperands) + rtx insn; + rtx *opvec; + int noperands; +{ + if (TARGET_DUMPISIZE) + fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]); + + if (TARGET_RELAX) + { + rtx note; + + note = find_reg_note (insn, REG_LABEL, NULL_RTX); + if (note) + { + rtx pattern; + + pattern = PATTERN (insn); + if (GET_CODE (pattern) == PARALLEL) + pattern = XVECEXP (pattern, 0, 0); + if (GET_CODE (pattern) == CALL + || (GET_CODE (pattern) == SET + && (GET_CODE (SET_SRC (pattern)) == CALL + || get_attr_type (insn) == TYPE_SFUNC))) + asm_fprintf (asm_out_file, "\t.uses %LL%d\n", + CODE_LABEL_NUMBER (XEXP (note, 0))); + else if (GET_CODE (pattern) == SET) + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (XEXP (note, 0))); + else + abort (); + } + } +} + +/* Dump out any constants accumulated in the final pass. These will + only be labels. */ + +char * +output_jump_label_table () +{ + int i; + + if (pool_size) + { + fprintf (asm_out_file, "\t.align 2\n"); + for (i = 0; i < pool_size; i++) + { + pool_node *p = &pool_vector[i]; + + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (p->label)); + output_asm_insn (".long %O0", &p->value); + } + pool_size = 0; + } + + return ""; +} + +/* A full frame looks like: + + arg-5 + arg-4 + [ if current_function_anonymous_args + arg-3 + arg-2 + arg-1 + arg-0 ] + saved-fp + saved-r10 + saved-r11 + saved-r12 + saved-pr + local-n + .. + local-1 + local-0 <- fp points here. */ + +/* Number of bytes pushed for anonymous args, used to pass information + between expand_prologue and expand_epilogue. */ + +static int extra_push; + +/* Adjust the stack by SIZE bytes. REG holds the rtl of the register + to be adjusted, and TEMP, if nonnegative, holds the register number + of a general register that we may clobber. */ + +static void +output_stack_adjust (size, reg, temp) + int size; + rtx reg; + int temp; +{ + if (size) + { + if (CONST_OK_FOR_I (size)) + emit_insn (gen_addsi3 (reg, reg, GEN_INT (size))); + /* Try to do it with two partial adjustments; however, we must make + sure that the stack is properly aligned at all times, in case + an interrupt occurs between the two partial adjustments. */ + else if (CONST_OK_FOR_I (size / 2 & -4) + && CONST_OK_FOR_I (size - (size / 2 & -4))) + { + emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4))); + emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4)))); + } + else + { + rtx const_reg; + + /* If TEMP is invalid, we could temporarily save a general + register to MACL. However, there is currently no need + to handle this case, so just abort when we see it. */ + if (temp < 0) + abort (); + const_reg = gen_rtx (REG, SImode, temp); + + /* If SIZE is negative, subtract the positive value. + This sometimes allows a constant pool entry to be shared + between prologue and epilogue code. */ + if (size < 0) + { + emit_insn (gen_movsi (const_reg, GEN_INT (-size))); + emit_insn (gen_subsi3 (reg, reg, const_reg)); + } + else + { + emit_insn (gen_movsi (const_reg, GEN_INT (size))); + emit_insn (gen_addsi3 (reg, reg, const_reg)); + } + } + } +} + +/* Output RTL to push register RN onto the stack. */ + +static void +push (rn) + int rn; +{ + rtx x; + if (rn == FPUL_REG) + x = gen_push_fpul (); + else if (TARGET_SH4 && TARGET_FMOVD && ! 
TARGET_FPU_SINGLE + && rn >= FIRST_FP_REG && rn <= LAST_XD_REG) + { + if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG) + return; + x = gen_push_4 (gen_rtx (REG, DFmode, rn)); + } + else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG) + x = gen_push_e (gen_rtx (REG, SFmode, rn)); + else + x = gen_push (gen_rtx (REG, SImode, rn)); + + x = emit_insn (x); + REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC, + gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0); +} + +/* Output RTL to pop register RN from the stack. */ + +static void +pop (rn) + int rn; +{ + rtx x; + if (rn == FPUL_REG) + x = gen_pop_fpul (); + else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE + && rn >= FIRST_FP_REG && rn <= LAST_XD_REG) + { + if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG) + return; + x = gen_pop_4 (gen_rtx (REG, DFmode, rn)); + } + else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG) + x = gen_pop_e (gen_rtx (REG, SFmode, rn)); + else + x = gen_pop (gen_rtx (REG, SImode, rn)); + + x = emit_insn (x); + REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC, + gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0); +} + +/* Generate code to push the regs specified in the mask. */ + +static void +push_regs (mask, mask2) + int mask, mask2; +{ + int i; + + /* Push PR last; this gives better latencies after the prologue, and + candidates for the return delay slot when there are no general + registers pushed. */ + for (i = 0; i < 32; i++) + if (mask & (1 << i) && i != PR_REG) + push (i); + for (i = 32; i < FIRST_PSEUDO_REGISTER; i++) + if (mask2 & (1 << (i - 32))) + push (i); + if (mask & (1 << PR_REG)) + push (PR_REG); +} + +/* Work out the registers which need to be saved, both as a mask and a + count of saved words. + + If doing a pragma interrupt function, then push all regs used by the + function, and if we call another function (we can tell by looking at PR), + make sure that all the regs it clobbers are safe too. */ + +static int +calc_live_regs (count_ptr, live_regs_mask2) + int *count_ptr; + int *live_regs_mask2; +{ + int reg; + int live_regs_mask = 0; + int count; + int interrupt_handler; + + if ((lookup_attribute + ("interrupt_handler", + DECL_MACHINE_ATTRIBUTES (current_function_decl))) + != NULL_TREE) + interrupt_handler = 1; + else + interrupt_handler = 0; + + *live_regs_mask2 = 0; + /* If we can save a lot of saves by switching to double mode, do that. */ + if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE) + for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2) + if (regs_ever_live[reg] && regs_ever_live[reg+1] + && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa)) + && ++count > 2) + { + target_flags &= ~FPU_SINGLE_BIT; + break; + } + for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--) + { + if ((interrupt_handler && ! pragma_trapa) + ? (/* Need to save all the regs ever live. */ + (regs_ever_live[reg] + || (call_used_regs[reg] + && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG) + && regs_ever_live[PR_REG])) + && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM + && reg != RETURN_ADDRESS_POINTER_REGNUM + && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG) + : (/* Only push those regs which are used and need to be saved. */ + regs_ever_live[reg] && ! call_used_regs[reg])) + { + if (reg >= 32) + *live_regs_mask2 |= 1 << (reg - 32); + else + live_regs_mask |= 1 << reg; + count++; + if (TARGET_SH4 && TARGET_FMOVD && reg >= FIRST_FP_REG) + if (reg <= LAST_FP_REG) + { + if (! TARGET_FPU_SINGLE && ! 
regs_ever_live[reg ^ 1]) + { + if (reg >= 32) + *live_regs_mask2 |= 1 << ((reg ^ 1) - 32); + else + live_regs_mask |= 1 << (reg ^ 1); + count++; + } + } + else if (reg <= LAST_XD_REG) + { + /* Must switch to double mode to access these registers. */ + target_flags &= ~FPU_SINGLE_BIT; + count++; + } + } + } + + *count_ptr = count; + return live_regs_mask; +} + +/* Code to generate prologue and epilogue sequences */ + +void +sh_expand_prologue () +{ + int live_regs_mask; + int d, i; + int live_regs_mask2; + int save_flags = target_flags; + int double_align = 0; + + /* We have pretend args if we had an object sent partially in registers + and partially on the stack, e.g. a large structure. */ + output_stack_adjust (-current_function_pretend_args_size, + stack_pointer_rtx, 3); + + extra_push = 0; + + /* This is set by SETUP_VARARGS to indicate that this is a varargs + routine. Clear it here so that the next function isn't affected. */ + if (current_function_anonymous_args) + { + current_function_anonymous_args = 0; + + /* This is not used by the SH3E calling convention */ + if (!TARGET_SH3E) + { + /* Push arg regs as if they'd been provided by caller in stack. */ + for (i = 0; i < NPARM_REGS(SImode); i++) + { + int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1; + if (i >= (NPARM_REGS(SImode) + - current_function_args_info.arg_count[(int) SH_ARG_INT] + )) + break; + push (rn); + extra_push += 4; + } + } + } + + /* If we're supposed to switch stacks at function entry, do so now. */ + if (sp_switch) + emit_insn (gen_sp_switch_1 ()); + + live_regs_mask = calc_live_regs (&d, &live_regs_mask2); + /* ??? Maybe we could save some switching if we can move a mode switch + that already happens to be at the function start into the prologue. */ + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + push_regs (live_regs_mask, live_regs_mask2); + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + + if (TARGET_ALIGN_DOUBLE && d & 1) + double_align = 4; + + target_flags = save_flags; + + output_stack_adjust (-get_frame_size () - double_align, + stack_pointer_rtx, 3); + + if (frame_pointer_needed) + emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx)); +} + +void +sh_expand_epilogue () +{ + int live_regs_mask; + int d, i; + + int live_regs_mask2; + int save_flags = target_flags; + int frame_size = get_frame_size (); + + live_regs_mask = calc_live_regs (&d, &live_regs_mask2); + + if (TARGET_ALIGN_DOUBLE && d & 1) + frame_size += 4; + + if (frame_pointer_needed) + { + output_stack_adjust (frame_size, frame_pointer_rtx, 7); + + /* We must avoid moving the stack pointer adjustment past code + which reads from the local frame, else an interrupt could + occur after the SP adjustment and clobber data in the local + frame. */ + emit_insn (gen_blockage ()); + emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx)); + } + else if (frame_size) + { + /* We must avoid moving the stack pointer adjustment past code + which reads from the local frame, else an interrupt could + occur after the SP adjustment and clobber data in the local + frame. */ + emit_insn (gen_blockage ()); + output_stack_adjust (frame_size, stack_pointer_rtx, 7); + } + + /* Pop all the registers. 
*/ + + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + if (live_regs_mask & (1 << PR_REG)) + pop (PR_REG); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + int j = (FIRST_PSEUDO_REGISTER - 1) - i; + if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG) + pop (j); + else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32)))) + pop (j); + } + if (target_flags != save_flags) + emit_insn (gen_toggle_sz ()); + target_flags = save_flags; + + output_stack_adjust (extra_push + current_function_pretend_args_size, + stack_pointer_rtx, 7); + + /* Switch back to the normal stack if necessary. */ + if (sp_switch) + emit_insn (gen_sp_switch_2 ()); +} + +/* Clear variables at function end. */ + +void +function_epilogue (stream, size) + FILE *stream; + int size; +{ + trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0; + sp_switch = NULL_RTX; +} + +rtx +sh_builtin_saveregs (arglist) + tree arglist; +{ + tree fntype = TREE_TYPE (current_function_decl); + /* First unnamed integer register. */ + int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT]; + /* Number of integer registers we need to save. */ + int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg); + /* First unnamed SFmode float reg */ + int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT]; + /* Number of SFmode float regs to save. */ + int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg); + int ptrsize = GET_MODE_SIZE (Pmode); + rtx valist, regbuf, fpregs; + int bufsize, regno; + + /* Allocate block of memory for the regs. */ + /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte? + Or can assign_stack_local accept a 0 SIZE argument? */ + bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD); + + regbuf = assign_stack_local (BLKmode, bufsize, 0); + MEM_SET_IN_STRUCT_P (regbuf, 1); + + /* Save int args. + This is optimized to only save the regs that are necessary. Explicitly + named args need not be saved. */ + if (n_intregs > 0) + move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg, + gen_rtx (MEM, BLKmode, + plus_constant (XEXP (regbuf, 0), + n_floatregs * UNITS_PER_WORD)), + n_intregs, n_intregs * UNITS_PER_WORD); + + /* Save float args. + This is optimized to only save the regs that are necessary. Explicitly + named args need not be saved. + We explicitly build a pointer to the buffer because it halves the insn + count when not optimizing (otherwise the pointer is built for each reg + saved). + We emit the moves in reverse order so that we can use predecrement. 
*/ + + fpregs = gen_reg_rtx (Pmode); + emit_move_insn (fpregs, XEXP (regbuf, 0)); + emit_insn (gen_addsi3 (fpregs, fpregs, + GEN_INT (n_floatregs * UNITS_PER_WORD))); + if (TARGET_SH4) + { + for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2) + { + emit_insn (gen_addsi3 (fpregs, fpregs, + GEN_INT (-2 * UNITS_PER_WORD))); + emit_move_insn (gen_rtx (MEM, DFmode, fpregs), + gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno)); + } + regno = first_floatreg; + if (regno & 1) + { + emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD))); + emit_move_insn (gen_rtx (MEM, SFmode, fpregs), + gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno + - (TARGET_LITTLE_ENDIAN != 0))); + } + } + else + for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--) + { + emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD))); + emit_move_insn (gen_rtx (MEM, SFmode, fpregs), + gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno)); + } + + /* Return the address of the regbuf. */ + return XEXP (regbuf, 0); +} + +/* Define the offset between two registers, one to be eliminated, and + the other its replacement, at the start of a routine. */ + +int +initial_elimination_offset (from, to) + int from; + int to; +{ + int regs_saved; + int total_saved_regs_space; + int total_auto_space = get_frame_size (); + int save_flags = target_flags; + + int live_regs_mask, live_regs_mask2; + live_regs_mask = calc_live_regs (®s_saved, &live_regs_mask2); + if (TARGET_ALIGN_DOUBLE && regs_saved & 1) + total_auto_space += 4; + target_flags = save_flags; + + total_saved_regs_space = (regs_saved) * 4; + + if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) + return total_saved_regs_space + total_auto_space; + + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return total_saved_regs_space + total_auto_space; + + /* Initial gap between fp and sp is 0. */ + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return 0; + + if (from == RETURN_ADDRESS_POINTER_REGNUM + && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM)) + { + int i, n = total_saved_regs_space; + for (i = PR_REG-1; i >= 0; i--) + if (live_regs_mask & (1 << i)) + n -= 4; + return n + total_auto_space; + } + + abort (); +} + +/* Handle machine specific pragmas to be semi-compatible with Hitachi + compiler. */ + +int +sh_handle_pragma (p_getc, p_ungetc, pname) + int (* p_getc) PROTO((void)); + void (* p_ungetc) PROTO((int)); + char * pname; +{ + int retval = 0; + + if (strcmp (pname, "interrupt") == 0) + pragma_interrupt = retval = 1; + else if (strcmp (pname, "trapa") == 0) + pragma_interrupt = pragma_trapa = retval = 1; + else if (strcmp (pname, "nosave_low_regs") == 0) + pragma_nosave_low_regs = retval = 1; + + return retval; +} + +/* Generate 'handle_interrupt' attribute for decls */ + +void +sh_pragma_insert_attributes (node, attributes, prefix) + tree node; + tree * attributes; + tree * prefix; +{ + tree a; + + if (! pragma_interrupt + || TREE_CODE (node) != FUNCTION_DECL) + return; + + /* We are only interested in fields. */ + if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd') + return; + + /* Add a 'handle_interrupt' attribute. */ + * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes); + + return; +} + +/* Return nonzero if ATTR is a valid attribute for DECL. + ATTRIBUTES are any existing attributes and ARGS are the arguments + supplied with ATTR. 
+ + Supported attributes: + + interrupt_handler -- specifies this function is an interrupt handler. + + sp_switch -- specifies an alternate stack for an interrupt handler + to run on. + + trap_exit -- use a trapa to exit an interrupt function instead of + an rte instruction. */ + +int +sh_valid_machine_decl_attribute (decl, attributes, attr, args) + tree decl; + tree attributes; + tree attr; + tree args; +{ + int retval = 0; + + if (TREE_CODE (decl) != FUNCTION_DECL) + return 0; + + if (is_attribute_p ("interrupt_handler", attr)) + { + return 1; + } + + if (is_attribute_p ("sp_switch", attr)) + { + /* The sp_switch attribute only has meaning for interrupt functions. */ + if (!pragma_interrupt) + return 0; + + /* sp_switch must have an argument. */ + if (!args || TREE_CODE (args) != TREE_LIST) + return 0; + + /* The argument must be a constant string. */ + if (TREE_CODE (TREE_VALUE (args)) != STRING_CST) + return 0; + + sp_switch = gen_rtx (SYMBOL_REF, VOIDmode, + TREE_STRING_POINTER (TREE_VALUE (args))); + return 1; + } + + if (is_attribute_p ("trap_exit", attr)) + { + /* The trap_exit attribute only has meaning for interrupt functions. */ + if (!pragma_interrupt) + return 0; + + /* trap_exit must have an argument. */ + if (!args || TREE_CODE (args) != TREE_LIST) + return 0; + + /* The argument must be a constant integer. */ + if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) + return 0; + + trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args)); + return 1; + } +} + + +/* Predicates used by the templates. */ + +/* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx. + Used only in general_movsrc_operand. */ + +int +system_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + switch (REGNO (op)) + { + case PR_REG: + case MACL_REG: + case MACH_REG: + return 1; + } + return 0; +} + +/* Returns 1 if OP can be source of a simple move operation. + Same as general_operand, but a LABEL_REF is valid, PRE_DEC is + invalid as are subregs of system registers. */ + +int +general_movsrc_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == MEM) + { + rtx inside = XEXP (op, 0); + if (GET_CODE (inside) == CONST) + inside = XEXP (inside, 0); + + if (GET_CODE (inside) == LABEL_REF) + return 1; + + if (GET_CODE (inside) == PLUS + && GET_CODE (XEXP (inside, 0)) == LABEL_REF + && GET_CODE (XEXP (inside, 1)) == CONST_INT) + return 1; + + /* Only post inc allowed. */ + if (GET_CODE (inside) == PRE_DEC) + return 0; + } + + if ((mode == QImode || mode == HImode) + && (GET_CODE (op) == SUBREG + && GET_CODE (XEXP (op, 0)) == REG + && system_reg_operand (XEXP (op, 0), mode))) + return 0; + + return general_operand (op, mode); +} + +/* Returns 1 if OP can be a destination of a move. + Same as general_operand, but no preinc allowed. */ + +int +general_movdst_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + /* Only pre dec allowed. */ + if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC) + return 0; + + return general_operand (op, mode); +} + +/* Returns 1 if OP is a normal arithmetic register. 
*/ + +int +arith_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (register_operand (op, mode)) + { + int regno; + + if (GET_CODE (op) == REG) + regno = REGNO (op); + else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) + regno = REGNO (SUBREG_REG (op)); + else + return 1; + + return (regno != T_REG && regno != PR_REG + && (regno != FPUL_REG || TARGET_SH4) + && regno != MACH_REG && regno != MACL_REG); + } + return 0; +} + +int +fp_arith_reg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (register_operand (op, mode)) + { + int regno; + + if (GET_CODE (op) == REG) + regno = REGNO (op); + else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) + regno = REGNO (SUBREG_REG (op)); + else + return 1; + + return (regno != T_REG && regno != PR_REG && regno > 15 + && regno != MACH_REG && regno != MACL_REG); + } + return 0; +} + +int +fp_extended_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == FLOAT_EXTEND && GET_MODE (op) == mode) + { + op = XEXP (op, 0); + mode = GET_MODE (op); + } + return fp_arith_reg_operand (op, mode); +} + +/* Returns 1 if OP is a valid source operand for an arithmetic insn. */ + +int +arith_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op))) + return 1; + + return 0; +} + +/* Returns 1 if OP is a valid source operand for a compare insn. */ + +int +arith_reg_or_0_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op))) + return 1; + + return 0; +} + +/* Returns 1 if OP is a valid source operand for a logical operation. */ + +int +logical_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (arith_reg_operand (op, mode)) + return 1; + + if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op))) + return 1; + + return 0; +} + +/* Nonzero if OP is a floating point value with value 0.0. */ + +int +fp_zero_operand (op) + rtx op; +{ + REAL_VALUE_TYPE r; + + if (GET_MODE (op) != SFmode) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r); +} + +/* Nonzero if OP is a floating point value with value 1.0. 
*/ + +int +fp_one_operand (op) + rtx op; +{ + REAL_VALUE_TYPE r; + + if (GET_MODE (op) != SFmode) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + return REAL_VALUES_EQUAL (r, dconst1); +} + +int +tertiary_reload_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE); +} + +int +fpscr_operand (op) + rtx op; +{ + return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG + && GET_MODE (op) == PSImode); +} + +int +commutative_float_operator (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_MODE (op) != mode) + return 0; + switch (GET_CODE (op)) + { + case PLUS: + case MULT: + return 1; + } + return 0; +} + +int +noncommutative_float_operator (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_MODE (op) != mode) + return 0; + switch (GET_CODE (op)) + { + case MINUS: + case DIV: + return 1; + } + return 0; +} + +int +binary_float_operator (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_MODE (op) != mode) + return 0; + switch (GET_CODE (op)) + { + case PLUS: + case MINUS: + case MULT: + case DIV: + return 1; + } + return 0; +} + +/* Return the destination address of a branch. */ + +int +branch_dest (branch) + rtx branch; +{ + rtx dest = SET_SRC (PATTERN (branch)); + int dest_uid; + + if (GET_CODE (dest) == IF_THEN_ELSE) + dest = XEXP (dest, 1); + dest = XEXP (dest, 0); + dest_uid = INSN_UID (dest); + return insn_addresses[dest_uid]; +} + +/* Return non-zero if REG is not used after INSN. + We assume REG is a reload reg, and therefore does + not live past labels. It may live past calls or jumps though. */ +int +reg_unused_after (reg, insn) + rtx reg; + rtx insn; +{ + enum rtx_code code; + rtx set; + + /* If the reg is set by this instruction, then it is safe for our + case. Disregard the case where this is a store to memory, since + we are checking a register used in the store address. */ + set = single_set (insn); + if (set && GET_CODE (SET_DEST (set)) != MEM + && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return 1; + + while (insn = NEXT_INSN (insn)) + { + code = GET_CODE (insn); + +#if 0 + /* If this is a label that existed before reload, then the register + if dead here. However, if this is a label added by reorg, then + the register may still be live here. We can't tell the difference, + so we just ignore labels completely. */ + if (code == CODE_LABEL) + return 1; + /* else */ +#endif + + if (code == JUMP_INSN) + return 0; + + /* If this is a sequence, we must handle them all at once. + We could have for instance a call that sets the target register, + and a insn in a delay slot that uses the register. In this case, + we must return 0. 
*/
+      else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+	{
+	  int i;
+	  int retval = 0;
+
+	  for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+	    {
+	      rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
+	      rtx set = single_set (this_insn);
+
+	      if (GET_CODE (this_insn) == CALL_INSN)
+		code = CALL_INSN;
+	      else if (GET_CODE (this_insn) == JUMP_INSN)
+		{
+		  if (INSN_ANNULLED_BRANCH_P (this_insn))
+		    return 0;
+		  code = JUMP_INSN;
+		}
+
+	      if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+		return 0;
+	      if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+		{
+		  if (GET_CODE (SET_DEST (set)) != MEM)
+		    retval = 1;
+		  else
+		    return 0;
+		}
+	      if (set == 0
+		  && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
+		return 0;
+	    }
+	  if (retval == 1)
+	    return 1;
+	  else if (code == JUMP_INSN)
+	    return 0;
+	}
+      else if (GET_RTX_CLASS (code) == 'i')
+	{
+	  rtx set = single_set (insn);
+
+	  if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+	    return 0;
+	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+	    return GET_CODE (SET_DEST (set)) != MEM;
+	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+	    return 0;
+	}
+
+      if (code == CALL_INSN && call_used_regs[REGNO (reg)])
+	return 1;
+    }
+  return 1;
+}
+
+extern struct obstack permanent_obstack;
+
+rtx
+get_fpscr_rtx ()
+{
+  static rtx fpscr_rtx;
+
+  if (! fpscr_rtx)
+    {
+      push_obstacks (&permanent_obstack, &permanent_obstack);
+      fpscr_rtx = gen_rtx (REG, PSImode, 48);
+      REG_USERVAR_P (fpscr_rtx) = 1;
+      pop_obstacks ();
+      mark_user_reg (fpscr_rtx);
+    }
+  if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
+    mark_user_reg (fpscr_rtx);
+  return fpscr_rtx;
+}
+
+void
+emit_sf_insn (pat)
+     rtx pat;
+{
+  rtx addr;
+  /* When generating reload insns, we must not create new registers.  FPSCR
+     should already have the correct value, so do nothing to change it.  */
+  if (! TARGET_FPU_SINGLE && ! reload_in_progress)
+    {
+      addr = gen_reg_rtx (SImode);
+      emit_insn (gen_fpu_switch0 (addr));
+    }
+  emit_insn (pat);
+  if (! TARGET_FPU_SINGLE && ! reload_in_progress)
+    {
+      addr = gen_reg_rtx (SImode);
+      emit_insn (gen_fpu_switch1 (addr));
+    }
+}
+
+void
+emit_df_insn (pat)
+     rtx pat;
+{
+  rtx addr;
+  if (TARGET_FPU_SINGLE && ! reload_in_progress)
+    {
+      addr = gen_reg_rtx (SImode);
+      emit_insn (gen_fpu_switch0 (addr));
+    }
+  emit_insn (pat);
+  if (TARGET_FPU_SINGLE && ! reload_in_progress)
+    {
+      addr = gen_reg_rtx (SImode);
+      emit_insn (gen_fpu_switch1 (addr));
+    }
+}
+
+void
+expand_sf_unop (fun, operands)
+     rtx (*fun)();
+     rtx *operands;
+{
+  emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_sf_binop (fun, operands)
+     rtx (*fun)();
+     rtx *operands;
+{
+  emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
+			get_fpscr_rtx ()));
+}
+
+void
+expand_df_unop (fun, operands)
+     rtx (*fun)();
+     rtx *operands;
+{
+  emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_df_binop (fun, operands)
+     rtx (*fun)();
+     rtx *operands;
+{
+  emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
+			get_fpscr_rtx ()));
+}
+
+void
+expand_fp_branch (compare, branch)
+     rtx (*compare) (), (*branch) ();
+{
+  (GET_MODE (sh_compare_op0) == SFmode ? emit_sf_insn : emit_df_insn)
+    ((*compare) ());
+  emit_jump_insn ((*branch) ());
+}
+
+/* We don't want to make fpscr call-saved, because that would prevent
+   changing it, and it would also cost an extra instruction to save it.
+   We don't want it to be known as a global register either, because
+   that disables all flow analysis.  But it has to be live at the function
+   return.  Thus, we need to insert a USE at the end of the function.  */
+/* This should best be called at about the time FINALIZE_PIC is called,
+   but not dependent on flag_pic.  Alas, there is no suitable hook there,
+   so this gets called from HAVE_RETURN.  */
+int
+emit_fpscr_use ()
+{
+  static int fpscr_uses = 0;
+
+  if (rtx_equal_function_value_matters)
+    {
+      emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ()));
+      fpscr_uses++;
+    }
+  else
+    {
+      if (fpscr_uses > 1)
+	{
+	  /* Due to the crude way we emit the USEs, we might end up with
+	     some extra ones.  Delete all but the last one.  */
+	  rtx insn;
+
+	  for (insn = get_last_insn(); insn; insn = PREV_INSN (insn))
+	    if (GET_CODE (insn) == INSN
+		&& GET_CODE (PATTERN (insn)) == USE
+		&& GET_CODE (XEXP (PATTERN (insn), 0)) == REG
+		&& REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
+	      {
+		insn = PREV_INSN (insn);
+		break;
+	      }
+	  for (; insn; insn = PREV_INSN (insn))
+	    if (GET_CODE (insn) == INSN
+		&& GET_CODE (PATTERN (insn)) == USE
+		&& GET_CODE (XEXP (PATTERN (insn), 0)) == REG
+		&& REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
+	      {
+		PUT_CODE (insn, NOTE);
+		NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+		NOTE_SOURCE_FILE (insn) = 0;
+	      }
+	}
+      fpscr_uses = 0;
+    }
+}
+
+/* ??? gcc does flow analysis strictly after common subexpression
+   elimination.  As a result, common subexpression elimination fails
+   when there are some intervening statements setting the same register.
+   If we did nothing about this, this would hurt the precision switching
+   for SH4 badly.  There is some cse after reload, but it is unable to
+   undo the extra register pressure from the unused instructions, and
+   it cannot remove auto-increment loads.
+
+   A C code example that shows this flow/cse weakness for (at least) SH
+   and sparc (as of gcc ss-970706) is this:
+
+double
+f(double a)
+{
+  double d;
+  d = 0.1;
+  a += d;
+  d = 1.1;
+  d = 0.1;
+  a *= d;
+  return a;
+}
+
+   So we add another pass before common subexpression elimination, to
+   remove assignments that are dead due to a following assignment in the
+   same basic block.  */
+
+int sh_flag_remove_dead_before_cse;
+
+static void
+mark_use (x, reg_set_block)
+     rtx x, *reg_set_block;
+{
+  enum rtx_code code;
+
+  if (! x)
+    return;
+  code = GET_CODE (x);
+  switch (code)
+    {
+    case REG:
+      {
+	int regno = REGNO (x);
+	int nregs = (regno < FIRST_PSEUDO_REGISTER
+		     ? HARD_REGNO_NREGS (regno, GET_MODE (x))
+		     : 1);
+	do
+	  {
+	    reg_set_block[regno + nregs - 1] = 0;
+	  }
+	while (--nregs);
+	break;
+      }
+    case SET:
+      {
+	rtx dest = SET_DEST (x);
+
+	if (GET_CODE (dest) == SUBREG)
+	  dest = SUBREG_REG (dest);
+	if (GET_CODE (dest) != REG)
+	  mark_use (dest, reg_set_block);
+	mark_use (SET_SRC (x), reg_set_block);
+	break;
+      }
+    case CLOBBER:
+      break;
+    default:
+      {
+	char *fmt = GET_RTX_FORMAT (code);
+	int i, j;
+	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+	  {
+	    if (fmt[i] == 'e')
+	      mark_use (XEXP (x, i), reg_set_block);
+	    else if (fmt[i] == 'E')
+	      for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+		mark_use (XVECEXP (x, i, j), reg_set_block);
+	  }
+	break;
+      }
+    }
+}
+
+int
+remove_dead_before_cse ()
+{
+  rtx *reg_set_block, last, last_call, insn, set;
+  int in_libcall = 0;
+
+  /* This pass should run just once, after rtl generation.  */
+
+  if (!
sh_flag_remove_dead_before_cse + || rtx_equal_function_value_matters + || reload_completed) + return; + + sh_flag_remove_dead_before_cse = 0; + + reg_set_block = (rtx *)alloca (max_reg_num () * sizeof (rtx)); + bzero ((char *)reg_set_block, max_reg_num () * sizeof (rtx)); + last_call = last = get_last_insn (); + for (insn = last; insn; insn = PREV_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') + continue; + if (GET_CODE (insn) == JUMP_INSN) + { + last_call = last = insn; + continue; + } + set = single_set (insn); + + /* Don't delete parts of libcalls, since that would confuse cse, loop + and flow. */ + if (find_reg_note (insn, REG_RETVAL, NULL_RTX)) + in_libcall = 1; + else if (in_libcall) + { + if (find_reg_note (insn, REG_LIBCALL, NULL_RTX)) + in_libcall = 0; + } + else if (set && GET_CODE (SET_DEST (set)) == REG) + { + int regno = REGNO (SET_DEST (set)); + rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno] + ? last_call + : last); + if (reg_set_block[regno] == ref_insn + && (regno >= FIRST_PSEUDO_REGISTER + || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1) + && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn))) + { + PUT_CODE (insn, NOTE); + NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (insn) = 0; + continue; + } + else + reg_set_block[REGNO (SET_DEST (set))] = ref_insn; + } + if (GET_CODE (insn) == CALL_INSN) + { + last_call = insn; + mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block); + } + mark_use (PATTERN (insn), reg_set_block); + } + return 0; +} diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h new file mode 100755 index 0000000..eff316a --- /dev/null +++ b/gcc/config/sh/sh.h @@ -0,0 +1,2232 @@ +/* Definitions of target machine for GNU compiler for Hitachi Super-H. + Copyright (C) 1993-1998 Free Software Foundation, Inc. + Contributed by Steve Chamberlain (sac@cygnus.com). + Improved by Jim Wilson (wilson@cygnus.com). + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + + +#define TARGET_VERSION \ + fputs (" (Hitachi SH)", stderr); + +/* Unfortunately, insn-attrtab.c doesn't include insn-codes.h. We can't + include it here, because hconfig.h is also included by gencodes.c . */ +extern int code_for_indirect_jump_scratch; + +/* Generate SDB debugging information. */ + +#define SDB_DEBUGGING_INFO + +/* Output DBX (stabs) debugging information if doing -gstabs. 
*/ + +#include "dbxcoff.h" + +#define SDB_DELIM ";" + +#define CPP_SPEC "%{ml:-D__LITTLE_ENDIAN__} \ +%{m1:-D__sh1__} \ +%{m2:-D__sh2__} \ +%{m3:-D__sh3__} \ +%{m3e:-D__SH3E__} \ +%{m4-single-only:-D__SH4_SINGLE_ONLY__} \ +%{m4-single:-D__SH4_SINGLE__} \ +%{m4:-D__SH4__} \ +%{!m1:%{!m2:%{!m3:%{!m3e:%{!m4:%{!m4-single:%{!m4-single-only:-D__sh1__}}}}}}}" + +#define CPP_PREDEFINES "-D__sh__ -Acpu(sh) -Amachine(sh)" + +#define ASM_SPEC "%{ml:-little} %{mrelax:-relax}" + +#define LINK_SPEC "%{ml:-m shl} %{mrelax:-relax}" + +/* We can not debug without a frame pointer. */ +/* #define CAN_DEBUG_WITHOUT_FP */ + +#define CONDITIONAL_REGISTER_USAGE \ + if (! TARGET_SH4 || ! TARGET_FMOVD) \ + { \ + int regno; \ + for (regno = FIRST_XD_REG; regno <= LAST_XD_REG; regno++) \ + fixed_regs[regno] = call_used_regs[regno] = 1; \ + if (! TARGET_SH4) \ + { \ + if (! TARGET_SH3E) \ + { \ + int regno; \ + for (regno = FIRST_FP_REG; regno <= LAST_FP_REG; regno++) \ + fixed_regs[regno] = call_used_regs[regno] = 1; \ + fixed_regs[FPUL_REG] = call_used_regs[FPUL_REG] = 1; \ + } \ + } \ + } \ + /* Hitachi saves and restores mac registers on call. */ \ + if (TARGET_HITACHI) \ + { \ + call_used_regs[MACH_REG] = 0; \ + call_used_regs[MACL_REG] = 0; \ + } + +/* ??? Need to write documentation for all SH options and add it to the + invoke.texi file. */ + +/* Run-time compilation parameters selecting different hardware subsets. */ + +extern int target_flags; +#define ISIZE_BIT (1<<1) +#define DALIGN_BIT (1<<6) +#define SH1_BIT (1<<8) +#define SH2_BIT (1<<9) +#define SH3_BIT (1<<10) +#define SH3E_BIT (1<<11) +#define HARD_SH4_BIT (1<<5) +#define FPU_SINGLE_BIT (1<<7) +#define SH4_BIT (1<<12) +#define FMOVD_BIT (1<<4) +#define SPACE_BIT (1<<13) +#define BIGTABLE_BIT (1<<14) +#define RELAX_BIT (1<<15) +#define HITACHI_BIT (1<<22) +#define PADSTRUCT_BIT (1<<28) +#define LITTLE_ENDIAN_BIT (1<<29) +#define IEEE_BIT (1<<30) + +/* Nonzero if we should dump out instruction size info. */ +#define TARGET_DUMPISIZE (target_flags & ISIZE_BIT) + +/* Nonzero to align doubles on 64 bit boundaries. */ +#define TARGET_ALIGN_DOUBLE (target_flags & DALIGN_BIT) + +/* Nonzero if we should generate code using type 1 insns. */ +#define TARGET_SH1 (target_flags & SH1_BIT) + +/* Nonzero if we should generate code using type 2 insns. */ +#define TARGET_SH2 (target_flags & SH2_BIT) + +/* Nonzero if we should generate code using type 3 insns. */ +#define TARGET_SH3 (target_flags & SH3_BIT) + +/* Nonzero if we should generate code using type 3E insns. */ +#define TARGET_SH3E (target_flags & SH3E_BIT) + +/* Nonzero if the cache line size is 32. */ +#define TARGET_CACHE32 (target_flags & HARD_SH4_BIT) + +/* Nonzero if we schedule for a superscalar implementation. */ +#define TARGET_SUPERSCALAR (target_flags & HARD_SH4_BIT) + +/* Nonzero if the target has separate instruction and data caches. */ +#define TARGET_HARVARD (target_flags & HARD_SH4_BIT) + +/* Nonzero if compiling for SH4 hardware (to be used for insn costs etc.) */ +#define TARGET_HARD_SH4 (target_flags & HARD_SH4_BIT) + +/* Nonzero if the default precision of th FPU is single */ +#define TARGET_FPU_SINGLE (target_flags & FPU_SINGLE_BIT) + +/* Nonzero if we should generate code using type 4 insns. */ +#define TARGET_SH4 (target_flags & SH4_BIT) + +/* Nonzero if we should generate fmovd. */ +#define TARGET_FMOVD (target_flags & FMOVD_BIT) + +/* Nonzero if we respect NANs. */ +#define TARGET_IEEE (target_flags & IEEE_BIT) + +/* Nonzero if we should generate smaller code rather than faster code. 
*/ +#define TARGET_SMALLCODE (target_flags & SPACE_BIT) + +/* Nonzero to use long jump tables. */ +#define TARGET_BIGTABLE (target_flags & BIGTABLE_BIT) + +/* Nonzero to generate pseudo-ops needed by the assembler and linker + to do function call relaxing. */ +#define TARGET_RELAX (target_flags & RELAX_BIT) + +/* Nonzero if using Hitachi's calling convention. */ +#define TARGET_HITACHI (target_flags & HITACHI_BIT) + +/* Nonzero if padding structures to a multiple of 4 bytes. This is + incompatible with Hitachi's compiler, and gives unusual structure layouts + which confuse programmers. + ??? This option is not useful, but is retained in case there are people + who are still relying on it. It may be deleted in the future. */ +#define TARGET_PADSTRUCT (target_flags & PADSTRUCT_BIT) + +/* Nonzero if generating code for a little endian SH. */ +#define TARGET_LITTLE_ENDIAN (target_flags & LITTLE_ENDIAN_BIT) + +#define TARGET_SWITCHES \ +{ {"1", SH1_BIT}, \ + {"2", SH2_BIT}, \ + {"3", SH3_BIT|SH2_BIT}, \ + {"3e", SH3E_BIT|SH3_BIT|SH2_BIT|FPU_SINGLE_BIT}, \ + {"4-single-only", SH3E_BIT|SH3_BIT|SH2_BIT|SH3E_BIT|HARD_SH4_BIT|FPU_SINGLE_BIT}, \ + {"4-single", SH4_BIT|SH3E_BIT|SH3_BIT|SH2_BIT|HARD_SH4_BIT|FPU_SINGLE_BIT},\ + {"4", SH4_BIT|SH3E_BIT|SH3_BIT|SH2_BIT|HARD_SH4_BIT}, \ + {"b", -LITTLE_ENDIAN_BIT}, \ + {"bigtable", BIGTABLE_BIT}, \ + {"dalign", DALIGN_BIT}, \ + {"fmovd", FMOVD_BIT}, \ + {"hitachi", HITACHI_BIT}, \ + {"ieee", IEEE_BIT}, \ + {"isize", ISIZE_BIT}, \ + {"l", LITTLE_ENDIAN_BIT}, \ + {"no-ieee", -IEEE_BIT}, \ + {"padstruct", PADSTRUCT_BIT}, \ + {"relax", RELAX_BIT}, \ + {"space", SPACE_BIT}, \ + SUBTARGET_SWITCHES \ + {"", TARGET_DEFAULT} \ +} + +/* This are meant to be redefined in the host dependent files */ +#define SUBTARGET_SWITCHES + +#define TARGET_DEFAULT (0) + +#define OPTIMIZATION_OPTIONS(LEVEL,SIZE) \ +do { \ + if (LEVEL) \ + flag_omit_frame_pointer = -1; \ + if (LEVEL) \ + sh_flag_remove_dead_before_cse = 1; \ + if (SIZE) \ + target_flags |= SPACE_BIT; \ +} while (0) + +#define ASSEMBLER_DIALECT assembler_dialect + +extern int assembler_dialect; + +#define OVERRIDE_OPTIONS \ +do { \ + sh_cpu = CPU_SH1; \ + assembler_dialect = 0; \ + if (TARGET_SH2) \ + sh_cpu = CPU_SH2; \ + if (TARGET_SH3) \ + sh_cpu = CPU_SH3; \ + if (TARGET_SH3E) \ + sh_cpu = CPU_SH3E; \ + if (TARGET_SH4) \ + { \ + assembler_dialect = 1; \ + sh_cpu = CPU_SH4; \ + } \ + if (! TARGET_SH4 || ! TARGET_FMOVD) \ + { \ + /* Prevent usage of explicit register names for variables \ + for registers not present / not addressable in the \ + target architecture. */ \ + int regno; \ + for (regno = (TARGET_SH3E) ? 17 : 0; \ + regno <= 24; regno++) \ + fp_reg_names[regno][0] = 0; \ + } \ + if (flag_omit_frame_pointer < 0) \ + /* The debugging information is sufficient, \ + but gdb doesn't implement this yet */ \ + if (0) \ + flag_omit_frame_pointer \ + = (PREFERRED_DEBUGGING_TYPE == DWARF_DEBUG \ + || PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG); \ + else \ + flag_omit_frame_pointer = 0; \ + \ + /* Never run scheduling before reload, since that can \ + break global alloc, and generates slower code anyway due \ + to the pressure on R0. */ \ + flag_schedule_insns = 0; \ + sh_addr_diff_vec_mode = TARGET_BIGTABLE ? SImode : HImode; \ +} while (0) + +/* Target machine storage layout. */ + +/* Define to use software floating point emulator for REAL_ARITHMETIC and + decimal <-> binary conversion. 
*/ +#define REAL_ARITHMETIC + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ + +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. */ +#define BYTES_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0) + +/* Define this if most significant word of a multiword number is the lowest + numbered. */ +#define WORDS_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0) + +/* Define this to set the endianness to use in libgcc2.c, which can + not depend on target_flags. */ +#if defined(__LITTLE_ENDIAN__) +#define LIBGCC2_WORDS_BIG_ENDIAN 0 +#else +#define LIBGCC2_WORDS_BIG_ENDIAN 1 +#endif + +/* Number of bits in an addressable storage unit. */ +#define BITS_PER_UNIT 8 + +/* Width in bits of a "word", which is the contents of a machine register. + Note that this is not necessarily the width of data type `int'; + if using 16-bit ints on a 68000, this would still be 32. + But on a machine with 16-bit registers, this would be 16. */ +#define BITS_PER_WORD 32 +#define MAX_BITS_PER_WORD 32 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 4 + +/* Width in bits of a pointer. + See also the macro `Pmode' defined below. */ +#define POINTER_SIZE 32 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 32 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY BIGGEST_ALIGNMENT + +/* The log (base 2) of the cache line size, in bytes. Processors prior to + SH3 have no actual cache, but they fetch code in chunks of 4 bytes. */ +#define CACHE_LOG (TARGET_CACHE32 ? 5 : TARGET_SH3 ? 4 : 2) + +/* Allocation boundary (in *bits*) for the code of a function. + 32 bit alignment is faster, because instructions are always fetched as a + pair from a longword boundary. */ +#define FUNCTION_BOUNDARY (TARGET_SMALLCODE ? 16 : (1 << CACHE_LOG) * 8) + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 32 + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32) + +/* The best alignment to use in cases where we have a choice. */ +#define FASTEST_ALIGNMENT 32 + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < FASTEST_ALIGNMENT) \ + ? FASTEST_ALIGNMENT : (ALIGN)) + +#ifndef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT 128 +#endif + +/* Make arrays of chars word-aligned for the same reasons. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Number of bits which any structure or union's size must be a + multiple of. Each structure or union's size is rounded up to a + multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY (TARGET_PADSTRUCT ? 32 : 8) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* If LABEL_AFTER_BARRIER demands an alignment, return its base 2 logarithm. */ +#define LABEL_ALIGN_AFTER_BARRIER(LABEL_AFTER_BARRIER) \ + barrier_align (LABEL_AFTER_BARRIER) + +#define LOOP_ALIGN(A_LABEL) \ + ((! optimize || TARGET_HARVARD || TARGET_SMALLCODE) \ + ? 
0 : sh_loop_align (A_LABEL)) + +#define LABEL_ALIGN(A_LABEL) \ +( \ + (PREV_INSN (A_LABEL) \ + && GET_CODE (PREV_INSN (A_LABEL)) == INSN \ + && GET_CODE (PATTERN (PREV_INSN (A_LABEL))) == UNSPEC_VOLATILE \ + && XINT (PATTERN (PREV_INSN (A_LABEL)), 1) == 1) \ + /* explicit alignment insn in constant tables. */ \ + ? INTVAL (XVECEXP (PATTERN (PREV_INSN (A_LABEL)), 0, 0)) \ + : 0) + +/* Jump tables must be 32 bit aligned, no matter the size of the element. */ +#define ADDR_VEC_ALIGN(ADDR_VEC) 2 + +/* The base two logarithm of the known minimum alignment of an insn length. */ +#define INSN_LENGTH_ALIGNMENT(A_INSN) \ + (GET_CODE (A_INSN) == INSN \ + ? 1 \ + : GET_CODE (A_INSN) == JUMP_INSN || GET_CODE (A_INSN) == CALL_INSN \ + ? 1 \ + : CACHE_LOG) + +/* Standard register usage. */ + +/* Register allocation for the Hitachi calling convention: + + r0 arg return + r1..r3 scratch + r4..r7 args in + r8..r13 call saved + r14 frame pointer/call saved + r15 stack pointer + ap arg pointer (doesn't really exist, always eliminated) + pr subroutine return address + t t bit + mach multiply/accumulate result, high part + macl multiply/accumulate result, low part. + fpul fp/int communication register + rap return address pointer register + fr0 fp arg return + fr1..fr3 scratch floating point registers + fr4..fr11 fp args in + fr12..fr15 call saved floating point registers */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. */ + +#define AP_REG 16 +#define PR_REG 17 +#define T_REG 18 +#define GBR_REG 19 +#define MACH_REG 20 +#define MACL_REG 21 +#define SPECIAL_REG(REGNO) ((REGNO) >= 18 && (REGNO) <= 21) +#define FPUL_REG 22 +#define RAP_REG 23 +#define FIRST_FP_REG 24 +#define LAST_FP_REG 39 +#define FIRST_XD_REG 40 +#define LAST_XD_REG 47 +#define FPSCR_REG 48 + +#define FIRST_PSEUDO_REGISTER 49 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. + + Mach register is fixed 'cause it's only 10 bits wide for SH1. + It is 32 bits wide for SH2. */ + +#define FIXED_REGISTERS \ + { 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 1, \ + 1, 1, 1, 1, \ + 1, 1, 0, 1, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 1, \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ + +#define CALL_USED_REGISTERS \ + { 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 0, 0, 0, 0, \ + 0, 0, 0, 1, \ + 1, 0, 1, 1, \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 0, 0, 0, 0, \ + 1, 1, 1, 1, \ + 1, 1, 0, 0, \ + 1, \ +} + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. + + On the SH all but the XD regs are UNITS_PER_WORD bits wide. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((REGNO) >= FIRST_XD_REG && (REGNO) <= LAST_XD_REG \ + ? 
(GET_MODE_SIZE (MODE) / (2 * UNITS_PER_WORD)) \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) \ + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. + We can allow any mode in any general register. The special registers + only allow SImode. Don't allow any mode in the PR. */ + +/* We cannot hold DCmode values in the XD registers because alter_reg + handles subregs of them incorrectly. We could work around this by + spacing the XD registers like the DR registers, but this would require + additional memory in every compilation to hold larger register vectors. + We could hold SFmode / SCmode values in XD registers, but that + would require a tertiary reload when reloading from / to memory, + and a secondary reload to reload from / to general regs; that + seems to be a loosing proposition. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (SPECIAL_REG (REGNO) ? (MODE) == SImode \ + : (REGNO) == FPUL_REG ? (MODE) == SImode || (MODE) == SFmode \ + : (REGNO) >= FIRST_FP_REG && (REGNO) <= LAST_FP_REG && (MODE) == SFmode \ + ? 1 \ + : (REGNO) >= FIRST_FP_REG && (REGNO) <= LAST_FP_REG \ + ? ((MODE) == SFmode \ + || (TARGET_SH3E && (MODE) == SCmode) \ + || (((TARGET_SH4 && (MODE) == DFmode) || (MODE) == DCmode) \ + && (((REGNO) - FIRST_FP_REG) & 1) == 0)) \ + : (REGNO) >= FIRST_XD_REG && (REGNO) <= LAST_XD_REG \ + ? (MODE) == DFmode \ + : (REGNO) == PR_REG ? 0 \ + : (REGNO) == FPSCR_REG ? (MODE) == PSImode \ + : 1) + +/* Value is 1 if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. + If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be 0 for correct output. */ + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + ((MODE1) == (MODE2) || GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* Define this if the program counter is overloaded on a register. */ +/* #define PC_REGNUM 15*/ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 15 + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 14 + +/* Fake register that holds the address on the stack of the + current function's return address. */ +#define RETURN_ADDRESS_POINTER_REGNUM 23 + +/* Value should be nonzero if functions must have frame pointers. + Zero means the frame pointer need not be set up (and parms may be accessed + via the stack pointer) in functions that seem suitable. */ + +#define FRAME_POINTER_REQUIRED 0 + +/* Definitions for register eliminations. + + We have three registers that can be eliminated on the SH. First, the + frame pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the argument pointer register can always be + eliminated; it is replaced with either the stack or frame pointer. + Third, there is the return address pointer, which can also be replaced + with either the stack or the frame pointer. */ + +/* This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. 
*/ + +/* If you add any registers here that are not actually hard registers, + and that have any alternative of elimination that doesn't always + apply, you need to amend calc_live_regs to exclude it, because + reload spills all eliminable registers where it sees an + can_eliminate == 0 entry, thus making them 'live' . + If you add any hard registers that can be eliminated in different + ways, you have to patch reload to spill them only when all alternatives + of elimination fail. */ + +#define ELIMINABLE_REGS \ +{{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { RETURN_ADDRESS_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { RETURN_ADDRESS_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM},} + +/* Given FROM and TO register numbers, say whether this elimination + is allowed. */ +#define CAN_ELIMINATE(FROM, TO) \ + (!((FROM) == FRAME_POINTER_REGNUM && FRAME_POINTER_REQUIRED)) + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + OFFSET = initial_elimination_offset ((FROM), (TO)) + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 16 + +/* Register in which the static-chain is passed to a function. */ +#define STATIC_CHAIN_REGNUM 13 + +/* The register in which a struct value address is passed. */ + +#define STRUCT_VALUE_REGNUM 2 + +/* If the structure value address is not passed in a register, define + `STRUCT_VALUE' as an expression returning an RTX for the place + where the address is passed. If it returns 0, the address is + passed as an "invisible" first argument. */ + +/*#define STRUCT_VALUE ((rtx)0)*/ + +/* Don't default to pcc-struct-return, because we have already specified + exactly how to return structures in the RETURN_IN_MEMORY macro. */ + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +/* The SH has two sorts of general registers, R0 and the rest. R0 can + be used as the destination of some of the arithmetic ops. There are + also some special purpose registers; the T bit register, the + Procedure Return Register and the Multiply Accumulate Registers. */ +/* Place GENERAL_REGS after FPUL_REGS so that it will be preferred by + reg_class_subunion. We don't want to have an actual union class + of these, because it would only be used when both classes are calculated + to give the same cost, but there is only one FPUL register. + Besides, regclass fails to notice the different REGISTER_MOVE_COSTS + applying to the actual instruction alternative considered. 
E.g., the + y/r alternative of movsi_ie is considered to have no more cost that + the r/r alternative, which is patently untrue. */ + +enum reg_class +{ + NO_REGS, + R0_REGS, + PR_REGS, + T_REGS, + MAC_REGS, + FPUL_REGS, + GENERAL_REGS, + FP0_REGS, + FP_REGS, + DF_REGS, + FPSCR_REGS, + GENERAL_FP_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "R0_REGS", \ + "PR_REGS", \ + "T_REGS", \ + "MAC_REGS", \ + "FPUL_REGS", \ + "GENERAL_REGS", \ + "FP0_REGS", \ + "FP_REGS", \ + "DF_REGS", \ + "FPSCR_REGS", \ + "GENERAL_FP_REGS", \ + "ALL_REGS", \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ \ + { 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x00000001, 0x00000000 }, /* R0_REGS */ \ + { 0x00020000, 0x00000000 }, /* PR_REGS */ \ + { 0x00040000, 0x00000000 }, /* T_REGS */ \ + { 0x00300000, 0x00000000 }, /* MAC_REGS */ \ + { 0x00400000, 0x00000000 }, /* FPUL_REGS */ \ + { 0x0081FFFF, 0x00000000 }, /* GENERAL_REGS */ \ + { 0x01000000, 0x00000000 }, /* FP0_REGS */ \ + { 0xFF000000, 0x000000FF }, /* FP_REGS */ \ + { 0xFF000000, 0x0000FFFF }, /* DF_REGS */ \ + { 0x00000000, 0x00010000 }, /* FPSCR_REGS */ \ + { 0xFF81FFFF, 0x0000FFFF }, /* GENERAL_FP_REGS */ \ + { 0xFFFFFFFF, 0x0001FFFF }, /* ALL_REGS */ \ +} + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +extern int regno_reg_class[]; +#define REGNO_REG_CLASS(REGNO) regno_reg_class[(REGNO)] + +/* When defined, the compiler allows registers explicitly used in the + rtl to be used as spill registers but prevents the compiler from + extending the lifetime of these registers. */ + +#define SMALL_REGISTER_CLASSES 1 + +/* The order in which register should be allocated. */ +/* Sometimes FP0_REGS becomes the preferred class of a floating point pseudo, + and GENERAL_FP_REGS the alternate class. Since FP0 is likely to be + spilled or used otherwise, we better have the FP_REGS allocated first. */ +#define REG_ALLOC_ORDER \ + { 25,26,27,28,29,30,31,24,32,33,34,35,36,37,38,39, \ + 40,41,42,43,44,45,46,47,48, \ + 1,2,3,7,6,5,4,0,8,9,10,11,12,13,14, \ + 22,15,16,17,18,19,20,21,23 } + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS R0_REGS +#define BASE_REG_CLASS GENERAL_REGS + +/* Get reg_class from a letter such as appears in the machine + description. */ +extern enum reg_class reg_class_from_letter[]; + +#define REG_CLASS_FROM_LETTER(C) \ + ( (C) >= 'a' && (C) <= 'z' ? reg_class_from_letter[(C)-'a'] : NO_REGS ) + +/* The letters I, J, K, L and M in a register constraint string + can be used to stand for particular ranges of immediate operands. + This macro defines what the ranges are. + C is the letter, and VALUE is a constant value. + Return 1 if VALUE is in the range specified by C. + I: arithmetic operand -127..128, as used in add, sub, etc + K: shift operand 1,2,8 or 16 + L: logical operand 0..255, as used in and, or, etc. 
+ M: constant 1 + N: constant 0 */ + +#define CONST_OK_FOR_I(VALUE) (((HOST_WIDE_INT)(VALUE))>= -128 \ + && ((HOST_WIDE_INT)(VALUE)) <= 127) +#define CONST_OK_FOR_K(VALUE) ((VALUE)==1||(VALUE)==2||(VALUE)==8||(VALUE)==16) +#define CONST_OK_FOR_L(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \ + && ((HOST_WIDE_INT)(VALUE)) <= 255) +#define CONST_OK_FOR_M(VALUE) ((VALUE)==1) +#define CONST_OK_FOR_N(VALUE) ((VALUE)==0) +#define CONST_OK_FOR_LETTER_P(VALUE, C) \ + ((C) == 'I' ? CONST_OK_FOR_I (VALUE) \ + : (C) == 'K' ? CONST_OK_FOR_K (VALUE) \ + : (C) == 'L' ? CONST_OK_FOR_L (VALUE) \ + : (C) == 'M' ? CONST_OK_FOR_M (VALUE) \ + : (C) == 'N' ? CONST_OK_FOR_N (VALUE) \ + : 0) + +/* Similar, but for floating constants, and defining letters G and H. + Here VALUE is the CONST_DOUBLE rtx itself. */ + +#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ +((C) == 'G' ? fp_zero_operand (VALUE) \ + : (C) == 'H' ? fp_one_operand (VALUE) \ + : (C) == 'F') + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ + +#define PREFERRED_RELOAD_CLASS(X, CLASS) (CLASS) + +#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS,MODE,X) \ + ((((((CLASS) == FP_REGS || (CLASS) == FP0_REGS \ + || (CLASS) == DF_REGS) \ + && (GET_CODE (X) == REG && REGNO (X) <= AP_REG)) \ + || (((CLASS) == GENERAL_REGS || (CLASS) == R0_REGS) \ + && GET_CODE (X) == REG \ + && REGNO (X) >= FIRST_FP_REG && REGNO (X) <= LAST_FP_REG)) \ + && MODE == SFmode) \ + ? FPUL_REGS \ + : ((CLASS) == FPUL_REGS \ + && (GET_CODE (X) == MEM \ + || (GET_CODE (X) == REG && REGNO (X) >= FIRST_PSEUDO_REGISTER)))\ + ? GENERAL_REGS \ + : (((CLASS) == MAC_REGS || (CLASS) == PR_REGS) \ + && GET_CODE (X) == REG && REGNO (X) > 15 \ + && (CLASS) != REGNO_REG_CLASS (REGNO (X))) \ + ? GENERAL_REGS : NO_REGS) + +#define SECONDARY_INPUT_RELOAD_CLASS(CLASS,MODE,X) \ + ((((CLASS) == FP_REGS || (CLASS) == FP0_REGS || (CLASS) == DF_REGS) \ + && immediate_operand ((X), (MODE)) \ + && ! ((fp_zero_operand (X) || fp_one_operand (X)) && (MODE) == SFmode))\ + ? R0_REGS \ + : CLASS == FPUL_REGS && immediate_operand ((X), (MODE)) \ + ? (GET_CODE (X) == CONST_INT && CONST_OK_FOR_I (INTVAL (X)) \ + ? GENERAL_REGS \ + : R0_REGS) \ + : (CLASS == FPSCR_REGS \ + && ((GET_CODE (X) == REG && REGNO (X) >= FIRST_PSEUDO_REGISTER) \ + || GET_CODE (X) == MEM && GET_CODE (XEXP ((X), 0)) == PLUS)) \ + ? GENERAL_REGS \ + : SECONDARY_OUTPUT_RELOAD_CLASS((CLASS),(MODE),(X))) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. + + On SH this is the size of MODE in words. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* If defined, gives a class of registers that cannot be used as the + operand of a SUBREG that changes the size of the object. */ + +#define CLASS_CANNOT_CHANGE_SIZE DF_REGS + +/* Stack layout; function entry, exit and calling. */ + +/* Define the number of registers that can hold parameters. + These macros are used only in other macro definitions below. */ + +#define NPARM_REGS(MODE) \ + (TARGET_SH3E && (MODE) == SFmode \ + ? 8 \ + : TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \ + ? 
8 \ + : 4) + +#define FIRST_PARM_REG 4 +#define FIRST_RET_REG 0 + +#define FIRST_FP_PARM_REG (FIRST_FP_REG + 4) +#define FIRST_FP_RET_REG FIRST_FP_REG + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this macro if the addresses of local variable slots are at + negative offsets from the frame pointer. + + The SH only has positive indexes, so grow the frame up. */ +/* #define FRAME_GROWS_DOWNWARD */ + +/* Offset from the frame pointer to the first local variable slot to + be allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. */ +/* Don't define PUSH_ROUNDING, since the hardware doesn't do this. + When PUSH_ROUNDING is not defined, PARM_BOUNDARY will cause gcc to + do correct alignment. */ +#if 0 +#define PUSH_ROUNDING(NPUSHED) (((NPUSHED) + 3) & ~3) +#endif + +/* Offset of first parameter from the argument pointer register value. */ +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Value is the number of byte of arguments automatically + popped when returning from a subroutine call. + FUNDECL is the declaration node of the function (as a tree), + FUNTYPE is the data type of the function (as a tree), + or for a library call it is an identifier node for the subroutine name. + SIZE is the number of bytes of arguments passed on the stack. + + On the SH, the caller does not pop any of its arguments that were passed + on the stack. */ +#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE) 0 + +/* Nonzero if we do not know how to pass TYPE solely in registers. + Values that come in registers with inconvenient padding are stored + to memory at the function start. */ + +#define MUST_PASS_IN_STACK(MODE,TYPE) \ + ((TYPE) != 0 \ + && (TREE_CODE (TYPE_SIZE (TYPE)) != INTEGER_CST \ + || TREE_ADDRESSABLE (TYPE))) +/* Some subroutine macros specific to this machine. */ + +#define BASE_RETURN_VALUE_REG(MODE) \ + ((TARGET_SH3E && ((MODE) == SFmode)) \ + ? FIRST_FP_RET_REG \ + : TARGET_SH3E && (MODE) == SCmode \ + ? FIRST_FP_RET_REG \ + : (TARGET_SH4 \ + && ((MODE) == DFmode || (MODE) == SFmode \ + || (MODE) == DCmode || (MODE) == SCmode )) \ + ? FIRST_FP_RET_REG \ + : FIRST_RET_REG) + +#define BASE_ARG_REG(MODE) \ + ((TARGET_SH3E && ((MODE) == SFmode)) \ + ? FIRST_FP_PARM_REG \ + : TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)\ + ? FIRST_FP_PARM_REG \ + : FIRST_PARM_REG) + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. + For the SH, this is like LIBCALL_VALUE, except that we must change the + mode like PROMOTE_MODE does. + ??? PROMOTE_MODE is ignored for non-scalar types. The set of types + tested here has to be kept in sync with the one in explow.c:promote_mode. */ + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + gen_rtx (REG, \ + ((GET_MODE_CLASS (TYPE_MODE (VALTYPE)) == MODE_INT \ + && GET_MODE_SIZE (TYPE_MODE (VALTYPE)) < UNITS_PER_WORD \ + && (TREE_CODE (VALTYPE) == INTEGER_TYPE \ + || TREE_CODE (VALTYPE) == ENUMERAL_TYPE \ + || TREE_CODE (VALTYPE) == BOOLEAN_TYPE \ + || TREE_CODE (VALTYPE) == CHAR_TYPE \ + || TREE_CODE (VALTYPE) == REAL_TYPE \ + || TREE_CODE (VALTYPE) == OFFSET_TYPE)) \ + ? 
SImode : TYPE_MODE (VALTYPE)), \ + BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE))) + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ +#define LIBCALL_VALUE(MODE) \ + gen_rtx (REG, (MODE), BASE_RETURN_VALUE_REG (MODE)) + +/* 1 if N is a possible register number for a function value. */ +#define FUNCTION_VALUE_REGNO_P(REGNO) \ + ((REGNO) == FIRST_RET_REG || (TARGET_SH3E && (REGNO) == FIRST_FP_RET_REG)) + +/* 1 if N is a possible register number for function argument passing. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ + (((REGNO) >= FIRST_PARM_REG && (REGNO) < (FIRST_PARM_REG + 4)) \ + || (TARGET_SH3E \ + && (REGNO) >= FIRST_FP_PARM_REG && (REGNO) < (FIRST_FP_PARM_REG + 8))) + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On SH, this is a single integer, which is a number of words + of arguments scanned so far (including the invisible argument, + if any, which holds the structure-value-address). + Thus NARGREGS or more means all following args should go on the stack. */ + +enum sh_arg_class { SH_ARG_INT = 0, SH_ARG_FLOAT = 1 }; +struct sh_args { + int arg_count[2]; +}; + +#define CUMULATIVE_ARGS struct sh_args + +#define GET_SH_ARG_CLASS(MODE) \ + ((TARGET_SH3E && (MODE) == SFmode) \ + ? SH_ARG_FLOAT \ + : TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \ + || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \ + ? SH_ARG_FLOAT : SH_ARG_INT) + +#define ROUND_ADVANCE(SIZE) \ + (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Round a register number up to a proper boundary for an arg of mode + MODE. + + The SH doesn't care about double alignment, so we only + round doubles to even regs when asked to explicitly. */ + +#define ROUND_REG(CUM, MODE) \ + (((TARGET_ALIGN_DOUBLE \ + || (TARGET_SH4 && ((MODE) == DFmode || (MODE) == DCmode) \ + && (CUM).arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (MODE)))\ + && GET_MODE_UNIT_SIZE ((MODE)) > UNITS_PER_WORD) \ + ? ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \ + + ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] & 1)) \ + : (CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)]) + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. + + On SH, the offset always starts at 0: the first parm reg is always + the same reg for a given argument class. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT) \ + do { \ + (CUM).arg_count[(int) SH_ARG_INT] = 0; \ + (CUM).arg_count[(int) SH_ARG_FLOAT] = 0; \ + } while (0) + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be + available.) */ + +#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \ + if (! TARGET_SH4 || PASS_IN_REG_P ((CUM), (MODE), (TYPE))) \ + ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \ + = (ROUND_REG ((CUM), (MODE)) \ + + ((MODE) == BLKmode \ + ? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \ + : ROUND_ADVANCE (GET_MODE_SIZE (MODE))))) + +/* Return boolean indicating arg of mode MODE will be passed in a reg. + This macro is only used in this file. */ + +#define PASS_IN_REG_P(CUM, MODE, TYPE) \ + (((TYPE) == 0 || ! TREE_ADDRESSABLE ((tree)(TYPE))) \ + && (TARGET_SH3E \ + ? 
((MODE) == BLKmode \ + ? (((CUM).arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD \ + + int_size_in_bytes (TYPE)) \ + <= NPARM_REGS (SImode) * UNITS_PER_WORD) \ + : ((ROUND_REG((CUM), (MODE)) \ + + HARD_REGNO_NREGS (BASE_ARG_REG (MODE), (MODE))) \ + <= NPARM_REGS (MODE))) \ + : ROUND_REG ((CUM), (MODE)) < NPARM_REGS (MODE))) + +/* Define where to put the arguments to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + + On SH the first args are normally in registers + and the rest are pushed. Any arg that starts within the first + NPARM_REGS words is at least partially passed in a register unless + its data type forbids. */ + +extern int current_function_varargs; + +#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \ + ((PASS_IN_REG_P ((CUM), (MODE), (TYPE)) \ + && ((NAMED) || TARGET_SH3E || ! current_function_varargs)) \ + ? gen_rtx (REG, (MODE), \ + ((BASE_ARG_REG (MODE) + ROUND_REG ((CUM), (MODE))) \ + ^ ((MODE) == SFmode && TARGET_SH4 \ + && TARGET_LITTLE_ENDIAN != 0))) \ + : 0) + +/* For an arg passed partly in registers and partly in memory, + this is the number of registers used. + For args passed entirely in registers or entirely in memory, zero. + + We sometimes split args. */ + +#define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) \ + ((PASS_IN_REG_P ((CUM), (MODE), (TYPE)) \ + && ! TARGET_SH4 \ + && (ROUND_REG ((CUM), (MODE)) \ + + ((MODE) != BLKmode \ + ? ROUND_ADVANCE (GET_MODE_SIZE (MODE)) \ + : ROUND_ADVANCE (int_size_in_bytes (TYPE))) \ + - NPARM_REGS (MODE) > 0)) \ + ? NPARM_REGS (MODE) - ROUND_REG ((CUM), (MODE)) \ + : 0) + +extern int current_function_anonymous_args; + +/* Perform any needed actions needed for a function that is receiving a + variable number of arguments. */ + +#define SETUP_INCOMING_VARARGS(ASF, MODE, TYPE, PAS, ST) \ + current_function_anonymous_args = 1; + +/* Call the function profiler with a given profile label. + We use two .aligns, so as to make sure that both the .long is aligned + on a 4 byte boundary, and that the .long is a fixed distance (2 bytes) + from the trapa instruction. */ + +#define FUNCTION_PROFILER(STREAM,LABELNO) \ +{ \ + fprintf((STREAM), "\t.align\t2\n"); \ + fprintf((STREAM), "\ttrapa\t#33\n"); \ + fprintf((STREAM), "\t.align\t2\n"); \ + asm_fprintf((STREAM), "\t.long\t%LLP%d\n", (LABELNO)); \ +} + +/* Define this macro if the code for function profiling should come + before the function prologue. Normally, the profiling code comes + after. */ + +#define PROFILE_BEFORE_PROLOGUE + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ + +#define EXIT_IGNORE_STACK 1 + +/* Generate the assembly code for function exit + Just dump out any accumulated constant table. 
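To make the cumulative-argument bookkeeping concrete, here is a stand-alone sketch (illustrative only, not part of the patch) of how integer arguments land in r4..r7 one word at a time, with anything that starts beyond the fourth word going to the stack. It deliberately ignores the floating-point register file and the double-word alignment handled by ROUND_REG.

/* Illustrative sketch of integer argument assignment to r4..r7.
   Assumes 4-byte words; ignores SH3E/SH4 FP registers and
   TARGET_ALIGN_DOUBLE padding.  */
#include <stdio.h>

#define WORD      4
#define NPARM_INT 4   /* r4..r7 */

int
main (void)
{
  int sizes[] = { 4, 4, 8, 4 };   /* e.g. f (int, int, long long, int) */
  int used = 0;                   /* words of r4..r7 consumed so far */
  int i;

  for (i = 0; i < 4; i++)
    {
      int words = (sizes[i] + WORD - 1) / WORD;   /* cf. ROUND_ADVANCE */
      if (used < NPARM_INT)
        printf ("arg %d: starts in r%d, %d word(s)\n", i, 4 + used, words);
      else
        printf ("arg %d: passed on the stack\n", i);
      used += words;
    }
  return 0;
}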
*/ + +#define FUNCTION_EPILOGUE(STREAM, SIZE) function_epilogue ((STREAM), (SIZE)) + +/* + On the SH, the trampoline looks like + 2 0002 DD02 mov.l l2,r13 + 1 0000 D301 mov.l l1,r3 + 3 0004 4D2B jmp @r13 + 4 0006 0009 nop + 5 0008 00000000 l1: .long function + 6 000c 00000000 l2: .long area */ + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE 16 + +/* Alignment required for a trampoline in bits . */ +#define TRAMPOLINE_ALIGNMENT \ + ((CACHE_LOG < 3 || TARGET_SMALLCODE && ! TARGET_HARVARD) ? 32 : 64) + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ + +#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \ +{ \ + emit_move_insn (gen_rtx (MEM, SImode, (TRAMP)), \ + GEN_INT (TARGET_LITTLE_ENDIAN ? 0xd301dd02 : 0xdd02d301));\ + emit_move_insn (gen_rtx (MEM, SImode, plus_constant ((TRAMP), 4)), \ + GEN_INT (TARGET_LITTLE_ENDIAN ? 0x00094d2b : 0x4d2b0009));\ + emit_move_insn (gen_rtx (MEM, SImode, plus_constant ((TRAMP), 8)), \ + (CXT)); \ + emit_move_insn (gen_rtx (MEM, SImode, plus_constant ((TRAMP), 12)), \ + (FNADDR)); \ + if (TARGET_HARVARD) \ + emit_insn (gen_ic_invalidate_line (TRAMP)); \ +} + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. + FRAMEADDR is already the frame pointer of the COUNT frame, so we + can ignore COUNT. */ + +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + (((COUNT) == 0) \ + ? gen_rtx (MEM, Pmode, gen_rtx (REG, Pmode, RETURN_ADDRESS_POINTER_REGNUM)) \ + : (rtx) 0) + +/* Generate necessary RTL for __builtin_saveregs(). + ARGLIST is the argument list; see expr.c. */ +extern struct rtx_def *sh_builtin_saveregs (); +#define EXPAND_BUILTIN_SAVEREGS(ARGLIST) sh_builtin_saveregs (ARGLIST) + +/* Addressing modes, and classification of registers for them. */ +#define HAVE_POST_INCREMENT 1 +/*#define HAVE_PRE_INCREMENT 1*/ +/*#define HAVE_POST_DECREMENT 1*/ +#define HAVE_PRE_DECREMENT 1 + +#define USE_LOAD_POST_INCREMENT(mode) ((mode == SImode || mode == DImode) \ + ? 0 : 1) +#define USE_LOAD_PRE_DECREMENT(mode) 0 +#define USE_STORE_POST_INCREMENT(mode) 0 +#define USE_STORE_PRE_DECREMENT(mode) ((mode == SImode || mode == DImode) \ + ? 0 : 1) + +#define MOVE_BY_PIECES_P(SIZE, ALIGN) (move_by_pieces_ninsns (SIZE, ALIGN) \ + < (TARGET_SMALLCODE ? 2 : \ + ((ALIGN >= 4) ? 16 : 2))) + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ + +#define REGNO_OK_FOR_BASE_P(REGNO) \ + ((REGNO) < PR_REG || (unsigned) reg_renumber[(REGNO)] < PR_REG) +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + ((REGNO) == 0 || (unsigned) reg_renumber[(REGNO)] == 0) + +/* Maximum number of registers that can appear in a valid memory + address. */ + +#define MAX_REGS_PER_ADDRESS 2 + +/* Recognize any constant value that is a valid address. */ + +#define CONSTANT_ADDRESS_P(X) (GET_CODE (X) == LABEL_REF) + +/* Nonzero if the constant value X is a legitimate general operand. 
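A quick source-level note on the RETURN_ADDR_RTX definition above: only the innermost frame is supported, so __builtin_return_address is only meaningful with a count of zero on this port. The sketch below is illustrative only.

/* Illustrative use of __builtin_return_address under the RETURN_ADDR_RTX
   definition above.  */
void *
my_return_address (void)
{
  /* Count 0: read through the fake return-address pointer register.  */
  return __builtin_return_address (0);
}

void *
my_callers_return_address (void)
{
  /* Count != 0: RETURN_ADDR_RTX yields (rtx) 0, so this cannot be used
     to walk further up the stack on SH.  */
  return __builtin_return_address (1);
}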
*/ + +#define LEGITIMATE_CONSTANT_P(X) \ + (GET_CODE (X) != CONST_DOUBLE \ + || GET_MODE (X) == DFmode || GET_MODE (X) == SFmode \ + || (TARGET_SH3E && (fp_zero_operand (X) || fp_one_operand (X)))) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. */ + +#ifndef REG_OK_STRICT + +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define REG_OK_FOR_BASE_P(X) \ + (REGNO (X) <= 16 || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +#define REG_OK_FOR_INDEX_P(X) \ + (REGNO (X) == 0 || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* Nonzero if X/OFFSET is a hard reg that can be used as an index + or if X is a pseudo reg. */ +#define SUBREG_OK_FOR_INDEX_P(X, OFFSET) \ + ((REGNO (X) == 0 && OFFSET == 0) || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +#else + +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define REG_OK_FOR_BASE_P(X) \ + REGNO_OK_FOR_BASE_P (REGNO (X)) + +/* Nonzero if X is a hard reg that can be used as an index. */ +#define REG_OK_FOR_INDEX_P(X) \ + REGNO_OK_FOR_INDEX_P (REGNO (X)) + +/* Nonzero if X/OFFSET is a hard reg that can be used as an index. */ +#define SUBREG_OK_FOR_INDEX_P(X, OFFSET) \ + (REGNO_OK_FOR_INDEX_P (REGNO (X)) && (OFFSET) == 0) + +#endif + +/* The 'Q' constraint is a pc relative load operand. */ +#define EXTRA_CONSTRAINT_Q(OP) \ + (GET_CODE (OP) == MEM && \ + ((GET_CODE (XEXP ((OP), 0)) == LABEL_REF) \ + || (GET_CODE (XEXP ((OP), 0)) == CONST \ + && GET_CODE (XEXP (XEXP ((OP), 0), 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (XEXP ((OP), 0), 0), 0)) == LABEL_REF \ + && GET_CODE (XEXP (XEXP (XEXP ((OP), 0), 0), 1)) == CONST_INT))) + +#define EXTRA_CONSTRAINT(OP, C) \ + ((C) == 'Q' ? EXTRA_CONSTRAINT_Q (OP) \ + : 0) + +/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression + that is a valid memory address for an instruction. + The MODE argument is the machine mode for the MEM expression + that wants to use this address. + + The other macros defined here are used only in GO_IF_LEGITIMATE_ADDRESS. */ + +#define MODE_DISP_OK_4(X,MODE) \ +(GET_MODE_SIZE (MODE) == 4 && (unsigned) INTVAL (X) < 64 \ + && ! (INTVAL (X) & 3) && ! (TARGET_SH3E && (MODE) == SFmode)) + +#define MODE_DISP_OK_8(X,MODE) \ +((GET_MODE_SIZE(MODE)==8) && ((unsigned)INTVAL(X)<60) \ + && ! (INTVAL(X) & 3) && ! (TARGET_SH4 && (MODE) == DFmode)) + +#define BASE_REGISTER_RTX_P(X) \ + ((GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + || (GET_CODE (X) == SUBREG \ + && GET_CODE (SUBREG_REG (X)) == REG \ + && REG_OK_FOR_BASE_P (SUBREG_REG (X)))) + +/* Since this must be r0, which is a single register class, we must check + SUBREGs more carefully, to be sure that we don't accept one that extends + outside the class. */ +#define INDEX_REGISTER_RTX_P(X) \ + ((GET_CODE (X) == REG && REG_OK_FOR_INDEX_P (X)) \ + || (GET_CODE (X) == SUBREG \ + && GET_CODE (SUBREG_REG (X)) == REG \ + && SUBREG_OK_FOR_INDEX_P (SUBREG_REG (X), SUBREG_WORD (X)))) + +/* Jump to LABEL if X is a valid address RTX. This must also take + REG_OK_STRICT into account when deciding about valid registers, but it uses + the above macros so we are in luck. 
+ + Allow REG + REG+disp + REG+r0 + REG++ + --REG */ + +/* ??? The SH3e does not have the REG+disp addressing mode when loading values + into the FRx registers. We implement this by setting the maximum offset + to zero when the value is SFmode. This also restricts loading of SFmode + values into the integer registers, but that can't be helped. */ + +/* The SH allows a displacement in a QI or HI amode, but only when the + other operand is R0. GCC doesn't handle this very well, so we forgo + all of that. + + A legitimate index for a QI or HI is 0, SI can be any number 0..63, + DI can be any number 0..60. */ + +#define GO_IF_LEGITIMATE_INDEX(MODE, OP, LABEL) \ + do { \ + if (GET_CODE (OP) == CONST_INT) \ + { \ + if (MODE_DISP_OK_4 ((OP), (MODE))) goto LABEL; \ + if (MODE_DISP_OK_8 ((OP), (MODE))) goto LABEL; \ + } \ + } while(0) + +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ +{ \ + if (BASE_REGISTER_RTX_P (X)) \ + goto LABEL; \ + else if ((GET_CODE (X) == POST_INC || GET_CODE (X) == PRE_DEC) \ + && BASE_REGISTER_RTX_P (XEXP ((X), 0))) \ + goto LABEL; \ + else if (GET_CODE (X) == PLUS \ + && ((MODE) != PSImode || reload_completed)) \ + { \ + rtx xop0 = XEXP ((X), 0); \ + rtx xop1 = XEXP ((X), 1); \ + if (GET_MODE_SIZE (MODE) <= 8 && BASE_REGISTER_RTX_P (xop0)) \ + GO_IF_LEGITIMATE_INDEX ((MODE), xop1, LABEL); \ + if (GET_MODE_SIZE (MODE) <= 4 \ + || TARGET_SH4 && TARGET_FMOVD && MODE == DFmode) \ + { \ + if (BASE_REGISTER_RTX_P (xop1) && INDEX_REGISTER_RTX_P (xop0))\ + goto LABEL; \ + if (INDEX_REGISTER_RTX_P (xop1) && BASE_REGISTER_RTX_P (xop0))\ + goto LABEL; \ + } \ + } \ +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. + This macro is used in only one place: `memory_address' in explow.c. + + OLDX is the address as it was before break_out_memory_refs was called. + In some cases it is useful to look at this to decide what needs to be done. + + MODE and WIN are passed so that this macro can use + GO_IF_LEGITIMATE_ADDRESS. + + It is always safe for this macro to do nothing. It exists to recognize + opportunities to optimize the output. + + For the SH, if X is almost suitable for indexing, but the offset is + out of range, convert it into a normal form so that cse has a chance + of reducing the number of address registers used. */ + +#define LEGITIMIZE_ADDRESS(X,OLDX,MODE,WIN) \ +{ \ + if (GET_CODE (X) == PLUS \ + && (GET_MODE_SIZE (MODE) == 4 \ + || GET_MODE_SIZE (MODE) == 8) \ + && GET_CODE (XEXP ((X), 1)) == CONST_INT \ + && BASE_REGISTER_RTX_P (XEXP ((X), 0)) \ + && ! (TARGET_SH4 && (MODE) == DFmode) \ + && ! (TARGET_SH3E && (MODE) == SFmode)) \ + { \ + rtx index_rtx = XEXP ((X), 1); \ + HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; \ + rtx sum; \ + \ + GO_IF_LEGITIMATE_INDEX ((MODE), index_rtx, WIN); \ + /* On rare occasions, we might get an unaligned pointer \ + that is indexed in a way to give an aligned address. \ + Therefore, keep the lower two bits in offset_base. */ \ + /* Instead of offset_base 128..131 use 124..127, so that \ + simple add suffices. */ \ + if (offset > 127) \ + { \ + offset_base = ((offset + 4) & ~60) - 4; \ + } \ + else \ + offset_base = offset & ~60; \ + /* Sometimes the normal form does not suit DImode. We \ + could avoid that by using smaller ranges, but that \ + would give less optimized code when SImode is \ + prevalent. 
*/ \ + if (GET_MODE_SIZE (MODE) + offset - offset_base <= 64) \ + { \ + sum = expand_binop (Pmode, add_optab, XEXP ((X), 0), \ + GEN_INT (offset_base), NULL_RTX, 0, \ + OPTAB_LIB_WIDEN); \ + \ + (X) = gen_rtx (PLUS, Pmode, sum, GEN_INT (offset - offset_base)); \ + goto WIN; \ + } \ + } \ +} + +/* A C compound statement that attempts to replace X, which is an address + that needs reloading, with a valid memory address for an operand of + mode MODE. WIN is a C statement label elsewhere in the code. + + Like for LEGITIMIZE_ADDRESS, for the SH we try to get a normal form + of the address. That will allow inheritance of the address reloads. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \ +{ \ + if (GET_CODE (X) == PLUS \ + && (GET_MODE_SIZE (MODE) == 4 || GET_MODE_SIZE (MODE) == 8) \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && BASE_REGISTER_RTX_P (XEXP (X, 0)) \ + && ! (TARGET_SH4 && (MODE) == DFmode) \ + && ! ((MODE) == PSImode && (TYPE) == RELOAD_FOR_INPUT_ADDRESS)) \ + { \ + rtx index_rtx = XEXP (X, 1); \ + HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; \ + rtx sum; \ + \ + if (TARGET_SH3E && MODE == SFmode) \ + { \ + X = copy_rtx (X); \ + push_reload (index_rtx, NULL_RTX, &XEXP (X, 1), NULL_PTR, \ + INDEX_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), \ + (TYPE)); \ + goto WIN; \ + } \ + /* Instead of offset_base 128..131 use 124..127, so that \ + simple add suffices. */ \ + if (offset > 127) \ + { \ + offset_base = ((offset + 4) & ~60) - 4; \ + } \ + else \ + offset_base = offset & ~60; \ + /* Sometimes the normal form does not suit DImode. We \ + could avoid that by using smaller ranges, but that \ + would give less optimized code when SImode is \ + prevalent. */ \ + if (GET_MODE_SIZE (MODE) + offset - offset_base <= 64) \ + { \ + sum = gen_rtx (PLUS, Pmode, XEXP (X, 0), \ + GEN_INT (offset_base)); \ + X = gen_rtx (PLUS, Pmode, sum, GEN_INT (offset - offset_base));\ + push_reload (sum, NULL_RTX, &XEXP (X, 0), NULL_PTR, \ + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), \ + (TYPE)); \ + goto WIN; \ + } \ + } \ + /* We must re-recognize what we created before. */ \ + else if (GET_CODE (X) == PLUS \ + && (GET_MODE_SIZE (MODE) == 4 || GET_MODE_SIZE (MODE) == 8) \ + && GET_CODE (XEXP (X, 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \ + && BASE_REGISTER_RTX_P (XEXP (XEXP (X, 0), 0)) \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && ! (TARGET_SH3E && MODE == SFmode)) \ + { \ + /* Because this address is so complex, we know it must have \ + been created by LEGITIMIZE_RELOAD_ADDRESS before; thus, \ + it is already unshared, and needs no further unsharing. */ \ + push_reload (XEXP ((X), 0), NULL_RTX, &XEXP ((X), 0), NULL_PTR, \ + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), (TYPE));\ + goto WIN; \ + } \ +} + +/* Go to LABEL if ADDR (a legitimate address expression) + has an effect that depends on the machine mode it is used for. + + ??? Strictly speaking, we should also include all indexed addressing, + because the index scale factor is the length of the operand. + However, the impact of GO_IF_MODE_DEPENDENT_ADDRESS would be to + high if we did that. So we rely on reload to fix things up. */ + +#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR,LABEL) \ +{ \ + if (GET_CODE(ADDR) == PRE_DEC || GET_CODE(ADDR) == POST_INC) \ + goto LABEL; \ +} + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE (TARGET_BIGTABLE ? 
SImode : HImode) + +#define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \ +((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 127 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, QImode) \ + : (MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 255 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, QImode) \ + : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 ? HImode \ + : SImode) + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. + Do not define this if the table should contain absolute addresses. */ +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Specify the tree operation to be used to convert reals to integers. */ +#define IMPLICIT_FIX_EXPR FIX_ROUND_EXPR + +/* This is the kind of divide that is easiest to do in the general case. */ +#define EASY_DIV_EXPR TRUNC_DIV_EXPR + +/* Since the SH3e has only `float' support, it is desirable to make all + floating point types equivalent to `float'. */ +#define DOUBLE_TYPE_SIZE ((TARGET_SH3E && ! TARGET_SH4) ? 32 : 64) + +/* 'char' is signed by default. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* The type of size_t unsigned int. */ +#define SIZE_TYPE "unsigned int" + +#define WCHAR_TYPE "short unsigned int" +#define WCHAR_TYPE_SIZE 16 + +/* Don't cse the address of the function being compiled. */ +/*#define NO_RECURSIVE_FUNCTION_CSE 1*/ + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 + +/* Max number of bytes we want move_by_pieces to be able to copy + efficiently. */ +#define MOVE_MAX_PIECES (TARGET_SH4 ? 8 : 4) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, NIL if none. */ +#define LOAD_EXTEND_OP(MODE) SIGN_EXTEND + +/* Define if loading short immediate values into registers sign extends. */ +#define SHORT_IMMEDIATES_SIGN_EXTEND + +/* Define this if zero-extension is slow (more than one real instruction). + On the SH, it's only one instruction. */ +/* #define SLOW_ZERO_EXTEND */ + +/* Nonzero if access to memory by bytes is slow and undesirable. */ +#define SLOW_BYTE_ACCESS 0 + +/* We assume that the store-condition-codes instructions store 0 for false + and some other value for true. This is the value stored for true. */ + +#define STORE_FLAG_VALUE 1 + +/* Immediate shift counts are truncated by the output routines (or was it + the assembler?). Shift counts in a register are truncated by SH. Note + that the native compiler puts too large (> 32) immediate shift counts + into a register and shifts by the register, letting the SH decide what + to do instead of doing that itself. */ +/* ??? The library routines in lib1funcs.asm truncate the shift count. + However, the SH3 has hardware shifts that do not truncate exactly as gcc + expects - the sign bit is significant - so it appears that we need to + leave this zero for correct SH3 code. */ +#define SHIFT_COUNT_TRUNCATED (! TARGET_SH3) + +/* All integers have the same format so truncation is easy. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) 1 + +/* Define this if addresses of constant functions + shouldn't be put through pseudo regs where they can be cse'd. 
+ Desirable on machines where ordinary constants are expensive + but a CALL with constant address is cheap. */ +/*#define NO_FUNCTION_CSE 1*/ + +/* Chars and shorts should be passed as ints. */ +#define PROMOTE_PROTOTYPES 1 + +/* The machine modes of pointers and functions. */ +#define Pmode SImode +#define FUNCTION_MODE Pmode + +/* The relative costs of various types of constants. Note that cse.c defines + REG = 1, SUBREG = 2, any node = (2 + sum of subnodes). */ + +#define CONST_COSTS(RTX, CODE, OUTER_CODE) \ + case CONST_INT: \ + if (INTVAL (RTX) == 0) \ + return 0; \ + else if (CONST_OK_FOR_I (INTVAL (RTX))) \ + return 1; \ + else if (((OUTER_CODE) == AND || (OUTER_CODE) == IOR || (OUTER_CODE) == XOR) \ + && CONST_OK_FOR_L (INTVAL (RTX))) \ + return 1; \ + else \ + return 8; \ + case CONST: \ + case LABEL_REF: \ + case SYMBOL_REF: \ + return 5; \ + case CONST_DOUBLE: \ + return 10; + +#define RTX_COSTS(X, CODE, OUTER_CODE) \ + case PLUS: \ + return (COSTS_N_INSNS (1) \ + + rtx_cost (XEXP ((X), 0), PLUS) \ + + (rtx_equal_p (XEXP ((X), 0), XEXP ((X), 1))\ + ? 0 : rtx_cost (XEXP ((X), 1), PLUS)));\ + case AND: \ + return COSTS_N_INSNS (andcosts (X)); \ + case MULT: \ + return COSTS_N_INSNS (multcosts (X)); \ + case ASHIFT: \ + case ASHIFTRT: \ + case LSHIFTRT: \ + /* Add one extra unit for the matching constraint. \ + Otherwise loop strength reduction would think that\ + a shift with different sourc and destination is \ + as cheap as adding a constant to a register. */ \ + return (COSTS_N_INSNS (shiftcosts (X)) \ + + rtx_cost (XEXP ((X), 0), (CODE)) \ + + 1); \ + case DIV: \ + case UDIV: \ + case MOD: \ + case UMOD: \ + return COSTS_N_INSNS (20); \ + case FLOAT: \ + case FIX: \ + return 100; + +/* The multiply insn on the SH1 and the divide insns on the SH1 and SH2 + are actually function calls with some special constraints on arguments + and register usage. + + These macros tell reorg that the references to arguments and + register clobbers for insns of type sfunc do not appear to happen + until after the millicode call. This allows reorg to put insns + which set the argument registers into the delay slot of the millicode + call -- thus they act more like traditional CALL_INSNs. + + get_attr_is_sfunc will try to recognize the given insn, so make sure to + filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns + in particular. */ + +#define INSN_SETS_ARE_DELAYED(X) \ + ((GET_CODE (X) == INSN \ + && GET_CODE (PATTERN (X)) != SEQUENCE \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER \ + && get_attr_is_sfunc (X))) + +#define INSN_REFERENCES_ARE_DELAYED(X) \ + ((GET_CODE (X) == INSN \ + && GET_CODE (PATTERN (X)) != SEQUENCE \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER \ + && get_attr_is_sfunc (X))) + +/* Compute the cost of an address. For the SH, all valid addresses are + the same cost. */ +/* ??? Perhaps we should make reg+reg addresses have higher cost because + they add to register pressure on r0. */ + +#define ADDRESS_COST(RTX) 1 + +/* Compute extra cost of moving data between one register class + and another. */ + +/* Regclass always uses 2 for moves in the same register class; + If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, + it uses this information. Hence, the general register <-> floating point + register information here is not used for SFmode. */ +#define REGISTER_MOVE_COST(SRCCLASS, DSTCLASS) \ + ((((DSTCLASS) == T_REGS) || ((DSTCLASS) == PR_REG)) ? 
10 \ + : ((((DSTCLASS) == FP0_REGS || (DSTCLASS) == FP_REGS || (DSTCLASS) == DF_REGS) \ + && ((SRCCLASS) == GENERAL_REGS || (SRCCLASS) == R0_REGS)) \ + || (((DSTCLASS) == GENERAL_REGS || (DSTCLASS) == R0_REGS) \ + && ((SRCCLASS) == FP0_REGS || (SRCCLASS) == FP_REGS \ + || (SRCCLASS) == DF_REGS))) \ + ? TARGET_FMOVD ? 8 : 12 \ + : (((DSTCLASS) == FPUL_REGS \ + && ((SRCCLASS) == GENERAL_REGS || (SRCCLASS) == R0_REGS)) \ + || (SRCCLASS == FPUL_REGS \ + && ((DSTCLASS) == GENERAL_REGS || (DSTCLASS) == R0_REGS))) \ + ? 5 \ + : (((DSTCLASS) == FPUL_REGS \ + && ((SRCCLASS) == PR_REGS || (SRCCLASS) == MAC_REGS)) \ + || ((SRCCLASS) == FPUL_REGS \ + && ((DSTCLASS) == PR_REGS || (DSTCLASS) == MAC_REGS))) \ + ? 7 \ + : 2) + +/* ??? Perhaps make MEMORY_MOVE_COST depend on compiler option? This + would be so that people would slow memory systems could generate + different code that does fewer memory accesses. */ + +/* Assembler output control. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at + the end of the line. */ +#define ASM_COMMENT_START "!" + +/* The text to go at the start of the assembler file. */ +#define ASM_FILE_START(STREAM) \ + output_file_start (STREAM) + +#define ASM_FILE_END(STREAM) + +#define ASM_APP_ON "" +#define ASM_APP_OFF "" +#define FILE_ASM_OP "\t.file\n" +#define IDENT_ASM_OP "\t.ident\n" +#define SET_ASM_OP ".set" + +/* How to change between sections. */ + +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define CTORS_SECTION_ASM_OP "\t.section\t.ctors\n" +#define DTORS_SECTION_ASM_OP "\t.section\t.dtors\n" +#define EXTRA_SECTIONS in_ctors, in_dtors +#define EXTRA_SECTION_FUNCTIONS \ +void \ +ctors_section() \ +{ \ + if (in_section != in_ctors) \ + { \ + fprintf (asm_out_file, "%s\n", CTORS_SECTION_ASM_OP); \ + in_section = in_ctors; \ + } \ +} \ +void \ +dtors_section() \ +{ \ + if (in_section != in_dtors) \ + { \ + fprintf (asm_out_file, "%s\n", DTORS_SECTION_ASM_OP); \ + in_section = in_dtors; \ + } \ +} + +/* If defined, a C expression whose value is a string containing the + assembler operation to identify the following data as + uninitialized global data. If not defined, and neither + `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined, + uninitialized global data will be output in the data section if + `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be + used. */ +#ifndef BSS_SECTION_ASM_OP +#define BSS_SECTION_ASM_OP ".section\t.bss" +#endif + +/* Like `ASM_OUTPUT_BSS' except takes the required alignment as a + separate, explicit argument. If you define this macro, it is used + in place of `ASM_OUTPUT_BSS', and gives you more flexibility in + handling the required alignment of the variable. The alignment is + specified as the number of bits. + + Try to use function `asm_output_aligned_bss' defined in file + `varasm.c' when defining this macro. */ +#ifndef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) +#endif + +/* Define this so that jump tables go in same section as the current function, + which could be text or it could be a user defined section. */ +#define JUMP_TABLES_IN_TEXT_SECTION 1 + +/* A C statement to output something to the assembler file to switch to section + NAME for object DECL which is either a FUNCTION_DECL, a VAR_DECL or + NULL_TREE. Some target formats do not support arbitrary sections. 
Do not + define this macro in such cases. */ + +#define ASM_OUTPUT_SECTION_NAME(FILE, DECL, NAME, RELOC) \ + do { fprintf (FILE, ".section\t%s\n", NAME); } while (0) + +#define ASM_OUTPUT_CONSTRUCTOR(FILE,NAME) \ + do { ctors_section(); asm_fprintf((FILE),"\t.long\t%U%s\n", (NAME)); } while (0) + +#define ASM_OUTPUT_DESTRUCTOR(FILE,NAME) \ + do { dtors_section(); asm_fprintf((FILE),"\t.long\t%U%s\n", (NAME)); } while (0) + +#undef DO_GLOBAL_CTORS_BODY + +#define DO_GLOBAL_CTORS_BODY \ +{ \ + typedef (*pfunc)(); \ + extern pfunc __ctors[]; \ + extern pfunc __ctors_end[]; \ + pfunc *p; \ + for (p = __ctors_end; p > __ctors; ) \ + { \ + (*--p)(); \ + } \ +} + +#undef DO_GLOBAL_DTORS_BODY +#define DO_GLOBAL_DTORS_BODY \ +{ \ + typedef (*pfunc)(); \ + extern pfunc __dtors[]; \ + extern pfunc __dtors_end[]; \ + pfunc *p; \ + for (p = __dtors; p < __dtors_end; p++) \ + { \ + (*p)(); \ + } \ +} + +#define ASM_OUTPUT_REG_PUSH(file, v) \ + fprintf ((file), "\tmov.l\tr%s,-@r15\n", (v)); + +#define ASM_OUTPUT_REG_POP(file, v) \ + fprintf ((file), "\tmov.l\t@r15+,r%s\n", (v)); + +/* The assembler's names for the registers. RFP need not always be used as + the Real framepointer; it can also be used as a normal general register. + Note that the name `fp' is horribly misleading since `fp' is in fact only + the argument-and-return-context pointer. */ + +extern char fp_reg_names[][5]; + +#define REGISTER_NAMES \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "ap", "pr", "t", "gbr", "mach","macl", fp_reg_names[16], "rap", \ + fp_reg_names[0], fp_reg_names[1] , fp_reg_names[2], fp_reg_names[3], \ + fp_reg_names[4], fp_reg_names[5], fp_reg_names[6], fp_reg_names[7], \ + fp_reg_names[8], fp_reg_names[9], fp_reg_names[10], fp_reg_names[11], \ + fp_reg_names[12], fp_reg_names[13], fp_reg_names[14], fp_reg_names[15], \ + fp_reg_names[17], fp_reg_names[18], fp_reg_names[19], fp_reg_names[20], \ + fp_reg_names[21], fp_reg_names[22], fp_reg_names[23], fp_reg_names[24], \ + "fpscr", \ +} + +#define DEBUG_REGISTER_NAMES \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "ap", "pr", "t", "gbr", "mach","macl", "fpul","rap", \ + "fr0","fr1","fr2", "fr3", "fr4", "fr5", "fr6", "fr7", \ + "fr8","fr9","fr10","fr11","fr12","fr13","fr14","fr15",\ + "xd0","xd2","xd4", "xd6", "xd8", "xd10","xd12","xd14", \ + "fpscr", \ +} + +/* DBX register number for a given compiler register number. */ +/* GDB has FPUL at 23 and FP0 at 25, so we must add one to all FP registers + to match gdb. */ +#define DBX_REGISTER_NUMBER(REGNO) \ + (((REGNO) >= 22 && (REGNO) <= 39) ? ((REGNO) + 1) : (REGNO)) + +/* Output a label definition. */ +#define ASM_OUTPUT_LABEL(FILE,NAME) \ + do { assemble_name ((FILE), (NAME)); fputs (":\n", (FILE)); } while (0) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf ((FILE), "\t.align %d\n", (LOG)) + +/* Output a function label definition. */ +#define ASM_DECLARE_FUNCTION_NAME(STREAM,NAME,DECL) \ + ASM_OUTPUT_LABEL((STREAM), (NAME)) + +/* Output a globalising directive for a label. */ +#define ASM_GLOBALIZE_LABEL(STREAM,NAME) \ + (fprintf ((STREAM), "\t.global\t"), \ + assemble_name ((STREAM), (NAME)), \ + fputc ('\n', (STREAM))) + +/* The prefix to add to user-visible assembler symbols. 
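Because the off-by-one in DBX_REGISTER_NUMBER above is easy to misread, here is a tiny stand-alone sketch (illustrative only) of the mapping it produces between the compiler's register numbers and the numbering GDB expects in stabs output.

/* Illustrative sketch of the stabs numbering from DBX_REGISTER_NUMBER:
   compiler numbers 22 (fpul) through 39 (fr15) shift up by one so that
   fpul becomes 23 and fr0 becomes 25, matching GDB.  */
#include <stdio.h>

static int
dbx_regno (int regno)
{
  return (regno >= 22 && regno <= 39) ? regno + 1 : regno;
}

int
main (void)
{
  printf ("r15  %2d -> %2d\n", 15, dbx_regno (15));   /* unchanged */
  printf ("fpul %2d -> %2d\n", 22, dbx_regno (22));   /* 23 */
  printf ("fr0  %2d -> %2d\n", 24, dbx_regno (24));   /* 25 */
  printf ("fr15 %2d -> %2d\n", 39, dbx_regno (39));   /* 40 */
  return 0;
}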
*/ + +#define USER_LABEL_PREFIX "_" + +/* The prefix to add to an internally generated label. */ + +#define LOCAL_LABEL_PREFIX "" + +/* Make an internal label into a string. */ +#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \ + sprintf ((STRING), "*%s%s%d", LOCAL_LABEL_PREFIX, (PREFIX), (NUM)) + +/* Output an internal label definition. */ +#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \ + asm_fprintf ((FILE), "%L%s%d:\n", (PREFIX), (NUM)) + +/* #define ASM_OUTPUT_CASE_END(STREAM,NUM,TABLE) */ + +/* Construct a private name. */ +#define ASM_FORMAT_PRIVATE_NAME(OUTVAR,NAME,NUMBER) \ + ((OUTVAR) = (char *) alloca (strlen (NAME) + 10), \ + sprintf ((OUTVAR), "%s.%d", (NAME), (NUMBER))) + +/* Output a relative address table. */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM,BODY,VALUE,REL) \ + switch (GET_MODE (BODY)) \ + { \ + case SImode: \ + asm_fprintf ((STREAM), "\t.long\t%LL%d-%LL%d\n", (VALUE),(REL)); \ + break; \ + case HImode: \ + asm_fprintf ((STREAM), "\t.word\t%LL%d-%LL%d\n", (VALUE),(REL)); \ + break; \ + case QImode: \ + asm_fprintf ((STREAM), "\t.byte\t%LL%d-%LL%d\n", (VALUE),(REL)); \ + break; \ + } + +/* Output an absolute table element. */ + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM,VALUE) \ + if (TARGET_BIGTABLE) \ + asm_fprintf ((STREAM), "\t.long\t%LL%d\n", (VALUE)); \ + else \ + asm_fprintf ((STREAM), "\t.word\t%LL%d\n", (VALUE)); \ + +/* Output various types of constants. */ + +/* This is how to output an assembler line defining a `double'. */ + +#define ASM_OUTPUT_DOUBLE(FILE,VALUE) \ +do { char dstr[30]; \ + REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", dstr); \ + fprintf ((FILE), "\t.double %s\n", dstr); \ + } while (0) + +/* This is how to output an assembler line defining a `float' constant. */ +#define ASM_OUTPUT_FLOAT(FILE,VALUE) \ +do { char dstr[30]; \ + REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", dstr); \ + fprintf ((FILE), "\t.float %s\n", dstr); \ + } while (0) + +#define ASM_OUTPUT_INT(STREAM, EXP) \ + (fprintf ((STREAM), "\t.long\t"), \ + output_addr_const ((STREAM), (EXP)), \ + fputc ('\n', (STREAM))) + +#define ASM_OUTPUT_SHORT(STREAM, EXP) \ + (fprintf ((STREAM), "\t.short\t"), \ + output_addr_const ((STREAM), (EXP)), \ + fputc ('\n', (STREAM))) + +#define ASM_OUTPUT_CHAR(STREAM, EXP) \ + (fprintf ((STREAM), "\t.byte\t"), \ + output_addr_const ((STREAM), (EXP)), \ + fputc ('\n', (STREAM))) + +#define ASM_OUTPUT_BYTE(STREAM, VALUE) \ + fprintf ((STREAM), "\t.byte\t%d\n", (VALUE)) \ + +/* The next two are used for debug info when compiling with -gdwarf. */ +#define UNALIGNED_SHORT_ASM_OP ".uaword" +#define UNALIGNED_INT_ASM_OP ".ualong" + +/* Loop alignment is now done in machine_dependent_reorg, so that + branch shortening can know about it. */ + +/* This is how to output an assembler line + that says to advance the location counter by SIZE bytes. */ + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf ((FILE), "\t.space %d\n", (SIZE)) + +/* This says how to output an assembler line + to define a global common symbol. */ + +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ +( fputs ("\t.comm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ",%d\n", (SIZE))) + +/* This says how to output an assembler line + to define a local common symbol. */ + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \ +( fputs ("\t.lcomm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ",%d\n", (SIZE))) + +/* The assembler's parentheses characters. */ +#define ASM_OPEN_PAREN "(" +#define ASM_CLOSE_PAREN ")" + +/* Target characters. 
*/ +#define TARGET_BELL 007 +#define TARGET_BS 010 +#define TARGET_TAB 011 +#define TARGET_NEWLINE 012 +#define TARGET_VT 013 +#define TARGET_FF 014 +#define TARGET_CR 015 + +/* A C statement to be executed just prior to the output of + assembler code for INSN, to modify the extracted operands so + they will be output differently. + + Here the argument OPVEC is the vector containing the operands + extracted from INSN, and NOPERANDS is the number of elements of + the vector which contain meaningful data for this insn. + The contents of this vector are what will be used to convert the insn + template into assembler code, so you can change the assembler output + by changing the contents of the vector. */ + +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + final_prescan_insn ((INSN), (OPVEC), (NOPERANDS)) + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +#define PRINT_OPERAND(STREAM, X, CODE) print_operand ((STREAM), (X), (CODE)) + +/* Print a memory address as an operand to reference that memory location. */ + +#define PRINT_OPERAND_ADDRESS(STREAM,X) print_operand_address ((STREAM), (X)) + +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \ + ((CHAR) == '.' || (CHAR) == '#' || (CHAR) == '@' || (CHAR) == ',' \ + || (CHAR) == '$') + +extern struct rtx_def *sh_compare_op0; +extern struct rtx_def *sh_compare_op1; +extern struct rtx_def *prepare_scc_operands(); + +/* Which processor to schedule for. The elements of the enumeration must + match exactly the cpu attribute in the sh.md file. */ + +enum processor_type { + PROCESSOR_SH1, + PROCESSOR_SH2, + PROCESSOR_SH3, + PROCESSOR_SH3E, + PROCESSOR_SH4 +}; + +#define sh_cpu_attr ((enum attr_cpu)sh_cpu) +extern enum processor_type sh_cpu; + +extern enum machine_mode sh_addr_diff_vec_mode; + +extern int optimize; /* needed for gen_casesi. */ + +/* Declare functions defined in sh.c and used in templates. */ + +extern char *output_branch(); +extern char *output_ieee_ccmpeq(); +extern char *output_branchy_insn(); +extern char *output_shift(); +extern char *output_movedouble(); +extern char *output_movepcrel(); +extern char *output_jump_label_table(); +extern char *output_far_jump(); + +enum mdep_reorg_phase_e +{ + SH_BEFORE_MDEP_REORG, + SH_INSERT_USES_LABELS, + SH_SHORTEN_BRANCHES0, + SH_FIXUP_PCLOAD, + SH_SHORTEN_BRANCHES1, + SH_AFTER_MDEP_REORG +}; + +extern enum mdep_reorg_phase_e mdep_reorg_phase; + +void machine_dependent_reorg (); +struct rtx_def *sfunc_uses_reg (); +int barrier_align (); +int sh_loop_align (); + +#define MACHINE_DEPENDENT_REORG(X) machine_dependent_reorg(X) + +/* Generate calls to memcpy, memcmp and memset. */ + +#define TARGET_MEM_FUNCTIONS + +/* Define this macro if you want to implement any pragmas. If defined, it + is a C expression whose value is 1 if the pragma was handled by the + macro, zero otherwise. */ +#define HANDLE_PRAGMA(GETC, UNGETC, NODE) sh_handle_pragma (GETC, UNGETC, NODE) +extern int sh_handle_pragma (); + +/* Set when processing a function with pragma interrupt turned on. */ + +extern int pragma_interrupt; + +/* Set to an RTX containing the address of the stack to switch to + for interrupt functions. */ +extern struct rtx_def *sp_switch; + +/* A C expression whose value is nonzero if IDENTIFIER with arguments ARGS + is a valid machine specific attribute for DECL. + The attributes in ATTRIBUTES have previously been assigned to DECL. 
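For context on the pragma and attribute hooks above: the attribute spellings themselves are implemented in sh.c, which is not part of this excerpt, so the sketch below assumes the "interrupt_handler" spelling handled there; treat that name as an assumption rather than something this header defines.

/* Assumed usage sketch -- the "interrupt_handler" attribute is handled
   in sh.c, not shown in this patch hunk.  */
void timer_isr (void) __attribute__ ((interrupt_handler));

void
timer_isr (void)
{
  /* The prologue/epilogue save and restore every register the handler
     touches, and the function returns with rte rather than rts.  */
}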
*/ +extern int sh_valid_machine_decl_attribute (); +#define VALID_MACHINE_DECL_ATTRIBUTE(DECL, ATTRIBUTES, IDENTIFIER, ARGS) \ +sh_valid_machine_decl_attribute (DECL, ATTRIBUTES, IDENTIFIER, ARGS) + +extern void sh_pragma_insert_attributes (); +#define PRAGMA_INSERT_ATTRIBUTES(node, pattr, prefix_attr) \ + sh_pragma_insert_attributes (node, pattr, prefix_attr) + +extern int sh_flag_remove_dead_before_cse; +extern int rtx_equal_function_value_matters; +extern struct rtx_def *fpscr_rtx; +extern struct rtx_def *get_fpscr_rtx (); + + +/* Instructions with unfilled delay slots take up an extra two bytes for + the nop in the delay slot. */ + +#define ADJUST_INSN_LENGTH(X, LENGTH) \ + if (((GET_CODE (X) == INSN \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER) \ + || GET_CODE (X) == CALL_INSN \ + || (GET_CODE (X) == JUMP_INSN \ + && GET_CODE (PATTERN (X)) != ADDR_DIFF_VEC \ + && GET_CODE (PATTERN (X)) != ADDR_VEC)) \ + && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (X)))) != SEQUENCE \ + && get_attr_needs_delay_slot (X) == NEEDS_DELAY_SLOT_YES) \ + (LENGTH) += 2; + +/* Define the codes that are matched by predicates in sh.c. */ +#define PREDICATE_CODES \ + {"arith_operand", {SUBREG, REG, CONST_INT}}, \ + {"arith_reg_operand", {SUBREG, REG}}, \ + {"arith_reg_or_0_operand", {SUBREG, REG, CONST_INT}}, \ + {"binary_float_operator", {PLUS, MULT}}, \ + {"commutative_float_operator", {PLUS, MULT}}, \ + {"fp_arith_reg_operand", {SUBREG, REG}}, \ + {"fp_extended_operand", {SUBREG, REG, FLOAT_EXTEND}}, \ + {"fpscr_operand", {REG}}, \ + {"general_movsrc_operand", {SUBREG, REG, CONST_INT, MEM}}, \ + {"general_movdst_operand", {SUBREG, REG, CONST_INT, MEM}}, \ + {"logical_operand", {SUBREG, REG, CONST_INT}}, \ + {"noncommutative_float_operator", {MINUS, DIV}}, \ + {"register_operand", {SUBREG, REG}}, + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. + + Leaving the unsignedp unchanged gives better code than always setting it + to 0. This is despite the fact that we have only signed char and short + load instructions. */ +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + (MODE) = SImode; + +/* Defining PROMOTE_FUNCTION_ARGS eliminates some unnecessary zero/sign + extensions applied to char/short functions arguments. Defining + PROMOTE_FUNCTION_RETURN does the same for function returns. */ + +#define PROMOTE_FUNCTION_ARGS +#define PROMOTE_FUNCTION_RETURN + +/* ??? Define ACCUMULATE_OUTGOING_ARGS? This is more efficient than pushing + and poping arguments. However, we do have push/pop instructions, and + rather limited offsets (4 bits) in load/store instructions, so it isn't + clear if this would give better code. If implemented, should check for + compatibility problems. */ + +/* A C statement (sans semicolon) to update the integer variable COST + based on the relationship between INSN that is dependent on + DEP_INSN through the dependence LINK. The default is to make no + adjustment to COST. This can be used for example to specify to + the scheduler that an output- or anti-dependence does not incur + the same cost as a data-dependence. 
*/ + +#define ADJUST_COST(insn,link,dep_insn,cost) \ +do { \ + rtx reg; \ + \ + if (GET_CODE(insn) == CALL_INSN) \ + { \ + /* The only input for a call that is timing-critical is the \ + function's address. */ \ + rtx call = PATTERN (insn); \ + \ + if (GET_CODE (call) == PARALLEL) \ + call = XVECEXP (call, 0 ,0); \ + if (GET_CODE (call) == SET) \ + call = SET_SRC (call); \ + if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM \ + && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)) \ + (cost) = 0; \ + } \ + /* All sfunc calls are parallels with at least four components. \ + Exploit this to avoid unnecessary calls to sfunc_uses_reg. */ \ + else if (GET_CODE (PATTERN (insn)) == PARALLEL \ + && XVECLEN (PATTERN (insn), 0) >= 4 \ + && (reg = sfunc_uses_reg (insn))) \ + { \ + /* Likewise, the most timing critical input for an sfuncs call \ + is the function address. However, sfuncs typically start \ + using their arguments pretty quickly. \ + Assume a four cycle delay before they are needed. */ \ + if (! reg_set_p (reg, dep_insn)) \ + cost -= TARGET_SUPERSCALAR ? 40 : 4; \ + } \ + /* Adjust load_si / pcload_si type insns latency. Use the known \ + nominal latency and form of the insn to speed up the check. */ \ + else if (cost == 3 \ + && GET_CODE (PATTERN (dep_insn)) == SET \ + /* Latency for dmpy type insns is also 3, so check the that \ + it's actually a move insn. */ \ + && general_movsrc_operand (SET_SRC (PATTERN (dep_insn)), SImode))\ + cost = 2; \ + else if (cost == 30 \ + && GET_CODE (PATTERN (dep_insn)) == SET \ + && GET_MODE (SET_SRC (PATTERN (dep_insn))) == SImode) \ + cost = 20; \ +} while (0) \ + +/* For the sake of libgcc2.c, indicate target supports atexit. */ +#define HAVE_ATEXIT + +#define SH_DYNAMIC_SHIFT_COST \ + (TARGET_HARD_SH4 ? 1 : TARGET_SH3 ? (TARGET_SMALLCODE ? 1 : 2) : 20) diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md new file mode 100755 index 0000000..7e417ab --- /dev/null +++ b/gcc/config/sh/sh.md @@ -0,0 +1,4654 @@ +;; CYGNUS LOCAL SH4 Phase III: REG_LIBCALL / REG_RETVAL wrapping of +;; MACH_REG / MACL_REG usage. +;;- Machine description for the Hitachi SH. +;; Copyright (C) 1993 - 1999 Free Software Foundation, Inc. +;; Contributed by Steve Chamberlain (sac@cygnus.com). +;; Improved by Jim Wilson (wilson@cygnus.com). + +;; This file is part of GNU CC. + +;; GNU CC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU CC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU CC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + + +;; ??? Should prepend a * to all pattern names which are not used. +;; This will make the compiler smaller, and rebuilds after changes faster. + +;; ??? Should be enhanced to include support for many more GNU superoptimizer +;; sequences. Especially the sequences for arithmetic right shifts. + +;; ??? Should check all DImode patterns for consistency and usefulness. + +;; ??? The MAC.W and MAC.L instructions are not supported. There is no +;; way to generate them. + +;; ??? 
The cmp/str instruction is not supported. Perhaps it can be used +;; for a str* inline function. + +;; BSR is not generated by the compiler proper, but when relaxing, it +;; generates .uses pseudo-ops that allow linker relaxation to create +;; BSR. This is actually implemented in bfd/{coff,elf32}-sh.c + +;; Special constraints for SH machine description: +;; +;; t -- T +;; x -- mac +;; l -- pr +;; z -- r0 +;; +;; Special formats used for outputting SH instructions: +;; +;; %. -- print a .s if insn needs delay slot +;; %@ -- print rte/rts if is/isn't an interrupt function +;; %# -- output a nop if there is nothing to put in the delay slot +;; %O -- print a constant without the # +;; %R -- print the lsw reg of a double +;; %S -- print the msw reg of a double +;; %T -- print next word of a double REG or MEM +;; +;; Special predicates: +;; +;; arith_operand -- operand is valid source for arithmetic op +;; arith_reg_operand -- operand is valid register for arithmetic op +;; general_movdst_operand -- operand is valid move destination +;; general_movsrc_operand -- operand is valid move source +;; logical_operand -- operand is valid source for logical op +;; ------------------------------------------------------------------------- +;; Attributes +;; ------------------------------------------------------------------------- + +;; Target CPU. + +(define_attr "cpu" + "sh1,sh2,sh3,sh3e,sh4" + (const (symbol_ref "sh_cpu_attr"))) + +(define_attr "endian" "big,little" + (const (if_then_else (symbol_ref "TARGET_LITTLE_ENDIAN") + (const_string "little") (const_string "big")))) + +(define_attr "fmovd" "yes,no" + (const (if_then_else (symbol_ref "TARGET_FMOVD") + (const_string "yes") (const_string "no")))) +;; issues/clock +(define_attr "issues" "1,2" + (const (if_then_else (symbol_ref "TARGET_SUPERSCALAR") (const_string "2") (const_string "1")))) + +;; cbranch conditional branch instructions +;; jump unconditional jumps +;; arith ordinary arithmetic +;; arith3 a compound insn that behaves similarly to a sequence of +;; three insns of type arith +;; arith3b like above, but might end with a redirected branch +;; load from memory +;; load_si Likewise, SImode variant for general register. +;; store to memory +;; move register to register +;; fmove register to register, floating point +;; smpy word precision integer multiply +;; dmpy longword or doublelongword precision integer multiply +;; return rts +;; pload load of pr reg, which can't be put into delay slot of rts +;; pstore store of pr reg, which can't be put into delay slot of jsr +;; pcload pc relative load of constant value +;; pcload_si Likewise, SImode variant for general register. +;; rte return from exception +;; sfunc special function call with known used registers +;; call function call +;; fp floating point +;; fdiv floating point divide (or square root) +;; gp_fpul move between general purpose register and fpul +;; dfp_arith, dfp_cmp,dfp_conv +;; dfdiv double precision floating point divide (or square root) +;; nil no-op move, will be deleted. + +(define_attr "type" + "cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,other,load,load_si,store,move,fmove,smpy,dmpy,return,pload,pstore,pcload,pcload_si,rte,sfunc,call,fp,fdiv,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,nil" + (const_string "other")) + +; If a conditional branch destination is within -252..258 bytes away +; from the instruction it can be 2 bytes long. Something in the +; range -4090..4100 bytes can be 6 bytes long. 
All other conditional +; branches are initially assumed to be 16 bytes long. +; In machine_dependent_reorg, we split all branches that are longer than +; 2 bytes. + +;; The maximum range used for SImode constant pool entrys is 1018. A final +;; instruction can add 8 bytes while only being 4 bytes in size, thus we +;; can have a total of 1022 bytes in the pool. Add 4 bytes for a branch +;; instruction around the pool table, 2 bytes of alignment before the table, +;; and 30 bytes of alignment after the table. That gives a maximum total +;; pool size of 1058 bytes. +;; Worst case code/pool content size ratio is 1:2 (using asms). +;; Thus, in the worst case, there is one instruction in front of a maximum +;; sized pool, and then there are 1052 bytes of pool for every 508 bytes of +;; code. For the last n bytes of code, there are 2n + 36 bytes of pool. +;; If we have a forward branch, the initial table will be put after the +;; unconditional branch. +;; +;; ??? We could do much better by keeping track of the actual pcloads within +;; the branch range and in the pcload range in front of the branch range. + +;; ??? This looks ugly because genattrtab won't allow if_then_else or cond +;; inside an le. +(define_attr "short_cbranch_p" "no,yes" + (cond [(ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 506)) + (const_string "yes") + (ne (symbol_ref "NEXT_INSN (PREV_INSN (insn)) != insn") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 508)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "med_branch_p" "no,yes" + (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 990)) + (const_int 1988)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 4092)) + (const_int 8186)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "med_cbranch_p" "no,yes" + (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 988)) + (const_int 1986)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 4090)) + (const_int 8184)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "braf_branch_p" "no,yes" + (cond [(ne (symbol_ref "! TARGET_SH2") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 10330)) + (const_int 20660)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 32764)) + (const_int 65530)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "braf_cbranch_p" "no,yes" + (cond [(ne (symbol_ref "! TARGET_SH2") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 10328)) + (const_int 20658)) + (const_string "yes") + (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0)) + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 32762)) + (const_int 65528)) + (const_string "yes") + ] (const_string "no"))) + +; An unconditional jump in the range -4092..4098 can be 2 bytes long. +; For wider ranges, we need a combination of a code and a data part. 
+; If we can get a scratch register for a long range jump, the code +; part can be 4 bytes long; otherwise, it must be 8 bytes long. +; If the jump is in the range -32764..32770, the data part can be 2 bytes +; long; otherwise, it must be 6 bytes long. + +; All other instructions are two bytes long by default. + +;; ??? This should use something like *branch_p (minus (match_dup 0) (pc)), +;; but getattrtab doesn't understand this. +(define_attr "length" "" + (cond [(eq_attr "type" "cbranch") + (cond [(eq_attr "short_cbranch_p" "yes") + (const_int 2) + (eq_attr "med_cbranch_p" "yes") + (const_int 6) + (eq_attr "braf_cbranch_p" "yes") + (const_int 12) +;; ??? using pc is not computed transitively. + (ne (match_dup 0) (match_dup 0)) + (const_int 14) + ] (const_int 16)) + (eq_attr "type" "jump") + (cond [(eq_attr "med_branch_p" "yes") + (const_int 2) + (and (eq (symbol_ref "GET_CODE (PREV_INSN (insn))") + (symbol_ref "INSN")) + (eq (symbol_ref "INSN_CODE (PREV_INSN (insn))") + (symbol_ref "code_for_indirect_jump_scratch"))) + (if_then_else (eq_attr "braf_branch_p" "yes") + (const_int 6) + (const_int 10)) + (eq_attr "braf_branch_p" "yes") + (const_int 10) +;; ??? using pc is not computed transitively. + (ne (match_dup 0) (match_dup 0)) + (const_int 12) + ] (const_int 14)) + ] (const_int 2))) + +;; (define_function_unit {name} {num-units} {n-users} {test} +;; {ready-delay} {issue-delay} [{conflict-list}]) + +;; Load and store instructions save a cycle if they are aligned on a +;; four byte boundary. Using a function unit for stores encourages +;; gcc to separate load and store instructions by one instruction, +;; which makes it more likely that the linker will be able to word +;; align them when relaxing. + +;; Loads have a latency of two. +;; However, call insns can have a delay slot, so that we want one more +;; insn to be scheduled between the load of the function address and the call. +;; This is equivalent to a latency of three. +;; We cannot use a conflict list for this, because we need to distinguish +;; between the actual call address and the function arguments. +;; ADJUST_COST can only properly handle reductions of the cost, so we +;; use a latency of three here. +;; We only do this for SImode loads of general registers, to make the work +;; for ADJUST_COST easier. +(define_function_unit "memory" 1 0 + (and (eq_attr "issues" "1") + (eq_attr "type" "load_si,pcload_si")) + 3 2) +(define_function_unit "memory" 1 0 + (and (eq_attr "issues" "1") + (eq_attr "type" "load,pcload,pload,store,pstore")) + 2 2) + +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "arith3,arith3b")) 3 3) + +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "dyn_shift")) 2 2) + +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "!arith3,arith3b,dyn_shift")) 1 1) + +;; ??? These are approximations. +(define_function_unit "mpy" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "smpy")) 2 2) +(define_function_unit "mpy" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "dmpy")) 3 3) + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "fp,fmove")) 2 1) +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "fdiv")) 13 12) + + +;; SH4 scheduling +;; The SH4 is a dual-issue implementation, thus we have to multiply all +;; costs by at least two. 
+;; There will be single increments of the modeled that don't correspond +;; to the actual target ;; whenever two insns to be issued depend one a +;; single resource, and the scheduler picks to be the first one. +;; If we multiplied the costs just by two, just two of these single +;; increments would amount to an actual cycle. By picking a larger +;; factor, we can ameliorate the effect; However, we then have to make sure +;; that only two insns are modeled as issued per actual cycle. +;; Moreover, we need a way to specify the latency of insns that don't +;; use an actual function unit. +;; We use an 'issue' function unit to do that, and a cost factor of 10. + +(define_function_unit "issue" 2 0 + (and (eq_attr "issues" "2") (eq_attr "type" "!nil,arith3")) + 10 10) + +(define_function_unit "issue" 2 0 + (and (eq_attr "issues" "2") (eq_attr "type" "arith3")) + 30 30) + +;; There is no point in providing exact scheduling information about branches, +;; because they are at the starts / ends of basic blocks anyways. + +;; Some insns cannot be issued before/after another insn in the same cycle, +;; irrespective of the type of the other insn. + +;; default is dual-issue, but can't be paired with an insn that +;; uses multiple function units. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "!smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul,call,sfunc,arith3,arith3b")) + 1 10 + [(eq_attr "type" "smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul")]) + +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul")) + 10 10 + [(const_int 1)]) + +;; arith3 insns are always pairable at the start, but not inecessarily at +;; the end; however, there doesn;t seem to be a way to express that. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "arith3")) + 30 20 + [(const_int 1)]) + +;; arith3b insn are pairable at the end and have latency that prevents pairing +;; with the following branch, but we don't want this latency be respected; +;; When the following branch is immediately adjacent, we can redirect the +;; internal branch, which is likly to be a larger win. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "arith3b")) + 20 20 + [(const_int 1)]) + +;; calls introduce a longisch delay that is likely to flush the pipelines. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "call,sfunc")) + 160 160 + [(eq_attr "type" "!call") (eq_attr "type" "call")]) + +;; Load and store instructions have no alignment peculiarities for the SH4, +;; but they use the load-store unit, which they share with the fmove type +;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) . +;; Loads have a latency of two. +;; However, call insns can only paired with a preceding insn, and have +;; a delay slot, so that we want two more insns to be scheduled between the +;; load of the function address and the call. This is equivalent to a +;; latency of three. +;; We cannot use a conflict list for this, because we need to distinguish +;; between the actual call address and the function arguments. +;; ADJUST_COST can only properly handle reductions of the cost, so we +;; use a latency of three here, which gets multiplied by 10 to yield 30. +;; We only do this for SImode loads of general registers, to make the work +;; for ADJUST_COST easier. 
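As a rough illustration of the factor-of-ten scaling described in the SH4 scheduling comments above, here is a minimal sketch; the macro and function names are invented for the illustration and do not appear in these sources.

/* Illustrative model only: one real SH4 cycle is represented by 10 cost
   units in the SH4 function units of this file.  */
#define SH4_UNITS_PER_CYCLE 10

static int
sh4_model_units (int real_cycles)
{
  return real_cycles * SH4_UNITS_PER_CYCLE;
}

/* With a factor of only 2, a single spurious +1 increment (two co-issued
   insns contending for one modeled resource) would already amount to half
   a real cycle; with a factor of 10 it costs only a tenth of one.  The
   "issue" unit above (2 units, cost 10) is what keeps the model down to
   two insns per real cycle, and the SImode load latency of 3 cycles shows
   up above as sh4_model_units (3) == 30.  */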
+ +;; When specifying different latencies for different insns using the +;; the same function unit, genattrtab.c assumes a 'FIFO constraint' +;; so that the blockage is at least READY-COST (E) + 1 - READY-COST (C) +;; for an executing insn E and a candidate insn C. +;; Therefore, we define three different function units for load_store: +;; load_store, load and load_si. + +(define_function_unit "load_si" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "load_si,pcload_si")) 30 10) +(define_function_unit "load" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "load,pcload,pload")) 20 10) +(define_function_unit "load_store" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "load_si,pcload_si,load,pcload,pload,store,pstore,fmove")) + 10 10) + +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "arith,dyn_shift")) 10 10) + +;; Again, we have to pretend a lower latency for the "int" unit to avoid a +;; spurious FIFO constraint; the multiply instructions use the "int" +;; unit actually only for two cycles. +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 20 20) + +;; We use a fictous "mpy" unit to express the actual latency. +(define_function_unit "mpy" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 20) + +;; Again, we have to pretend a lower latency for the "int" unit to avoid a +;; spurious FIFO constraint. +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 10 10) + +;; We use a fictous "gp_fpul" unit to express the actual latency. +(define_function_unit "gp_fpul" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 20 10) + +;; ??? multiply uses the floating point unit, but with a two cycle delay. +;; Thus, a simple single-precision fp operation could finish if issued in +;; the very next cycle, but stalls when issued two or three cycles later. +;; Similarily, a divide / sqrt can work without stalls if issued in +;; the very next cycle, while it would have to block if issued two or +;; three cycles later. +;; There is no way to model this with gcc's function units. This problem is +;; actually mentioned in md.texi. Tackling this problem requires first that +;; it is possible to speak about the target in an open discussion. +;; +;; However, simple double-precision operations always conflict. + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 40 + [(eq_attr "type" "dfp_cmp,dfp_conv,dfp_arith")]) + +;; The "fp" unit is for pipeline stages F1 and F2. + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "fp")) 30 10) + +;; Again, we have to pretend a lower latency for the "fp" unit to avoid a +;; spurious FIFO constraint; the bulk of the fdiv type insns executes in +;; the F3 stage. +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 30 10) + +;; The "fdiv" function unit models the aggregate effect of the F1, F2 and F3 +;; pipeline stages on the pipelining of fdiv/fsqrt insns. +;; We also use it to give the actual latency here. +;; fsqrt is actually one cycle faster than fdiv (and the value used here), +;; but that will hardly matter in practice for scheduling. +(define_function_unit "fdiv" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 120 100) + +;; There is again a late use of the "fp" unit by [d]fdiv type insns +;; that we can't express. 
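The 'FIFO constraint' mentioned near the top of this block is why the load units above are split into load_si, load and load_store; a minimal sketch of the arithmetic follows, with an invented helper name.

/* genattrtab's implicit FIFO rule: a candidate insn C issued on the same
   unit as a still-executing insn E is blocked for at least
   READY-COST (E) + 1 - READY-COST (C) units.  */
static int
min_blockage (int ready_cost_e, int ready_cost_c)
{
  return ready_cost_e + 1 - ready_cost_c;
}

/* If load_si (ready cost 30) and a plain load (ready cost 20) shared one
   unit, a load issued after a load_si would be blocked for at least
   30 + 1 - 20 = 11 units, i.e. more than one modeled cycle.  Giving each
   class its own unit, all with issue delay 10, avoids that spurious
   blockage.  */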
+ +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfp_cmp,dfp_conv")) 40 20) + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfp_arith")) 80 60) + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 10) + +(define_function_unit "fdiv" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 210) + +; Definitions for filling branch delay slots. + +(define_attr "needs_delay_slot" "yes,no" (const_string "no")) + +;; ??? This should be (nil) instead of (const_int 0) +(define_attr "hit_stack" "yes,no" + (cond [(eq (symbol_ref "find_regno_note (insn, REG_INC, 15)") (const_int 0)) + (const_string "no")] + (const_string "yes"))) + +(define_attr "interrupt_function" "no,yes" + (const (symbol_ref "pragma_interrupt"))) + +(define_attr "in_delay_slot" "yes,no" + (cond [(eq_attr "type" "cbranch") (const_string "no") + (eq_attr "type" "pcload,pcload_si") (const_string "no") + (eq_attr "needs_delay_slot" "yes") (const_string "no") + (eq_attr "length" "2") (const_string "yes") + ] (const_string "no"))) + +(define_attr "is_sfunc" "" + (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0))) + +(define_delay + (eq_attr "needs_delay_slot" "yes") + [(eq_attr "in_delay_slot" "yes") (nil) (nil)]) + +;; On the SH and SH2, the rte instruction reads the return pc from the stack, +;; and thus we can't put a pop instruction in its delay slot. +;; ??? On the SH3, the rte instruction does not use the stack, so a pop +;; instruction can go in the delay slot. + +;; Since a normal return (rts) implicitly uses the PR register, +;; we can't allow PR register loads in an rts delay slot. + +(define_delay + (eq_attr "type" "return") + [(and (eq_attr "in_delay_slot" "yes") + (ior (and (eq_attr "interrupt_function" "no") + (eq_attr "type" "!pload")) + (and (eq_attr "interrupt_function" "yes") + (eq_attr "hit_stack" "no")))) (nil) (nil)]) + +;; Since a call implicitly uses the PR register, we can't allow +;; a PR register store in a jsr delay slot. + +(define_delay + (ior (eq_attr "type" "call") (eq_attr "type" "sfunc")) + [(and (eq_attr "in_delay_slot" "yes") + (eq_attr "type" "!pstore")) (nil) (nil)]) + +;; Say that we have annulled true branches, since this gives smaller and +;; faster code when branches are predicted as not taken. + +(define_delay + (and (eq_attr "type" "cbranch") + (ne (symbol_ref "TARGET_SH2") (const_int 0))) + [(eq_attr "in_delay_slot" "yes") (eq_attr "in_delay_slot" "yes") (nil)]) + +;; ------------------------------------------------------------------------- +;; SImode signed integer comparisons +;; ------------------------------------------------------------------------- + +(define_insn "" + [(set (reg:SI 18) + (eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r") + (match_operand:SI 1 "arith_operand" "L,r")) + (const_int 0)))] + "" + "tst %1,%0") + +;; ??? Perhaps should only accept reg/constant if the register is reg 0. +;; That would still allow reload to create cmpi instructions, but would +;; perhaps allow forcing the constant into a register when that is better. +;; Probably should use r0 for mem/imm compares, but force constant into a +;; register for pseudo/imm compares. 
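For orientation, here is roughly how a simple test feeding the T bit is expected to come out with the "tst" pattern at the start of this section; the C function, the register choices and the exact sequence are only illustrative and depend on register allocation.

/* C source */
int
low_bits_clear (int x)
{
  return (x & 7) == 0;
}

/* One plausible SH output: "tst" sets the T bit (hard register 18)
   directly, and "movt" then materializes the result.

       mov     #7,r1
       tst     r1,r4       ! T := ((r4 & r1) == 0)
       rts
       movt    r0          ! r0 := T, filled into the rts delay slot
*/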
+ +(define_insn "cmpeqsi_t" + [(set (reg:SI 18) (eq:SI (match_operand:SI 0 "arith_reg_operand" "r,z,r") + (match_operand:SI 1 "arith_operand" "N,rI,r")))] + "" + "@ + tst %0,%0 + cmp/eq %1,%0 + cmp/eq %1,%0") + +(define_insn "cmpgtsi_t" + [(set (reg:SI 18) (gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r") + (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))] + "" + "@ + cmp/gt %1,%0 + cmp/pl %0") + +(define_insn "cmpgesi_t" + [(set (reg:SI 18) (ge:SI (match_operand:SI 0 "arith_reg_operand" "r,r") + (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))] + "" + "@ + cmp/ge %1,%0 + cmp/pz %0") + +;; ------------------------------------------------------------------------- +;; SImode unsigned integer comparisons +;; ------------------------------------------------------------------------- + +(define_insn "cmpgeusi_t" + [(set (reg:SI 18) (geu:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")))] + "" + "cmp/hs %1,%0") + +(define_insn "cmpgtusi_t" + [(set (reg:SI 18) (gtu:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")))] + "" + "cmp/hi %1,%0") + +;; We save the compare operands in the cmpxx patterns and use them when +;; we generate the branch. + +(define_expand "cmpsi" + [(set (reg:SI 18) (compare (match_operand:SI 0 "arith_operand" "") + (match_operand:SI 1 "arith_operand" "")))] + "" + " +{ + sh_compare_op0 = operands[0]; + sh_compare_op1 = operands[1]; + DONE; +}") + +;; ------------------------------------------------------------------------- +;; DImode signed integer comparisons +;; ------------------------------------------------------------------------- + +;; ??? Could get better scheduling by splitting the initial test from the +;; rest of the insn after reload. However, the gain would hardly justify +;; the sh.md size increase necessary to do that. + +(define_insn "" + [(set (reg:SI 18) + (eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_operand" "r")) + (const_int 0)))] + "" + "* return output_branchy_insn (EQ, \"tst\\t%S1,%S0\;bf\\t%l9\;tst\\t%R1,%R0\", + insn, operands);" + [(set_attr "length" "6") + (set_attr "type" "arith3b")]) + +(define_insn "cmpeqdi_t" + [(set (reg:SI 18) (eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))] + "" + "* + return output_branchy_insn + (EQ, + (which_alternative + ? 
\"cmp/eq\\t%S1,%S0\;bf\\t%l9\;cmp/eq\\t%R1,%R0\" + : \"tst\\t%S0,%S0\;bf\\t%l9\;tst\\t%R0,%R0\"), + insn, operands);" + [(set_attr "length" "6") + (set_attr "type" "arith3b")]) + +(define_insn "cmpgtdi_t" + [(set (reg:SI 18) (gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))] + "TARGET_SH2" + "@ + cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/gt\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=: + tst\\t%S0,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/pl\\t%S0\;cmp/hi\\t%S0,%R0\\n%,Ldi%=:" + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +(define_insn "cmpgedi_t" + [(set (reg:SI 18) (ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))] + "TARGET_SH2" + "@ + cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/ge\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=: + cmp/pz\\t%S0" + [(set_attr "length" "8,2") + (set_attr "type" "arith3,arith")]) + +;; ------------------------------------------------------------------------- +;; DImode unsigned integer comparisons +;; ------------------------------------------------------------------------- + +(define_insn "cmpgeudi_t" + [(set (reg:SI 18) (geu:SI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" + "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hs\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:" + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +(define_insn "cmpgtudi_t" + [(set (reg:SI 18) (gtu:SI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" + "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hi\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:" + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +;; We save the compare operands in the cmpxx patterns and use them when +;; we generate the branch. + +(define_expand "cmpdi" + [(set (reg:SI 18) (compare (match_operand:DI 0 "arith_operand" "") + (match_operand:DI 1 "arith_operand" "")))] + "TARGET_SH2" + " +{ + sh_compare_op0 = operands[0]; + sh_compare_op1 = operands[1]; + DONE; +}") + +;; ------------------------------------------------------------------------- +;; Addition instructions +;; ------------------------------------------------------------------------- + +;; ??? This should be a define expand. + +(define_insn "adddi3" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (reg:SI 18))] + "" + "#" + [(set_attr "length" "6")]) + +(define_split + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (reg:SI 18))] + "reload_completed" + [(const_int 0)] + " +{ + rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]); + high0 = gen_rtx (REG, SImode, + true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + high2 = gen_rtx (REG, SImode, + true_regnum (operands[2]) + (TARGET_LITTLE_ENDIAN ? 
1 : 0)); + emit_insn (gen_clrt ()); + emit_insn (gen_addc (low0, low0, gen_lowpart (SImode, operands[2]))); + emit_insn (gen_addc1 (high0, high0, high2)); + DONE; +}") + +(define_insn "addc" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI 18))) + (set (reg:SI 18) + (ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))] + "" + "addc %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "addc1" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI 18))) + (clobber (reg:SI 18))] + "" + "addc %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "addsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (plus:SI (match_operand:SI 1 "arith_operand" "%0") + (match_operand:SI 2 "arith_operand" "rI")))] + "" + "add %2,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Subtraction instructions +;; ------------------------------------------------------------------------- + +;; ??? This should be a define expand. + +(define_insn "subdi3" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (minus:DI (match_operand:DI 1 "arith_reg_operand" "0") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (reg:SI 18))] + "" + "#" + [(set_attr "length" "6")]) + +(define_split + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (minus:DI (match_operand:DI 1 "arith_reg_operand" "0") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (reg:SI 18))] + "reload_completed" + [(const_int 0)] + " +{ + rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]); + high0 = gen_rtx (REG, SImode, + true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + high2 = gen_rtx (REG, SImode, + true_regnum (operands[2]) + (TARGET_LITTLE_ENDIAN ? 1 : 0)); + emit_insn (gen_clrt ()); + emit_insn (gen_subc (low0, low0, gen_lowpart (SImode, operands[2]))); + emit_insn (gen_subc1 (high0, high0, high2)); + DONE; +}") + +(define_insn "subc" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI 18))) + (set (reg:SI 18) + (gtu:SI (minus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))] + "" + "subc %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "subc1" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI 18))) + (clobber (reg:SI 18))] + "" + "subc %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "*subsi3_internal" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")))] + "" + "sub %2,%0" + [(set_attr "type" "arith")]) + +;; Convert `constant - reg' to `neg rX; add rX, #const' since this +;; will sometimes save one instruction. Otherwise we might get +;; `mov #const, rY; sub rY,rX; mov rX, rY' if the source and dest regs +;; are the same. 
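A short sketch of the `constant - reg' rewrite described in the comment above, which the subsi3 expander that follows performs; the C function and register names are only illustrative.

/* C source */
int
from_ten (int x)
{
  return 10 - x;
}

/* The expander turns "10 - x" into negate-then-add-constant:

       neg     r4,r0       ! r0 := -x
       rts
       add     #10,r0      ! r0 := 10 - x, in the rts delay slot

   rather than the longer  mov #10,rY; sub r4,rY; mov rY,r0  sequence.  */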
+ +(define_expand "subsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (minus:SI (match_operand:SI 1 "arith_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")))] + "" + " +{ + if (GET_CODE (operands[1]) == CONST_INT) + { + emit_insn (gen_negsi2 (operands[0], operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[0], operands[1])); + DONE; + } +}") + +;; ------------------------------------------------------------------------- +;; Division instructions +;; ------------------------------------------------------------------------- + +;; We take advantage of the library routines which don't clobber as many +;; registers as a normal function call would. + +;; The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it +;; also has an effect on the register that holds the address of the sfunc. +;; To make this work, we have an extra dummy insns that shows the use +;; of this register for reorg. + +(define_insn "use_sfunc_addr" + [(set (reg:SI 17) (unspec [(match_operand:SI 0 "register_operand" "r")] 5))] + "" + "" + [(set_attr "length" "0")]) + +;; We must use a pseudo-reg forced to reg 0 in the SET_DEST rather than +;; hard register 0. If we used hard register 0, then the next instruction +;; would be a move from hard register 0 to a pseudo-reg. If the pseudo-reg +;; gets allocated to a stack slot that needs its address reloaded, then +;; there is nothing to prevent reload from using r0 to reload the address. +;; This reload would clobber the value in r0 we are trying to store. +;; If we let reload allocate r0, then this problem can never happen. + +(define_insn "udivsi3_i1" + [(set (match_operand:SI 0 "register_operand" "=z") + (udiv:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "! TARGET_SH4" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "udivsi3_i4" + [(set (match_operand:SI 0 "register_operand" "=y") + (udiv:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (clobber (reg:DF 28)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (use (reg:PSI 48)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH4 && ! 
TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "udivsi3_i4_single" + [(set (match_operand:SI 0 "register_operand" "=y") + (udiv:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (clobber (reg:DF 28)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_HARD_SH4 && TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "udivsi3" + [(set (reg:SI 4) (match_operand:SI 1 "general_operand" "")) + (set (reg:SI 5) (match_operand:SI 2 "general_operand" "")) + (set (match_dup 3) (symbol_ref:SI "__udivsi3")) + (parallel [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (reg:SI 4) + (reg:SI 5))) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (use (match_dup 3))])] + "" + " +{ + rtx first, last; + + first = emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]); + operands[3] = gen_reg_rtx(SImode); + if (TARGET_HARD_SH4) + { + emit_move_insn (operands[3], + gen_rtx_SYMBOL_REF (SImode, \"__udivsi3_i4\")); + if (TARGET_FPU_SINGLE) + last = emit_insn (gen_udivsi3_i4_single (operands[0], operands[3])); + else + last = emit_insn (gen_udivsi3_i4 (operands[0], operands[3])); + } + else + { + emit_move_insn (operands[3], + gen_rtx_SYMBOL_REF (SImode, \"__udivsi3\")); + last = emit_insn (gen_udivsi3_i1 (operands[0], operands[3])); + } + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +(define_insn "divsi3_i1" + [(set (match_operand:SI 0 "register_operand" "=z") + (div:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI 3)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "! TARGET_SH4" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "divsi3_i4" + [(set (match_operand:SI 0 "register_operand" "=y") + (div:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (use (reg:PSI 48)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH4 && ! 
TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "divsi3_i4_single" + [(set (match_operand:SI 0 "register_operand" "=y") + (div:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (clobber (reg:SI 2)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_HARD_SH4 && TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "divsi3" + [(set (reg:SI 4) (match_operand:SI 1 "general_operand" "")) + (set (reg:SI 5) (match_operand:SI 2 "general_operand" "")) + (set (match_dup 3) (symbol_ref:SI "__sdivsi3")) + (parallel [(set (match_operand:SI 0 "register_operand" "") + (div:SI (reg:SI 4) + (reg:SI 5))) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI 3)) + (use (match_dup 3))])] + "" + " +{ + rtx first, last; + + first = emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]); + operands[3] = gen_reg_rtx(SImode); + if (TARGET_HARD_SH4) + { + emit_move_insn (operands[3], + gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3_i4\")); + if (TARGET_FPU_SINGLE) + last = emit_insn (gen_divsi3_i4_single (operands[0], operands[3])); + else + last = emit_insn (gen_divsi3_i4 (operands[0], operands[3])); + } + else + { + emit_move_insn (operands[3], gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3\")); + last = emit_insn (gen_divsi3_i1 (operands[0], operands[3])); + } + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +;; ------------------------------------------------------------------------- +;; Multiplication instructions +;; ------------------------------------------------------------------------- + +(define_insn "umulhisi3_i" + [(set (reg:SI 21) + (mult:SI (zero_extend:SI (match_operand:HI 0 "arith_reg_operand" "r")) + (zero_extend:SI (match_operand:HI 1 "arith_reg_operand" "r"))))] + "" + "mulu %1,%0" + [(set_attr "type" "smpy")]) + +(define_insn "mulhisi3_i" + [(set (reg:SI 21) + (mult:SI (sign_extend:SI + (match_operand:HI 0 "arith_reg_operand" "r")) + (sign_extend:SI + (match_operand:HI 1 "arith_reg_operand" "r"))))] + "" + "muls %1,%0" + [(set_attr "type" "smpy")]) + +(define_expand "mulhisi3" + [(set (reg:SI 21) + (mult:SI (sign_extend:SI + (match_operand:HI 1 "arith_reg_operand" "")) + (sign_extend:SI + (match_operand:HI 2 "arith_reg_operand" "")))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI 21))] + "" + " +{ + rtx first, last; + + first = emit_insn (gen_mulhisi3_i (operands[1], operands[2])); + last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 21)); + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. 
*/ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +(define_expand "umulhisi3" + [(set (reg:SI 21) + (mult:SI (zero_extend:SI + (match_operand:HI 1 "arith_reg_operand" "")) + (zero_extend:SI + (match_operand:HI 2 "arith_reg_operand" "")))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI 21))] + "" + " +{ + rtx first, last; + + first = emit_insn (gen_umulhisi3_i (operands[1], operands[2])); + last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 21)); + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +;; mulsi3 on the SH2 can be done in one instruction, on the SH1 we generate +;; a call to a routine which clobbers known registers. + +(define_insn "" + [(set (match_operand:SI 1 "register_operand" "=z") + (mult:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 21)) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 3)) + (clobber (reg:SI 2)) + (clobber (reg:SI 1)) + (use (match_operand:SI 0 "arith_reg_operand" "r"))] + "" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "mulsi3_call" + [(set (reg:SI 4) (match_operand:SI 1 "general_operand" "")) + (set (reg:SI 5) (match_operand:SI 2 "general_operand" "")) + (parallel[(set (match_operand:SI 0 "register_operand" "") + (mult:SI (reg:SI 4) + (reg:SI 5))) + (clobber (reg:SI 21)) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (clobber (reg:SI 3)) + (clobber (reg:SI 2)) + (clobber (reg:SI 1)) + (use (match_operand:SI 3 "register_operand" ""))])] + "" + "") + +(define_insn "mul_l" + [(set (reg:SI 21) + (mult:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" + "mul.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "mulsi3" + [(set (reg:SI 21) + (mult:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" ""))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI 21))] + "" + " +{ + rtx first, last; + + if (!TARGET_SH2) + { + /* The address must be set outside the libcall, + since it goes into a pseudo. */ + rtx addr = force_reg (SImode, gen_rtx_SYMBOL_REF (SImode, \"__mulsi3\")); + rtx insns = gen_mulsi3_call (operands[0], operands[1], operands[2], addr); + first = XVECEXP (insns, 0, 0); + last = XVECEXP (insns, 0, XVECLEN (insns, 0) - 1); + emit_insn (insns); + } + else + { + rtx macl = gen_rtx_REG (SImode, MACL_REG); + first = emit_insn (gen_mul_l (operands[1], operands[2])); + last = emit_insn (gen_movsi_i ((operands[0]), macl)); + } + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. 
*/ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +(define_insn "mulsidi3_i" + [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (set (reg:SI 21) + (mult:SI (match_dup 0) + (match_dup 1)))] + "TARGET_SH2" + "dmuls.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "arith_reg_operand" "r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "r")))) + (clobber (reg:DI 20))] + "TARGET_SH2" + "#") + +(define_split + [(set (match_operand:DI 0 "arith_reg_operand" "") + (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))) + (clobber (reg:DI 20))] + "TARGET_SH2" + [(const_int 0)] + " +{ + rtx low_dst = gen_lowpart (SImode, operands[0]); + rtx high_dst = gen_highpart (SImode, operands[0]); + + emit_insn (gen_mulsidi3_i (operands[1], operands[2])); + + emit_move_insn (low_dst, gen_rtx_REG (SImode, 21)); + emit_move_insn (high_dst, gen_rtx_REG (SImode, 20)); + /* We need something to tag the possible REG_EQUAL notes on to. */ + emit_move_insn (operands[0], operands[0]); + DONE; +}") + +(define_insn "umulsidi3_i" + [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (set (reg:SI 21) + (mult:SI (match_dup 0) + (match_dup 1)))] + "TARGET_SH2" + "dmulu.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_insn "umulsidi3" + [(set (match_operand:DI 0 "arith_reg_operand" "r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "r")))) + (clobber (reg:DI 20))] + "TARGET_SH2" + "#") + +(define_split + [(set (match_operand:DI 0 "arith_reg_operand" "") + (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))) + (clobber (reg:DI 20))] + "TARGET_SH2" + [(const_int 0)] + " +{ + rtx low_dst = gen_lowpart (SImode, operands[0]); + rtx high_dst = gen_highpart (SImode, operands[0]); + + emit_insn (gen_umulsidi3_i (operands[1], operands[2])); + + emit_move_insn (low_dst, gen_rtx_REG (SImode, 21)); + emit_move_insn (high_dst, gen_rtx_REG (SImode, 20)); + /* We need something to tag the possible REG_EQUAL notes on to. 
*/ + emit_move_insn (operands[0], operands[0]); + DONE; +}") + +(define_insn "smulsi3_highpart_i" + [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (clobber (reg:SI 21))] + "TARGET_SH2" + "dmuls.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "smulsi3_highpart" + [(parallel [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))) + (const_int 32)))) + (clobber (reg:SI 21))]) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI 20))] + "TARGET_SH2" + " +{ + rtx first, last; + + first = emit_insn (gen_smulsi3_highpart_i (operands[1], operands[2])); + last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 20)); + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +(define_insn "umulsi3_highpart_i" + [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (clobber (reg:SI 21))] + "TARGET_SH2" + "dmulu.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "umulsi3_highpart" + [(parallel [(set (reg:SI 20) + (truncate:SI + (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))) + (const_int 32)))) + (clobber (reg:SI 21))]) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI 20))] + "TARGET_SH2" + " +{ + rtx first, last; + + first = emit_insn (gen_umulsi3_highpart_i (operands[1], operands[2])); + last = emit_move_insn (operands[0], gen_rtx_REG (SImode, 20)); + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; +}") + +;; ------------------------------------------------------------------------- +;; Logical operations +;; ------------------------------------------------------------------------- + +(define_insn "" + [(set (match_operand:SI 0 "arith_reg_operand" "=r,z") + (and:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "r,L")))] + "" + "and %2,%0" + [(set_attr "type" "arith")]) + +;; If the constant is 255, then emit a extu.b instruction instead of an +;; and, since that will give better code. 
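A sketch of the `& 255' special case described above, which the andsi3 expander below implements; the function name and register choice are only illustrative.

/* C source */
unsigned int
low_byte (unsigned int x)
{
  return x & 255;
}

/* Instead of loading 255 into a register and using "and", the expander
   emits a byte zero extension, a single insn with no register
   restrictions:

       rts
       extu.b  r4,r0       ! r0 := x & 0xff, in the rts delay slot
*/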
+ +(define_expand "andsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (and:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "logical_operand" "")))] + "" + " +{ + if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 255) + { + emit_insn (gen_zero_extendqisi2 (operands[0], + gen_lowpart (QImode, operands[1]))); + DONE; + } +}") + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "=r,z") + (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "r,L")))] + "" + "or %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "=z,r") + (xor:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "L,r")))] + "" + "xor %2,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Shifts and rotates +;; ------------------------------------------------------------------------- + +(define_insn "rotlsi3_1" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1))) + (set (reg:SI 18) + (lshiftrt:SI (match_dup 1) (const_int 31)))] + "" + "rotl %0" + [(set_attr "type" "arith")]) + +(define_insn "rotlsi3_31" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 31))) + (clobber (reg:SI 18))] + "" + "rotr %0" + [(set_attr "type" "arith")]) + +(define_insn "rotlsi3_16" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)))] + "" + "swap.w %1,%0" + [(set_attr "type" "arith")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "immediate_operand" "")))] + "" + " +{ + static char rot_tab[] = { + 000, 000, 000, 000, 000, 000, 010, 001, + 001, 001, 011, 013, 003, 003, 003, 003, + 003, 003, 003, 003, 003, 013, 012, 002, + 002, 002, 010, 000, 000, 000, 000, 000, + }; + + int count, choice; + + if (GET_CODE (operands[2]) != CONST_INT) + FAIL; + count = INTVAL (operands[2]); + choice = rot_tab[count]; + if (choice & 010 && SH_DYNAMIC_SHIFT_COST <= 1) + FAIL; + choice &= 7; + switch (choice) + { + case 0: + emit_move_insn (operands[0], operands[1]); + count -= (count & 16) * 2; + break; + case 3: + emit_insn (gen_rotlsi3_16 (operands[0], operands[1])); + count -= 16; + break; + case 1: + case 2: + { + rtx parts[2]; + parts[0] = gen_reg_rtx (SImode); + parts[1] = gen_reg_rtx (SImode); + emit_insn (gen_rotlsi3_16 (parts[2-choice], operands[1])); + parts[choice-1] = operands[1]; + emit_insn (gen_ashlsi3 (parts[0], parts[0], GEN_INT (8))); + emit_insn (gen_lshrsi3 (parts[1], parts[1], GEN_INT (8))); + emit_insn (gen_iorsi3 (operands[0], parts[0], parts[1])); + count = (count & ~16) - 8; + } + } + + for (; count > 0; count--) + emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); + for (; count < 0; count++) + emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); + + DONE; +}") + +(define_insn "*rotlhi3_8" + [(set (match_operand:HI 0 "arith_reg_operand" "=r") + (rotate:HI (match_operand:HI 1 "arith_reg_operand" "r") + (const_int 8)))] + "" + "swap.b %1,%0" + [(set_attr "type" "arith")]) + +(define_expand "rotlhi3" + [(set (match_operand:HI 0 "arith_reg_operand" "") + (rotate:HI (match_operand:HI 1 "arith_reg_operand" "") + (match_operand:HI 2 
"immediate_operand" "")))] + "" + " +{ + if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) != 8) + FAIL; +}") + +;; +;; shift left + +(define_insn "ashlsi3_d" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")))] + "TARGET_SH3" + "shld %2,%0" + [(set_attr "type" "dyn_shift")]) + +(define_insn "ashlsi3_k" + [(set (match_operand:SI 0 "arith_reg_operand" "=r,r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0,0") + (match_operand:SI 2 "const_int_operand" "M,K")))] + "CONST_OK_FOR_K (INTVAL (operands[2]))" + "@ + add %0,%0 + shll%O2 %0" + [(set_attr "type" "arith")]) + +(define_insn "ashlhi3_k" + [(set (match_operand:HI 0 "arith_reg_operand" "=r,r") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0,0") + (match_operand:HI 2 "const_int_operand" "M,K")))] + "CONST_OK_FOR_K (INTVAL (operands[2]))" + "@ + add %0,%0 + shll%O2 %0" + [(set_attr "type" "arith")]) + +(define_insn "ashlsi3_n" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "! sh_dynamicalize_shift_p (operands[2])" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1)) + (const_string "2") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3)) + (const_string "6")] + (const_string "8"))) + (set_attr "type" "arith")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "" + [(use (reg:SI 0))] + " +{ + gen_shifty_op (ASHIFT, operands); + DONE; +}") + +(define_expand "ashlsi3" + [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:SI 18))])] + "" + " +{ + if (GET_CODE (operands[2]) == CONST_INT + && sh_dynamicalize_shift_p (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + if (TARGET_SH3 && arith_reg_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_ashlsi3_d (operands[0], operands[1], operands[2])); + DONE; + } + if (! 
immediate_operand (operands[2], GET_MODE (operands[2]))) + FAIL; +}") + +(define_insn "ashlhi3" + [(set (match_operand:HI 0 "arith_reg_operand" "=r") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0") + (match_operand:HI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1)) + (const_string "2") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2)) + (const_string "4")] + (const_string "6"))) + (set_attr "type" "arith")]) + +(define_split + [(set (match_operand:HI 0 "arith_reg_operand" "") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "") + (match_operand:HI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "" + [(use (reg:SI 0))] + " +{ + gen_shifty_hi_op (ASHIFT, operands); + DONE; +}") + +; +; arithmetic shift right +; + +(define_insn "ashrsi3_k" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "M"))) + (clobber (reg:SI 18))] + "INTVAL (operands[2]) == 1" + "shar %0" + [(set_attr "type" "arith")]) + +;; We can't do HImode right shifts correctly unless we start out with an +;; explicit zero / sign extension; doing that would result in worse overall +;; code, so just let the machine independent code widen the mode. +;; That's why we don't have ashrhi3_k / lshrhi3_k / lshrhi3_m / lshrhi3 . + + +;; ??? This should be a define expand. + +(define_insn "ashrsi2_16" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)))] + "" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)))] + "" + [(set (match_dup 0) (rotate:SI (match_dup 1) (const_int 16))) + (set (match_dup 0) (sign_extend:SI (match_dup 2)))] + "operands[2] = gen_lowpart (HImode, operands[0]);") + +;; ??? This should be a define expand. 
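A sketch of what the ashrsi2_16 split above amounts to; the exts.w step is the HImode sign extension from the split's second pattern, and the register choice is only illustrative.

/* C source */
int
high_half (int x)
{
  return x >> 16;
}

/* Rather than sixteen single-bit shifts, the value is rotated by 16 with
   "swap.w" and the low word is then sign-extended:

       swap.w  r4,r0       ! rotate by 16; old high half now in the low word
       rts
       exts.w  r0,r0       ! sign-extend the 16-bit value back to SImode
*/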
+ +(define_insn "ashrsi2_31" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 31))) + (clobber (reg:SI 18))] + "" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 31))) + (clobber (reg:SI 18))] + "" + [(const_int 0)] + " +{ + emit_insn (gen_ashlsi_c (operands[0], operands[1])); + emit_insn (gen_subc1 (operands[0], operands[0], operands[0])); + DONE; +}") + +(define_insn "ashlsi_c" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (const_int 1))) + (set (reg:SI 18) (lt:SI (match_dup 1) + (const_int 0)))] + "" + "shll %0" + [(set_attr "type" "arith")]) + +(define_insn "ashrsi3_d" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))] + "TARGET_SH3" + "shad %2,%0" + [(set_attr "type" "dyn_shift")]) + +(define_insn "ashrsi3_n" + [(set (reg:SI 4) + (ashiftrt:SI (reg:SI 4) + (match_operand:SI 0 "const_int_operand" "i"))) + (clobber (reg:SI 18)) + (clobber (reg:SI 17)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "ashrsi3" + [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:SI 18))])] + "" + "if (expand_ashiftrt (operands)) DONE; else FAIL;") + +;; logical shift right + +(define_insn "lshrsi3_d" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))] + "TARGET_SH3" + "shld %2,%0" + [(set_attr "type" "dyn_shift")]) + +;; Only the single bit shift clobbers the T bit. + +(define_insn "lshrsi3_m" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "M"))) + (clobber (reg:SI 18))] + "CONST_OK_FOR_M (INTVAL (operands[2]))" + "shlr %0" + [(set_attr "type" "arith")]) + +(define_insn "lshrsi3_k" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "K")))] + "CONST_OK_FOR_K (INTVAL (operands[2])) + && ! CONST_OK_FOR_M (INTVAL (operands[2]))" + "shlr%O2 %0" + [(set_attr "type" "arith")]) + +(define_insn "lshrsi3_n" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "! 
sh_dynamicalize_shift_p (operands[2])" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1)) + (const_string "2") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3)) + (const_string "6")] + (const_string "8"))) + (set_attr "type" "arith")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "" + [(use (reg:SI 0))] + " +{ + gen_shifty_op (LSHIFTRT, operands); + DONE; +}") + +(define_expand "lshrsi3" + [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:SI 18))])] + "" + " +{ + if (GET_CODE (operands[2]) == CONST_INT + && sh_dynamicalize_shift_p (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + if (TARGET_SH3 && arith_reg_operand (operands[2], GET_MODE (operands[2]))) + { + rtx count = copy_to_mode_reg (SImode, operands[2]); + emit_insn (gen_negsi2 (count, count)); + emit_insn (gen_lshrsi3_d (operands[0], operands[1], count)); + DONE; + } + if (! immediate_operand (operands[2], GET_MODE (operands[2]))) + FAIL; +}") + +;; ??? This should be a define expand. + +(define_insn "ashldi3_k" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI 18))] + "" + "shll %R0\;rotcl %S0" + [(set_attr "length" "4") + (set_attr "type" "arith")]) + +(define_expand "ashldi3" + [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "") + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:SI 18))])] + "" + "{ if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) != 1) FAIL;} ") + +;; ??? This should be a define expand. + +(define_insn "lshrdi3_k" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI 18))] + "" + "shlr %S0\;rotcr %R0" + [(set_attr "length" "4") + (set_attr "type" "arith")]) + +(define_expand "lshrdi3" + [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "") + (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:SI 18))])] + "" + "{ if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) != 1) FAIL;} ") + +;; ??? This should be a define expand. 
+ +(define_insn "ashrdi3_k" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI 18))] + "" + "shar %S0\;rotcr %R0" + [(set_attr "length" "4") + (set_attr "type" "arith")]) + +(define_expand "ashrdi3" + [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "") + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:SI 18))])] + "" + "{ if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) != 1) FAIL; } ") + +;; combined left/right shift + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n")))] + "(unsigned)INTVAL (operands[2]) < 32" + [(use (reg:SI 0))] + "if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) FAIL; + DONE;") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "(unsigned)INTVAL (operands[2]) < 32" + [(use (reg:SI 0))] + "if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) FAIL; + DONE;") + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "shl_and_kind (operands[2], operands[3], 0) == 1" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_and_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_and_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_and_length (insn)") (const_int 5)) + (const_string "10") + (eq (symbol_ref "shl_and_length (insn)") (const_int 6)) + (const_string "12") + (eq (symbol_ref "shl_and_length (insn)") (const_int 7)) + (const_string "14") + (eq (symbol_ref "shl_and_length (insn)") (const_int 8)) + (const_string "16")] + (const_string "18"))) + (set_attr "type" "arith")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=z") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) + (clobber (reg:SI 18))] + "shl_and_kind (operands[2], operands[3], 0) == 2" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_and_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_and_length (insn)") (const_int 4)) + (const_string "8")] + (const_string "10"))) + (set_attr "type" "arith")]) + +;; shift left / and combination with a scratch register: The combine pass +;; does not accept the individual instructions, even though they are +;; cheap. But it needs a precise description so that it is usable after +;; reload. 
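+;; Roughly speaking, the insn below matches a source expression of the shape
+;; (((x >> A) & y) << B) >> C with constant shift counts; the C form is only
+;; illustrative, the exact RTL shape is what combine has to see.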
+(define_insn "and_shl_scratch" + [(set (match_operand:SI 0 "register_operand" "=r,&r") + (lshiftrt:SI (ashift:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0") + (match_operand:SI 2 "const_int_operand" "N,n")) + (match_operand:SI 3 "" "0,r")) + (match_operand:SI 4 "const_int_operand" "n,n")) + (match_operand:SI 5 "const_int_operand" "n,n"))) + (clobber (reg:SI 18))] + "" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_and_scr_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 5)) + (const_string "10")] + (const_string "12"))) + (set_attr "type" "arith")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "=r,&r") + (lshiftrt:SI (ashift:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0") + (match_operand:SI 2 "const_int_operand" "N,n")) + (match_operand:SI 3 "register_operand" "0,r")) + (match_operand:SI 4 "const_int_operand" "n,n")) + (match_operand:SI 5 "const_int_operand" "n,n"))) + (clobber (reg:SI 18))] + "" + [(use (reg:SI 0))] + " +{ + rtx and_source = operands[rtx_equal_p (operands[0], operands[1]) ? 3 : 1]; + + if (INTVAL (operands[2])) + { + gen_shifty_op (LSHIFTRT, operands); + } + emit_insn (gen_andsi3 (operands[0], operands[0], and_source)); + operands[2] = operands[4]; + gen_shifty_op (ASHIFT, operands); + if (INTVAL (operands[5])) + { + operands[2] = operands[5]; + gen_shifty_op (LSHIFTRT, operands); + } + DONE; +}") + +;; signed left/right shift combination. +(define_split + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI 18))] + "" + [(use (reg:SI 0))] + "if (gen_shl_sext (operands[0], operands[2], operands[3], operands[1])) FAIL; + DONE;") + +(define_insn "shl_sext_ext" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI 18))] + "(unsigned)shl_sext_kind (operands[2], operands[3], 0) - 1 < 5" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 1)) + (const_string "2") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 5)) + (const_string "10") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 6)) + (const_string "12") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 7)) + (const_string "14") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 8)) + (const_string "16")] + (const_string "18"))) + (set_attr "type" "arith")]) + +(define_insn "shl_sext_sub" + [(set (match_operand:SI 0 "register_operand" "=z") + (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI 18))] + "(shl_sext_kind (operands[2], operands[3], 0) & ~1) == 6" + "#" + [(set 
(attr "length") + (cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 5)) + (const_string "10") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 6)) + (const_string "12")] + (const_string "14"))) + (set_attr "type" "arith")]) + +;; These patterns are found in expansions of DImode shifts by 16, and +;; allow the xtrct instruction to be generated from C source. + +(define_insn "xtrct_left" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)) + (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand" "0") + (const_int 16))))] + "" + "xtrct %1,%0" + [(set_attr "type" "arith")]) + +(define_insn "xtrct_right" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 16)) + (ashift:SI (match_operand:SI 2 "arith_reg_operand" "r") + (const_int 16))))] + "" + "xtrct %2,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Unary arithmetic +;; ------------------------------------------------------------------------- + +(define_insn "negc" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (neg:SI (plus:SI (reg:SI 18) + (match_operand:SI 1 "arith_reg_operand" "r")))) + (set (reg:SI 18) + (ne:SI (ior:SI (reg:SI 18) (match_dup 1)) + (const_int 0)))] + "" + "negc %1,%0" + [(set_attr "type" "arith")]) + +(define_expand "negdi2" + [(set (match_operand:DI 0 "arith_reg_operand" "") + (neg:DI (match_operand:DI 1 "arith_reg_operand" ""))) + (clobber (reg:SI 18))] + "" + " +{ + int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1); + int high_word = (TARGET_LITTLE_ENDIAN ? 
1 : 0); + + rtx low_src = operand_subword (operands[1], low_word, 0, DImode); + rtx high_src = operand_subword (operands[1], high_word, 0, DImode); + + rtx low_dst = operand_subword (operands[0], low_word, 1, DImode); + rtx high_dst = operand_subword (operands[0], high_word, 1, DImode); + + emit_insn (gen_clrt ()); + emit_insn (gen_negc (low_dst, low_src)); + emit_insn (gen_negc (high_dst, high_src)); + DONE; +}") + +(define_insn "negsi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")))] + "" + "neg %1,%0" + [(set_attr "type" "arith")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (not:SI (match_operand:SI 1 "arith_reg_operand" "r")))] + "" + "not %1,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Zero extension instructions +;; ------------------------------------------------------------------------- + +(define_insn "zero_extendhisi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "arith_reg_operand" "r")))] + "" + "extu.w %1,%0" + [(set_attr "type" "arith")]) + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "arith_reg_operand" "r")))] + "" + "extu.b %1,%0" + [(set_attr "type" "arith")]) + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "arith_reg_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))] + "" + "extu.b %1,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Sign extension instructions +;; ------------------------------------------------------------------------- + +;; ??? This should be a define expand. +;; ??? Or perhaps it should be dropped? + +/* There is no point in defining extendsidi2; convert_move generates good + code for that. 
*/ + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "general_movsrc_operand" "r,m")))] + "" + "@ + exts.w %1,%0 + mov.w %1,%0" + [(set_attr "type" "arith,load")]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "general_movsrc_operand" "r,m")))] + "" + "@ + exts.b %1,%0 + mov.b %1,%0" + [(set_attr "type" "arith,load")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "arith_reg_operand" "=r,r") + (sign_extend:HI (match_operand:QI 1 "general_movsrc_operand" "r,m")))] + "" + "@ + exts.b %1,%0 + mov.b %1,%0" + [(set_attr "type" "arith,load")]) + +;; ------------------------------------------------------------------------- +;; Move instructions +;; ------------------------------------------------------------------------- + +;; define push and pop so it is easy for sh.c + +(define_expand "push" + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "register_operand" "r,l,x"))] + "" + "") + +(define_expand "pop" + [(set (match_operand:SI 0 "register_operand" "=r,l,x") + (mem:SI (post_inc:SI (reg:SI 15))))] + "" + "") + +(define_expand "push_e" + [(parallel [(set (mem:SF (pre_dec:SI (reg:SI 15))) + (match_operand:SF 0 "" "")) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + +(define_insn "push_fpul" + [(set (mem:SF (pre_dec:SI (reg:SI 15))) (reg:SF 22))] + "TARGET_SH3E" + "sts.l fpul,@-r15" + [(set_attr "type" "store") + (set_attr "hit_stack" "yes")]) + +;; DFmode pushes for sh4 require a lot of what is defined for movdf_i4, +;; so use that. +(define_expand "push_4" + [(parallel [(set (mem:DF (pre_dec:SI (reg:SI 15))) (match_operand:DF 0 "" "")) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + +(define_expand "pop_e" + [(parallel [(set (match_operand:SF 0 "" "") + (mem:SF (post_inc:SI (reg:SI 15)))) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + +(define_insn "pop_fpul" + [(set (reg:SF 22) (mem:SF (post_inc:SI (reg:SI 15))))] + "TARGET_SH3E" + "lds.l @r15+,fpul" + [(set_attr "type" "load") + (set_attr "hit_stack" "yes")]) + +(define_expand "pop_4" + [(parallel [(set (match_operand:DF 0 "" "") + (mem:DF (post_inc:SI (reg:SI 15)))) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + +;; These two patterns can happen as the result of optimization, when +;; comparisons get simplified to a move of zero or 1 into the T reg. +;; They don't disappear completely, because the T reg is a fixed hard reg. + +(define_insn "clrt" + [(set (reg:SI 18) (const_int 0))] + "" + "clrt") + +(define_insn "sett" + [(set (reg:SI 18) (const_int 1))] + "" + "sett") + +;; t/r is first, so that it will be preferred over r/r when reloading a move +;; of a pseudo-reg into the T reg +(define_insn "movsi_i" + [(set (match_operand:SI 0 "general_movdst_operand" "=t,r,r,r,r,r,m,<,<,xl,x,l,r") + (match_operand:SI 1 "general_movsrc_operand" "r,Q,rI,mr,xl,t,r,x,l,r,>,>,i"))] + " + ! 
TARGET_SH3E + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + cmp/pl %1 + mov.l %1,%0 + mov %1,%0 + mov.l %1,%0 + sts %1,%0 + movt %0 + mov.l %1,%0 + sts.l %1,%0 + sts.l %1,%0 + lds %1,%0 + lds.l %1,%0 + lds.l %1,%0 + fake %1,%0" + [(set_attr "type" "*,pcload_si,move,load_si,move,move,store,store,pstore,move,load,pload,pcload_si") + (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*")]) + +;; t/r must come after r/r, lest reload will try to reload stuff like +;; (subreg:SI (reg:SF 38 fr14) 0) into T (compiling stdlib/strtod.c -m3e -O2) +;; ??? This allows moves from macl to fpul to be recognized, but these moves +;; will require a reload. +(define_insn "movsi_ie" + [(set (match_operand:SI 0 "general_movdst_operand" "=r,r,t,r,r,r,m,<,<,xl,x,l,y,r,y,r,y") + (match_operand:SI 1 "general_movsrc_operand" "Q,rI,r,mr,xl,t,r,x,l,r,>,>,>,i,r,y,y"))] + "TARGET_SH3E + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov.l %1,%0 + mov %1,%0 + cmp/pl %1 + mov.l %1,%0 + sts %1,%0 + movt %0 + mov.l %1,%0 + sts.l %1,%0 + sts.l %1,%0 + lds %1,%0 + lds.l %1,%0 + lds.l %1,%0 + lds.l %1,%0 + fake %1,%0 + lds %1,%0 + sts %1,%0 + ! move optimized away" + [(set_attr "type" "pcload_si,move,*,load_si,move,move,store,store,pstore,move,load,pload,load,pcload_si,gp_fpul,gp_fpul,nil") + (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")]) + +(define_insn "movsi_i_lowpart" + [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "=r,r,r,r,r,m,r")) + (match_operand:SI 1 "general_movsrc_operand" "Q,rI,mr,xl,t,r,i"))] + "register_operand (operands[0], SImode) + || register_operand (operands[1], SImode)" + "@ + mov.l %1,%0 + mov %1,%0 + mov.l %1,%0 + sts %1,%0 + movt %0 + mov.l %1,%0 + fake %1,%0" + [(set_attr "type" "pcload,move,load,move,move,store,pcload")]) +(define_expand "movsi" + [(set (match_operand:SI 0 "general_movdst_operand" "") + (match_operand:SI 1 "general_movsrc_operand" ""))] + "" + "{ if (prepare_move_operands (operands, SImode)) DONE; }") + +(define_expand "ic_invalidate_line" + [(parallel [(unspec_volatile [(match_operand:SI 0 "register_operand" "+r") + (match_dup 1)] 12) + (clobber (scratch:SI))])] + "TARGET_HARD_SH4" + " +{ + operands[0] = force_reg (Pmode, operands[0]); + operands[1] = force_reg (Pmode, GEN_INT (0xf0000008)); +}") + +;; The address %0 is assumed to be 4-aligned at least. Thus, by ORing +;; 0xf0000008, we get the low-oder bits *1*00 (binary), ;; which fits +;; the requirement *0*00 for associative address writes. The alignment of +;; %0 implies that its least significant bit is cleared, +;; thus we clear the V bit of a matching entry if there is one. 
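+;; For instance (addresses purely illustrative): with %0 = 0x0c001230, the
+;; sequence below forms %2 = 0x00001230 | 0xf0000008 = 0xf0001238 and then
+;; does mov.l %0,@%2, i.e. an associative write into the instruction cache
+;; address array that invalidates the matching line, if any.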
+(define_insn "ic_invalidate_line_i" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "register_operand" "r,r")] 12) + (clobber (match_scratch:SI 2 "=&r,1"))] + "TARGET_HARD_SH4" + "ocbwb\\t@%0\;extu.w\\t%0,%2\;or\\t%r1,%r2\;mov.l\\t%0,@%2" + [(set_attr "length" "8")]) + +(define_insn "movqi_i" + [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l") + (match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))] + "arith_reg_operand (operands[0], QImode) + || arith_reg_operand (operands[1], QImode)" + "@ + mov %1,%0 + mov.b %1,%0 + mov.b %1,%0 + movt %0 + sts %1,%0 + lds %1,%0" + [(set_attr "type" "move,load,store,move,move,move")]) + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + "{ if (prepare_move_operands (operands, QImode)) DONE; }") + +(define_insn "movhi_i" + [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,r,m,r,l,r") + (match_operand:HI 1 "general_movsrc_operand" "Q,rI,m,t,r,l,r,i"))] + "arith_reg_operand (operands[0], HImode) + || arith_reg_operand (operands[1], HImode)" + "@ + mov.w %1,%0 + mov %1,%0 + mov.w %1,%0 + movt %0 + mov.w %1,%0 + sts %1,%0 + lds %1,%0 + fake %1,%0" + [(set_attr "type" "pcload,move,load,move,store,move,move,pcload")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_movdst_operand" "") + (match_operand:HI 1 "general_movsrc_operand" ""))] + "" + "{ if (prepare_move_operands (operands, HImode)) DONE; }") + +;; ??? This should be a define expand. + +;; x/r can be created by inlining/cse, e.g. for execute/961213-1.c +;; compiled with -m2 -ml -O3 -funroll-loops +(define_insn "" + [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m,r,r,r,*!x") + (match_operand:DI 1 "general_movsrc_operand" "Q,r,m,r,I,i,x,r"))] + "arith_reg_operand (operands[0], DImode) + || arith_reg_operand (operands[1], DImode)" + "* return output_movedouble (insn, operands, DImode);" + [(set_attr "length" "4") + (set_attr "type" "pcload,move,load,store,move,pcload,move,move")]) + +;; If the output is a register and the input is memory or a register, we have +;; to be careful and see which word needs to be loaded first. + +(define_split + [(set (match_operand:DI 0 "general_movdst_operand" "") + (match_operand:DI 1 "general_movsrc_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + " +{ + int regno; + + if ((GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC) + || (GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) == POST_INC)) + FAIL; + + if (GET_CODE (operands[0]) == REG) + regno = REGNO (operands[0]); + else if (GET_CODE (operands[0]) == SUBREG) + regno = REGNO (SUBREG_REG (operands[0])) + SUBREG_WORD (operands[0]); + else if (GET_CODE (operands[0]) == MEM) + regno = -1; + + if (regno == -1 + || ! 
refers_to_regno_p (regno, regno + 1, operands[1], 0)) + { + operands[2] = operand_subword (operands[0], 0, 0, DImode); + operands[3] = operand_subword (operands[1], 0, 0, DImode); + operands[4] = operand_subword (operands[0], 1, 0, DImode); + operands[5] = operand_subword (operands[1], 1, 0, DImode); + } + else + { + operands[2] = operand_subword (operands[0], 1, 0, DImode); + operands[3] = operand_subword (operands[1], 1, 0, DImode); + operands[4] = operand_subword (operands[0], 0, 0, DImode); + operands[5] = operand_subword (operands[1], 0, 0, DImode); + } + + if (operands[2] == 0 || operands[3] == 0 + || operands[4] == 0 || operands[5] == 0) + FAIL; +}") + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_movdst_operand" "") + (match_operand:DI 1 "general_movsrc_operand" ""))] + "" + "{ if ( prepare_move_operands (operands, DImode)) DONE; }") + +;; ??? This should be a define expand. + +(define_insn "movdf_k" + [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m") + (match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))] + "(! TARGET_SH4 || reload_completed + /* ??? We provide some insn so that direct_{load,store}[DFmode] get set */ + || GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3 + || GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3) + && (arith_reg_operand (operands[0], DFmode) + || arith_reg_operand (operands[1], DFmode))" + "* return output_movedouble (insn, operands, DFmode);" + [(set_attr "length" "4") + (set_attr "type" "move,pcload,load,store")]) + +;; All alternatives of movdf_i4 are split for ! TARGET_FMOVD. +;; However, the d/F/c/z alternative cannot be split directly; it is converted +;; with special code in machine_dependent_reorg into a load of the R0_REG and +;; the d/m/c/X alternative, which is split later into single-precision +;; instructions. And when not optimizing, no splits are done before fixing +;; up pcloads, so we need usable length information for that. +(define_insn "movdf_i4" + [(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d") + (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))] + "TARGET_SH4 + && (arith_reg_operand (operands[0], DFmode) + || arith_reg_operand (operands[1], DFmode))" + "@ + fmov %1,%0 + # + # + fmov.d %1,%0 + fmov.d %1,%0 + # + # + # + # + #" + [(set_attr_alternative "length" + [(if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 4)) + (const_int 4) + (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) + (if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6)) + (if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6)) + (const_int 4) + (const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn) + (const_int 8) (const_int 8)]) + (set_attr "type" "fmove,move,pcload,load,store,pcload,load,store,load,load")]) + +;; Moving DFmode between fp/general registers through memory +;; (the top of the stack) is faster than moving through fpul even for +;; little endian. Because the type of an instruction is important for its +;; scheduling, it is beneficial to split these operations, rather than +;; emitting them in one single chunk, even if this will expose a stack +;; use that will prevent scheduling of other stack accesses beyond this +;; instruction. 
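+;; Concretely, the split below bounces the value through the stack: one
+;; movdf_i4 stores through (pre_dec r15) and a second one reloads through
+;; (post_inc r15), with REG_INC notes added for both stack adjustments.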
+(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "=X"))] + "TARGET_SH4 && reload_completed + && (true_regnum (operands[0]) < 16) != (true_regnum (operands[1]) < 16)" + [(const_int 0)] + " +{ + rtx insn, tos; + + tos = gen_rtx (MEM, DFmode, gen_rtx (PRE_DEC, Pmode, stack_pointer_rtx)); + insn = emit_insn (gen_movdf_i4 (tos, operands[1], operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, stack_pointer_rtx, NULL_RTX); + tos = gen_rtx (MEM, DFmode, gen_rtx (POST_INC, Pmode, stack_pointer_rtx)); + insn = emit_insn (gen_movdf_i4 (operands[0], tos, operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, stack_pointer_rtx, NULL_RTX); + DONE; +}") + +;; local-alloc sometimes allocates scratch registers even when not required, +;; so we must be prepared to handle these. + +;; Remove the use and clobber from a movdf_i4 so that we can use movdf_k. +(define_split + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 + && reload_completed + && true_regnum (operands[0]) < 16 + && true_regnum (operands[1]) < 16" + [(set (match_dup 0) (match_dup 1))] + " +{ + /* If this was a reg <-> mem operation with base + index reg addressing, + we have to handle this in a special way. */ + rtx mem = operands[0]; + int store_p = 1; + if (! memory_operand (mem, DFmode)) + { + mem = operands[1]; + store_p = 0; + } + if (GET_CODE (mem) == SUBREG && SUBREG_WORD (mem) == 0) + mem = SUBREG_REG (mem); + if (GET_CODE (mem) == MEM) + { + rtx addr = XEXP (mem, 0); + if (GET_CODE (addr) == PLUS + && GET_CODE (XEXP (addr, 0)) == REG + && GET_CODE (XEXP (addr, 1)) == REG) + { + int offset; + rtx reg0 = gen_rtx (REG, Pmode, 0); + rtx regop = operands[store_p], word0 ,word1; + + if (GET_CODE (regop) == SUBREG) + regop = alter_subreg (regop); + if (REGNO (XEXP (addr, 0)) == REGNO (XEXP (addr, 1))) + offset = 2; + else + offset = 4; + mem = copy_rtx (mem); + PUT_MODE (mem, SImode); + word0 = gen_rtx(SUBREG, SImode, regop, 0); + emit_insn (store_p + ? gen_movsi_ie (mem, word0) : gen_movsi_ie (word0, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset))); + mem = copy_rtx (mem); + word1 = gen_rtx(SUBREG, SImode, regop, 1); + emit_insn (store_p + ? gen_movsi_ie (mem, word1) : gen_movsi_ie (word1, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset))); + DONE; + } + } +}") + +;; Split away the clobber of r0 after machine_dependent_reorg has fixed pcloads. +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 0))] + "TARGET_SH4 && reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 2)) + (clobber (scratch:SI))])] + "") + +(define_expand "reload_indf" + [(parallel [(set (match_operand:DF 0 "register_operand" "=f") + (match_operand:DF 1 "immediate_operand" "FQ")) + (use (reg:PSI 48)) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "" + "") + +(define_expand "reload_outdf" + [(parallel [(set (match_operand:DF 0 "register_operand" "=r,f") + (match_operand:DF 1 "register_operand" "af,r")) + (clobber (match_operand:SI 2 "register_operand" "=&y,y"))])] + "" + "") + +;; Simplify no-op moves. 
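+;; Such moves appear when both operands end up in the same hard register
+;; after reload; the split below rewrites the insn as a bare self-move,
+;; dropping the fpscr use and the scratch clobber.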
+(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH3E && reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(set (match_dup 0) (match_dup 0))] + "") + +;; fmovd substitute post-reload splits +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! TARGET_FMOVD && reload_completed + && true_regnum (operands[0]) >= FIRST_FP_REG + && true_regnum (operands[1]) >= FIRST_FP_REG" + [(const_int 0)] + " +{ + int dst = true_regnum (operands[0]), src = true_regnum (operands[1]); + emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, dst), + gen_rtx (REG, SFmode, src), operands[2])); + emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, dst + 1), + gen_rtx (REG, SFmode, src + 1), operands[2])); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (mem:DF (match_operand:SI 1 "register_operand" ""))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! TARGET_FMOVD && reload_completed + && true_regnum (operands[0]) >= FIRST_FP_REG + && find_regno_note (insn, REG_DEAD, true_regnum (operands[1]))" + [(const_int 0)] + " +{ + int regno = true_regnum (operands[0]); + rtx insn; + rtx mem2 = gen_rtx (MEM, SFmode, gen_rtx (POST_INC, Pmode, operands[1])); + + insn = emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, + regno + !! TARGET_LITTLE_ENDIAN), + mem2, operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[1], NULL_RTX); + insn = emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, + regno + ! TARGET_LITTLE_ENDIAN), + gen_rtx (MEM, SFmode, operands[1]), + operands[2])); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! TARGET_FMOVD && reload_completed + && true_regnum (operands[0]) >= FIRST_FP_REG" + [(const_int 0)] + " +{ + int regno = true_regnum (operands[0]); + rtx addr, insn, adjust = NULL_RTX; + rtx mem2 = copy_rtx (operands[1]); + rtx reg0 = gen_rtx_REG (SFmode, regno + !! TARGET_LITTLE_ENDIAN); + rtx reg1 = gen_rtx_REG (SFmode, regno + ! TARGET_LITTLE_ENDIAN); + + PUT_MODE (mem2, SFmode); + operands[1] = copy_rtx (mem2); + addr = XEXP (mem2, 0); + if (GET_CODE (addr) != POST_INC) + { + /* If we have to modify the stack pointer, the value that we have + read with post-increment might be modified by an interrupt, + so write it back. */ + if (REGNO (addr) == STACK_POINTER_REGNUM) + adjust = gen_push_e (reg0); + else + adjust = gen_addsi3 (addr, addr, GEN_INT (-4)); + XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr); + } + addr = XEXP (addr, 0); + insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2])); + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, addr, NULL_RTX); + insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + if (adjust) + emit_insn (adjust); + else + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, addr, NULL_RTX); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "memory_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! 
TARGET_FMOVD && reload_completed + && true_regnum (operands[1]) >= FIRST_FP_REG" + [(const_int 0)] + " +{ + int regno = true_regnum (operands[1]); + rtx insn, addr, adjust = NULL_RTX; + + operands[0] = copy_rtx (operands[0]); + PUT_MODE (operands[0], SFmode); + insn = emit_insn (gen_movsf_ie (operands[0], + gen_rtx (REG, SFmode, + regno + ! TARGET_LITTLE_ENDIAN), + operands[2])); + operands[0] = copy_rtx (operands[0]); + addr = XEXP (operands[0], 0); + if (GET_CODE (addr) != PRE_DEC) + { + adjust = gen_addsi3 (addr, addr, GEN_INT (4)); + emit_insn_before (adjust, insn); + XEXP (operands[0], 0) = addr = gen_rtx (PRE_DEC, SImode, addr); + } + addr = XEXP (addr, 0); + if (! adjust) + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX); + insn = emit_insn (gen_movsf_ie (operands[0], + gen_rtx (REG, SFmode, + regno + !! TARGET_LITTLE_ENDIAN), + operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX); + DONE; +}") + +;; The '&' for operand 2 is not really true, but push_secondary_reload +;; insists on it. +;; Operand 1 must accept FPUL_REGS in case fpul is reloaded to memory, +;; to avoid a bogus tertiary reload. +;; We need a tertiary reload when a floating point register is reloaded +;; to memory, so the predicate for operand 0 must accept this, while the +;; constraint of operand 1 must reject the secondary reload register. +;; Thus, the secondary reload register for this case has to be GENERAL_REGS, +;; too. +;; By having the predicate for operand 0 reject any register, we make +;; sure that the ordinary moves that just need an intermediate register +;; won't get a bogus tertiary reload. +;; We use tertiary_reload_operand instead of memory_operand here because +;; memory_operand rejects operands that are not directly addressible, e.g.: +;; (mem:SF (plus:SI (reg:SI 14 r14) +;; (const_int 132))) + +(define_expand "reload_outsf" + [(parallel [(set (match_operand:SF 2 "register_operand" "=&r") + (match_operand:SF 1 "register_operand" "y")) + (clobber (scratch:SI))]) + (parallel [(set (match_operand:SF 0 "tertiary_reload_operand" "=m") + (match_dup 2)) + (clobber (scratch:SI))])] + "" + "") + +;; If the output is a register and the input is memory or a register, we have +;; to be careful and see which word needs to be loaded first. + +(define_split + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + " +{ + int regno; + + if ((GET_CODE (operands[0]) == MEM + && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC) + || (GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) == POST_INC)) + FAIL; + + if (GET_CODE (operands[0]) == REG) + regno = REGNO (operands[0]); + else if (GET_CODE (operands[0]) == SUBREG) + regno = REGNO (SUBREG_REG (operands[0])) + SUBREG_WORD (operands[0]); + else if (GET_CODE (operands[0]) == MEM) + regno = -1; + + if (regno == -1 + || ! 
refers_to_regno_p (regno, regno + 1, operands[1], 0)) + { + operands[2] = operand_subword (operands[0], 0, 0, DFmode); + operands[3] = operand_subword (operands[1], 0, 0, DFmode); + operands[4] = operand_subword (operands[0], 1, 0, DFmode); + operands[5] = operand_subword (operands[1], 1, 0, DFmode); + } + else + { + operands[2] = operand_subword (operands[0], 1, 0, DFmode); + operands[3] = operand_subword (operands[1], 1, 0, DFmode); + operands[4] = operand_subword (operands[0], 0, 0, DFmode); + operands[5] = operand_subword (operands[1], 0, 0, DFmode); + } + + if (operands[2] == 0 || operands[3] == 0 + || operands[4] == 0 || operands[5] == 0) + FAIL; +}") + +;; If a base address generated by LEGITIMIZE_ADDRESS for SImode is +;; used only once, let combine add in the index again. + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "" "")) + (clobber (match_operand 2 "register_operand" ""))] + "! reload_in_progress && ! reload_completed" + [(use (reg:SI 0))] + " +{ + rtx addr, reg, const_int; + + if (GET_CODE (operands[1]) != MEM) + FAIL; + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) != PLUS) + FAIL; + reg = XEXP (addr, 0); + const_int = XEXP (addr, 1); + if (GET_CODE (reg) != REG || GET_CODE (const_int) != CONST_INT) + FAIL; + emit_move_insn (operands[2], const_int); + emit_move_insn (operands[0], + change_address (operands[1], VOIDmode, + gen_rtx (PLUS, SImode, reg, operands[2]))); + DONE; +}") + +(define_split + [(set (match_operand:SI 1 "" "") + (match_operand:SI 0 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))] + "! reload_in_progress && ! reload_completed" + [(use (reg:SI 0))] + " +{ + rtx addr, reg, const_int; + + if (GET_CODE (operands[1]) != MEM) + FAIL; + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) != PLUS) + FAIL; + reg = XEXP (addr, 0); + const_int = XEXP (addr, 1); + if (GET_CODE (reg) != REG || GET_CODE (const_int) != CONST_INT) + FAIL; + emit_move_insn (operands[2], const_int); + emit_move_insn (change_address (operands[1], VOIDmode, + gen_rtx (PLUS, SImode, reg, operands[2])), + operands[0]); + DONE; +}") + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" ""))] + "" + " +{ + if (prepare_move_operands (operands, DFmode)) DONE; + if (TARGET_SH4) + { + if (no_new_pseudos) + { + /* ??? FIXME: This is only a stopgap fix. There is no guarantee + that fpscr is in the right state. */ + emit_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } + emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ())); + /* We need something to tag possible REG_LIBCALL notes on to. */ + if (TARGET_FPU_SINGLE && rtx_equal_function_value_matters + && GET_CODE (operands[0]) == REG) + emit_insn (gen_mov_nop (operands[0])); + DONE; + } +}") + + +(define_insn "movsf_i" + [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r") + (match_operand:SF 1 "general_movsrc_operand" "r,I,FQ,mr,r,r,l"))] + " + (! TARGET_SH3E + /* ??? 
We provide some insn so that direct_{load,store}[SFmode] get set */ + || GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3 + || GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3) + && (arith_reg_operand (operands[0], SFmode) + || arith_reg_operand (operands[1], SFmode))" + "@ + mov %1,%0 + mov %1,%0 + mov.l %1,%0 + mov.l %1,%0 + mov.l %1,%0 + lds %1,%0 + sts %1,%0" + [(set_attr "type" "move,move,pcload,load,store,move,move")]) + +;; We may not split the ry/yr/XX alternatives to movsi_ie, since +;; update_flow_info would not know where to put REG_EQUAL notes +;; when the destination changes mode. +(define_insn "movsf_ie" + [(set (match_operand:SF 0 "general_movdst_operand" + "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,y") + (match_operand:SF 1 "general_movsrc_operand" + "f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,X,X,&z,X,X,X,X,X,X,X,X,y,X,X,X"))] + + "TARGET_SH3E + && (arith_reg_operand (operands[0], SFmode) + || arith_reg_operand (operands[1], SFmode))" + "@ + fmov %1,%0 + mov %1,%0 + fldi0 %0 + fldi1 %0 + # + fmov.s %1,%0 + fmov.s %1,%0 + mov.l %1,%0 + mov.l %1,%0 + mov.l %1,%0 + fsts fpul,%0 + flds %1,fpul + lds.l %1,%0 + # + sts %1,%0 + lds %1,%0 + ! move optimized away" + [(set_attr "type" "fmove,move,fmove,fmove,pcload,load,store,pcload,load,store,fmove,fmove,load,*,gp_fpul,gp_fpul,nil") + (set_attr "length" "*,*,*,*,4,*,*,*,*,*,2,2,2,4,2,2,0")]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "" + [(parallel [(set (reg:SF 22) (match_dup 1)) + (use (match_dup 2)) + (clobber (scratch:SI))]) + (parallel [(set (match_dup 0) (reg:SF 22)) + (use (match_dup 2)) + (clobber (scratch:SI))])] + "") + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_movdst_operand" "") + (match_operand:SF 1 "general_movsrc_operand" ""))] + "" + " +{ + if (prepare_move_operands (operands, SFmode)) + DONE; + if (TARGET_SH3E) + { + if (no_new_pseudos) + { + /* ??? FIXME: This is only a stopgap fix. There is no guarantee + that fpscr is in the right state. */ + emit_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } + emit_sf_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ())); + /* We need something to tag possible REG_LIBCALL notes on to. */ + if (! 
TARGET_FPU_SINGLE && rtx_equal_function_value_matters + && GET_CODE (operands[0]) == REG) + emit_insn (gen_mov_nop (operands[0])); + DONE; + } +}") + +(define_insn "mov_nop" + [(set (match_operand 0 "register_operand" "") (match_dup 0))] + "TARGET_SH3E" + "" + [(set_attr "length" "0") + (set_attr "type" "nil")]) + +(define_expand "reload_insf" + [(parallel [(set (match_operand:SF 0 "register_operand" "=f") + (match_operand:SF 1 "immediate_operand" "FQ")) + (use (reg:PSI 48)) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "" + "") + +(define_expand "reload_insi" + [(parallel [(set (match_operand:SF 0 "register_operand" "=y") + (match_operand:SF 1 "immediate_operand" "FQ")) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "" + "") + +(define_insn "*movsi_y" + [(set (match_operand:SI 0 "register_operand" "=y,y") + (match_operand:SI 1 "immediate_operand" "Qi,I")) + (clobber (match_scratch:SI 3 "=&z,r"))] + "TARGET_SH3E + && (reload_in_progress || reload_completed)" + "#" + [(set_attr "length" "4") + (set_attr "type" "pcload,move")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "y") + (match_operand:SI 1 "immediate_operand" "I")) + (clobber (match_operand:SI 2 "register_operand" "r"))] + "" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:SI 0 "register_operand" "y") + (match_operand:SI 1 "memory_operand" ">")) + (clobber (reg:SI 0))] + "" + [(set (match_dup 0) (match_dup 1))] + "") + +;; ------------------------------------------------------------------------ +;; Define the real conditional branch instructions. +;; ------------------------------------------------------------------------ + +(define_insn "branch_true" + [(set (pc) (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* return output_branch (1, insn, operands);" + [(set_attr "type" "cbranch")]) + +(define_insn "branch_false" + [(set (pc) (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* return output_branch (0, insn, operands);" + [(set_attr "type" "cbranch")]) + +;; Patterns to prevent reorg from re-combining a condbranch with a branch +;; which destination is too far away. +;; The const_int_operand is distinct for each branch target; it avoids +;; unwanted matches with redundant_insn. +(define_insn "block_branch_redirect" + [(set (pc) (unspec [(match_operand 0 "const_int_operand" "")] 4))] + "" + "" + [(set_attr "length" "0")]) + +;; This one has the additional purpose to record a possible scratch register +;; for the following branch. 
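+;; (Presumably the register recorded here is what a later fixup can use to
+;; load the branch target when the conditional branch has to be rewritten
+;; as an indirect jump; the insn itself expands to no code.)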
+(define_insn "indirect_jump_scratch" + [(set (match_operand 0 "register_operand" "r") + (unspec [(match_operand 1 "const_int_operand" "")] 4))] + "" + "" + [(set_attr "length" "0")]) + +;; Conditional branch insns + +(define_expand "beq" + [(set (pc) + (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, EQ);") + +(define_expand "bne" + [(set (pc) + (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, EQ);") + +(define_expand "bgt" + [(set (pc) + (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, GT);") + +(define_expand "blt" + [(set (pc) + (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT) + { + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + emit_insn (gen_bgt (operands[0])); + DONE; + } + from_compare (operands, GE); +}") + +(define_expand "ble" + [(set (pc) + (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + if (TARGET_SH3E + && TARGET_IEEE + && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT) + { + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + emit_insn (gen_bge (operands[0])); + DONE; + } + from_compare (operands, GT); +}") + +(define_expand "bge" + [(set (pc) + (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + " +{ + if (TARGET_SH3E + && ! TARGET_IEEE + && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT) + { + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + emit_insn (gen_ble (operands[0])); + DONE; + } + from_compare (operands, GE); +}") + +(define_expand "bgtu" + [(set (pc) + (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, GTU); ") + +(define_expand "bltu" + [(set (pc) + (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, GEU);") + +(define_expand "bgeu" + [(set (pc) + (if_then_else (ne (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, GEU);") + +(define_expand "bleu" + [(set (pc) + (if_then_else (eq (reg:SI 18) (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "from_compare (operands, GTU);") + +;; ------------------------------------------------------------------------ +;; Jump and linkage insns +;; ------------------------------------------------------------------------ + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "* +{ + /* The length is 16 if the delay slot is unfilled. 
*/ + if (get_attr_length(insn) > 4) + return output_far_jump(insn, operands[0]); + else + return \"bra %l0%#\"; +}" + [(set_attr "type" "jump") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "calli" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (use (reg:SI 48)) + (clobber (reg:SI 17))] + "" + "jsr @%0%#" + [(set_attr "type" "call") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "call_valuei" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (use (reg:SI 48)) + (clobber (reg:SI 17))] + "" + "jsr @%1%#" + [(set_attr "type" "call") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "call" + [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" "")) + (match_operand 1 "" "")) + (use (reg:SI 48)) + (clobber (reg:SI 17))])] + "" + "operands[0] = force_reg (SImode, XEXP (operands[0], 0));") + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "arith_reg_operand" "") + (call (mem:SI (match_operand 1 "arith_reg_operand" "")) + (match_operand 2 "" ""))) + (use (reg:SI 48)) + (clobber (reg:SI 17))])] + "" + "operands[1] = force_reg (SImode, XEXP (operands[1], 0));") + +(define_insn "indirect_jump" + [(set (pc) + (match_operand:SI 0 "arith_reg_operand" "r"))] + "" + "jmp @%0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +;; The use of operand 1 / 2 helps us distinguish case table jumps +;; which can be present in structured code from indirect jumps which can not +;; be present in structured code. This allows -fprofile-arcs to work. + +;; For SH1 processors. +(define_insn "casesi_jump_1" + [(set (pc) + (match_operand:SI 0 "register_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "" + "jmp @%0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +;; For all later processors. +(define_insn "casesi_jump_2" + [(set (pc) (plus:SI (match_operand:SI 0 "register_operand" "r") + (label_ref (match_operand 1 "" "")))) + (use (label_ref (match_operand 2 "" "")))] + "! INSN_UID (operands[1]) || prev_real_insn (operands[1]) == insn" + "braf %0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +;; Call subroutine returning any type. +;; ??? This probably doesn't work. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "TARGET_SH3E" + " +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}") + +;; ------------------------------------------------------------------------ +;; Misc insns +;; ------------------------------------------------------------------------ + +(define_insn "dect" + [(set (reg:SI 18) + (eq:SI (match_operand:SI 0 "arith_reg_operand" "+r") (const_int 1))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))] + "TARGET_SH2" + "dt %0" + [(set_attr "type" "arith")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop") + +;; Load address of a label. 
This is only generated by the casesi expand, +;; and by machine_dependent_reorg (fixing up fp moves). +;; This must use unspec, because this only works for labels that are +;; within range, + +(define_insn "mova" + [(set (reg:SI 0) + (unspec [(label_ref (match_operand 0 "" ""))] 1))] + "" + "mova %O0,r0" + [(set_attr "in_delay_slot" "no") + (set_attr "type" "arith")]) + +;; case instruction for switch statements. + +;; Operand 0 is index +;; operand 1 is the minimum bound +;; operand 2 is the maximum bound - minimum bound + 1 +;; operand 3 is CODE_LABEL for the table; +;; operand 4 is the CODE_LABEL to go to if index out of range. + +(define_expand "casesi" + [(match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "") + (match_operand 3 "" "") (match_operand 4 "" "")] + "" + " +{ + rtx reg = gen_reg_rtx (SImode); + rtx reg2 = gen_reg_rtx (SImode); + operands[1] = copy_to_mode_reg (SImode, operands[1]); + operands[2] = copy_to_mode_reg (SImode, operands[2]); + /* If optimizing, casesi_worker depends on the mode of the instruction + before label it 'uses' - operands[3]. */ + emit_insn (gen_casesi_0 (operands[0], operands[1], operands[2], operands[4], + reg)); + emit_insn (gen_casesi_worker_0 (reg2, reg, operands[3])); + if (TARGET_SH2) + emit_jump_insn (gen_casesi_jump_2 (reg2, gen_label_rtx (), operands[3])); + else + emit_jump_insn (gen_casesi_jump_1 (reg2, operands[3])); + /* For SH2 and newer, the ADDR_DIFF_VEC is not actually relative to + operands[3], but to lab. We will fix this up in + machine_dependent_reorg. */ + emit_barrier (); + DONE; +}") + +(define_expand "casesi_0" + [(set (match_operand:SI 4 "" "") (match_operand:SI 0 "arith_reg_operand" "")) + (set (match_dup 4) (minus:SI (match_dup 4) + (match_operand:SI 1 "arith_operand" ""))) + (set (reg:SI 18) + (gtu:SI (match_dup 4) + (match_operand:SI 2 "arith_reg_operand" ""))) + (set (pc) + (if_then_else (ne (reg:SI 18) + (const_int 0)) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "") + +;; ??? reload might clobber r0 if we use it explicitly in the RTL before +;; reload; using a R0_REGS pseudo reg is likely to give poor code. +;; So we keep the use of r0 hidden in a R0_REGS clobber until after reload. + +(define_insn "casesi_worker_0" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec [(match_operand 1 "register_operand" "0,r") + (label_ref (match_operand 2 "" ""))] 2)) + (clobber (match_scratch:SI 3 "=X,1")) + (clobber (match_scratch:SI 4 "=&z,z"))] + "" + "#") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (unspec [(match_operand 1 "register_operand" "") + (label_ref (match_operand 2 "" ""))] 2)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 ""))] + "! 
TARGET_SH2 && reload_completed" + [(set (reg:SI 0) (unspec [(label_ref (match_dup 2))] 1)) + (parallel [(set (match_dup 0) + (unspec [(reg:SI 0) (match_dup 1) (label_ref (match_dup 2))] 2)) + (clobber (match_dup 3))]) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI 0)))] + "LABEL_NUSES (operands[2])++;") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (unspec [(match_operand 1 "register_operand" "") + (label_ref (match_operand 2 "" ""))] 2)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 ""))] + "TARGET_SH2 && reload_completed" + [(set (reg:SI 0) (unspec [(label_ref (match_dup 2))] 1)) + (parallel [(set (match_dup 0) + (unspec [(reg:SI 0) (match_dup 1) (label_ref (match_dup 2))] 2)) + (clobber (match_dup 3))])] + "LABEL_NUSES (operands[2])++;") + +(define_insn "*casesi_worker" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec [(reg:SI 0) (match_operand 1 "register_operand" "0,r") + (label_ref (match_operand 2 "" ""))] 2)) + (clobber (match_scratch:SI 3 "=X,1"))] + "" + "* +{ + rtx diff_vec = PATTERN (next_real_insn (operands[2])); + + if (GET_CODE (diff_vec) != ADDR_DIFF_VEC) + abort (); + + switch (GET_MODE (diff_vec)) + { + case SImode: + return \"shll2 %1\;mov.l @(r0,%1),%0\"; + case HImode: + return \"add %1,%1\;mov.w @(r0,%1),%0\"; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return \"mov.b @(r0,%1),%0\;extu.b %0,%0\"; + return \"mov.b @(r0,%1),%0\"; + default: + abort (); + } +}" + [(set_attr "length" "4")]) + +;; ??? This is not the proper place to invoke another compiler pass; +;; Alas, there is no proper place to put it. +;; ??? This is also an odd place for the call to emit_fpscr_use. It +;; would be all right if it were for an define_expand for return, but +;; that doesn't mix with emitting a prologue. 
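+;; (Note that the condition string below is a C comma expression: it calls
+;; emit_fpscr_use and remove_dead_before_cse purely for their side effects,
+;; and only the final reload_completed term decides whether the pattern
+;; is available.)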
+(define_insn "return" + [(return)] + "emit_fpscr_use (), + remove_dead_before_cse (), + reload_completed" + "%@ %#" + [(set_attr "type" "return") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "prologue" + [(const_int 0)] + "" + "sh_expand_prologue (); DONE;") + +(define_expand "epilogue" + [(return)] + "" + "sh_expand_epilogue ();") + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] 0)] + "" + "" + [(set_attr "length" "0")]) + +;; ------------------------------------------------------------------------ +;; Scc instructions +;; ------------------------------------------------------------------------ + +(define_insn "movt" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (eq:SI (reg:SI 18) (const_int 1)))] + "" + "movt %0" + [(set_attr "type" "arith")]) + +(define_expand "seq" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (EQ);") + +(define_expand "slt" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (LT);") + +(define_expand "sle" + [(match_operand:SI 0 "arith_reg_operand" "")] + "" + " +{ + rtx tmp = sh_compare_op0; + sh_compare_op0 = sh_compare_op1; + sh_compare_op1 = tmp; + emit_insn (gen_sge (operands[0])); + DONE; +}") + +(define_expand "sgt" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (GT);") + +(define_expand "sge" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + " +{ + if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT) + { + if (TARGET_IEEE) + { + rtx t_reg = gen_rtx (REG, SImode, T_REG); + rtx lab = gen_label_rtx (); + prepare_scc_operands (EQ); + emit_jump_insn (gen_branch_true (lab)); + prepare_scc_operands (GT); + emit_label (lab); + emit_insn (gen_movt (operands[0])); + } + else + emit_insn (gen_movnegt (operands[0], prepare_scc_operands (LT))); + DONE; + } + operands[1] = prepare_scc_operands (GE); +}") + +(define_expand "sgtu" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (GTU);") + +(define_expand "sltu" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (LTU);") + +(define_expand "sleu" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (LEU);") + +(define_expand "sgeu" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (match_dup 1))] + "" + "operands[1] = prepare_scc_operands (GEU);") + +;; sne moves the complement of the T reg to DEST like this: +;; cmp/eq ... +;; mov #-1,temp +;; negc temp,dest +;; This is better than xoring compare result with 1 because it does +;; not require r0 and further, the -1 may be CSE-ed or lifted out of a +;; loop. 
+ +(define_expand "sne" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (match_operand:SI 0 "arith_reg_operand" "") + (neg:SI (plus:SI (match_dup 1) + (match_dup 2)))) + (set (reg:SI 18) + (ne:SI (ior:SI (match_dup 1) (match_dup 2)) + (const_int 0)))])] + "" + " +{ + operands[1] = prepare_scc_operands (EQ); + operands[2] = gen_reg_rtx (SImode); +}") + +;; Use the same trick for FP sle / sge +(define_expand "movnegt" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (match_operand 0 "" "") + (neg:SI (plus:SI (match_dup 1) + (match_dup 2)))) + (set (reg:SI 18) + (ne:SI (ior:SI (match_operand 1 "" "") (match_dup 2)) + (const_int 0)))])] + "" + "operands[2] = gen_reg_rtx (SImode);") + +;; Recognize mov #-1/negc/neg sequence, and change it to movt/add #-1. +;; This prevents a regression that occurred when we switched from xor to +;; mov/neg for sne. + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "") + (plus:SI (reg:SI 18) + (const_int -1)))] + "" + [(set (match_dup 0) (eq:SI (reg:SI 18) (const_int 1))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))] + "") + +;; ------------------------------------------------------------------------- +;; Instructions to cope with inline literal tables +;; ------------------------------------------------------------------------- + +; 2 byte integer in line + +(define_insn "consttable_2" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")] 2)] + "" + "* +{ + assemble_integer (operands[0], 2, 1); + return \"\"; +}" + [(set_attr "length" "2") + (set_attr "in_delay_slot" "no")]) + +; 4 byte integer in line + +(define_insn "consttable_4" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")] 4)] + "" + "* +{ + assemble_integer (operands[0], 4, 1); + return \"\"; +}" + [(set_attr "length" "4") + (set_attr "in_delay_slot" "no")]) + +; 8 byte integer in line + +(define_insn "consttable_8" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")] 6)] + "" + "* +{ + assemble_integer (operands[0], 8, 1); + return \"\"; +}" + [(set_attr "length" "8") + (set_attr "in_delay_slot" "no")]) + +; 4 byte floating point + +(define_insn "consttable_sf" + [(unspec_volatile [(match_operand:SF 0 "general_operand" "=g")] 4)] + "" + "* +{ + union real_extract u; + bcopy ((char *) &CONST_DOUBLE_LOW (operands[0]), (char *) &u, sizeof u); + assemble_real (u.d, SFmode); + return \"\"; +}" + [(set_attr "length" "4") + (set_attr "in_delay_slot" "no")]) + +; 8 byte floating point + +(define_insn "consttable_df" + [(unspec_volatile [(match_operand:DF 0 "general_operand" "=g")] 6)] + "" + "* +{ + union real_extract u; + bcopy ((char *) &CONST_DOUBLE_LOW (operands[0]), (char *) &u, sizeof u); + assemble_real (u.d, DFmode); + return \"\"; +}" + [(set_attr "length" "8") + (set_attr "in_delay_slot" "no")]) + +;; Alignment is needed for some constant tables; it may also be added for +;; Instructions at the start of loops, or after unconditional branches. +;; ??? We would get more accurate lengths if we did instruction +;; alignment based on the value of INSN_CURRENT_ADDRESS; the approach used +;; here is too conservative. + +; align to a two byte boundary + +(define_expand "align_2" + [(unspec_volatile [(const_int 1)] 1)] + "" + "") + +; align to a four byte boundary +;; align_4 and align_log are instructions for the starts of loops, or +;; after unconditional branches, which may take up extra room. 
+ +(define_expand "align_4" + [(unspec_volatile [(const_int 2)] 1)] + "" + "") + +; align to a cache line boundary + +(define_insn "align_log" + [(unspec_volatile [(match_operand 0 "const_int_operand" "")] 1)] + "" + "" + [(set_attr "length" "0") + (set_attr "in_delay_slot" "no")]) + +; emitted at the end of the literal table, used to emit the +; 32bit branch labels if needed. + +(define_insn "consttable_end" + [(unspec_volatile [(const_int 0)] 11)] + "" + "* return output_jump_label_table ();" + [(set_attr "in_delay_slot" "no")]) + +;; ------------------------------------------------------------------------- +;; Misc +;; ------------------------------------------------------------------------- + +;; String/block move insn. + +(define_expand "movstrsi" + [(parallel [(set (mem:BLK (match_operand:BLK 0 "" "")) + (mem:BLK (match_operand:BLK 1 "" ""))) + (use (match_operand:SI 2 "nonmemory_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (clobber (reg:SI 0))])] + "" + " +{ + if(expand_block_move (operands)) + DONE; + else FAIL; +}") + +(define_insn "block_move_real" + [(parallel [(set (mem:BLK (reg:SI 4)) + (mem:BLK (reg:SI 5))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (clobber (reg:SI 17)) + (clobber (reg:SI 0))])] + "! TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_lump_real" + [(parallel [(set (mem:BLK (reg:SI 4)) + (mem:BLK (reg:SI 5))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (use (reg:SI 6)) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (clobber (reg:SI 6)) + (clobber (reg:SI 0))])] + "! TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_move_real_i4" + [(parallel [(set (mem:BLK (reg:SI 4)) + (mem:BLK (reg:SI 5))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (clobber (reg:SI 17)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2))])] + "TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_lump_real_i4" + [(parallel [(set (mem:BLK (reg:SI 4)) + (mem:BLK (reg:SI 5))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (use (reg:SI 6)) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (clobber (reg:SI 6)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI 3))])] + "TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +;; ------------------------------------------------------------------------- +;; Floating point instructions. +;; ------------------------------------------------------------------------- + +;; ??? All patterns should have a type attribute. 
+ +(define_expand "fpu_switch0" + [(set (match_operand:SI 0 "" "") (symbol_ref "__fpscr_values")) + (set (match_dup 2) (match_dup 1))] + "" + " +{ + operands[1] = gen_rtx (MEM, PSImode, operands[0]); + RTX_UNCHANGING_P (operands[1]) = 1; + operands[2] = get_fpscr_rtx (); +}") + +(define_expand "fpu_switch1" + [(set (match_operand:SI 0 "" "") (symbol_ref "__fpscr_values")) + (set (match_dup 1) (plus:SI (match_dup 0) (const_int 4))) + (set (match_dup 3) (match_dup 2))] + "" + " +{ + operands[1] = gen_reg_rtx (SImode); + operands[2] = gen_rtx (MEM, PSImode, operands[1]); + RTX_UNCHANGING_P (operands[2]) = 1; + operands[3] = get_fpscr_rtx (); +}") + +(define_expand "movpsi" + [(set (match_operand:PSI 0 "register_operand" "") + (match_operand:PSI 1 "general_movsrc_operand" ""))] + "" + "") + +;; The c / m alternative is a fake to guide reload to load directly into +;; fpscr, since reload doesn't know how to use post-increment. +;; GO_IF_LEGITIMATE_ADDRESS guards about bogus addresses before reload, +;; SECONDARY_INPUT_RELOAD_CLASS does this during reload, and the insn's +;; predicate after reload. +;; The gp_fpul type for r/!c might look a bit odd, but it actually schedules +;; like a gpr <-> fpul move. +(define_insn "fpu_switch" + [(set (match_operand:PSI 0 "register_operand" "c,c,r,c,c,r,m,r") + (match_operand:PSI 1 "general_movsrc_operand" "c,>,m,m,r,r,r,!c"))] + "! reload_completed + || true_regnum (operands[0]) != FPSCR_REG || GET_CODE (operands[1]) != MEM + || GET_CODE (XEXP (operands[1], 0)) != PLUS" + "@ + ! precision stays the same + lds.l %1,fpscr + mov.l %1,%0 + # + lds %1,fpscr + mov %1,%0 + mov.l %1,%0 + sts fpscr,%0" + [(set_attr "length" "0,2,2,4,2,2,2,2") + (set_attr "type" "dfp_conv,dfp_conv,load,dfp_conv,dfp_conv,move,store,gp_fpul")]) + +(define_split + [(set (reg:PSI 48) (mem:PSI (match_operand:SI 0 "register_operand" "r")))] + "find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" + [(set (match_dup 0) (match_dup 0))] + " +{ + rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (), + gen_rtx (MEM, PSImode, + gen_rtx (POST_INC, Pmode, + operands[0])))); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[0], NULL_RTX); +}") + +(define_split + [(set (reg:PSI 48) (mem:PSI (match_operand:SI 0 "register_operand" "r")))] + "" + [(set (match_dup 0) (plus:SI (match_dup 0) (const_int -4)))] + " +{ + rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (), + gen_rtx (MEM, PSImode, + gen_rtx (POST_INC, Pmode, + operands[0])))); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[0], NULL_RTX); +}") + +;; ??? This uses the fp unit, but has no type indicating that. +;; If we did that, this would either give a bogus latency or introduce +;; a bogus FIFO constraint. +;; Since this insn is currently only used for prologues/epilogues, +;; it is probably best to claim no function unit, which matches the +;; current setting. 
+(define_insn "toggle_sz" + [(set (reg:PSI 48) (xor:PSI (reg:PSI 48) (const_int 1048576)))] + "TARGET_SH4" + "fschg") + +(define_expand "addsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_binop (&gen_addsf3_i, operands); DONE; }") + +(define_insn "addsf3_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (plus:SF (match_operand:SF 1 "arith_reg_operand" "%0") + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fadd %2,%0" + [(set_attr "type" "fp")]) + +(define_expand "subsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_binop (&gen_subsf3_i, operands); DONE; }") + +(define_insn "subsf3_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (minus:SF (match_operand:SF 1 "arith_reg_operand" "0") + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fsub %2,%0" + [(set_attr "type" "fp")]) + +;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR +;; register in feeding fp instructions. Thus, we cannot generate fmac for +;; mixed-precision SH4 targets. To allow it to be still generated for the +;; SH3E, we use a separate insn for SH3E mulsf3. + +(define_expand "mulsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + " +{ + if (TARGET_SH4) + expand_sf_binop (&gen_mulsf3_i4, operands); + else + emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_insn "mulsf3_i4" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (mult:SF (match_operand:SF 1 "arith_reg_operand" "%0") + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fmul %2,%0" + [(set_attr "type" "fp")]) + +(define_insn "mulsf3_ie" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (mult:SF (match_operand:SF 1 "arith_reg_operand" "%0") + (match_operand:SF 2 "arith_reg_operand" "f")))] + "TARGET_SH3E && ! TARGET_SH4" + "fmul %2,%0" + [(set_attr "type" "fp")]) + +(define_insn "*macsf3" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "arith_reg_operand" "%w") + (match_operand:SF 2 "arith_reg_operand" "f")) + (match_operand:SF 3 "arith_reg_operand" "0"))) + (use (match_operand:PSI 4 "fpscr_operand" "c"))] + "TARGET_SH3E && ! 
TARGET_SH4" + "fmac fr0,%2,%0" + [(set_attr "type" "fp")]) + +(define_expand "divsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_binop (&gen_divsf3_i, operands); DONE; }") + +(define_insn "divsf3_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (div:SF (match_operand:SF 1 "arith_reg_operand" "0") + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fdiv %2,%0" + [(set_attr "type" "fdiv")]) + +(define_expand "floatsisf2" + [(set (reg:SI 22) + (match_operand:SI 1 "arith_reg_operand" "")) + (parallel [(set (match_operand:SF 0 "arith_reg_operand" "") + (float:SF (reg:SI 22))) + (use (match_dup 2))])] + "TARGET_SH3E" + " +{ + if (TARGET_SH4) + { + emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 22), + operands[1])); + emit_sf_insn (gen_floatsisf2_i4 (operands[0], get_fpscr_rtx ())); + DONE; + } + operands[2] = get_fpscr_rtx (); +}") + +(define_insn "floatsisf2_i4" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (float:SF (reg:SI 22))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH3E" + "float fpul,%0" + [(set_attr "type" "fp")]) + +(define_insn "*floatsisf2_ie" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (float:SF (reg:SI 22)))] + "TARGET_SH3E && ! TARGET_SH4" + "float fpul,%0" + [(set_attr "type" "fp")]) + +(define_expand "fix_truncsfsi2" + [(set (reg:SI 22) + (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) + (set (match_operand:SI 0 "arith_reg_operand" "=r") + (reg:SI 22))] + "TARGET_SH3E" + " +{ + if (TARGET_SH4) + { + emit_sf_insn (gen_fix_truncsfsi2_i4 (operands[1], get_fpscr_rtx ())); + emit_insn (gen_rtx (SET, VOIDmode, operands[0], + gen_rtx (REG, SImode, 22))); + DONE; + } +}") + +(define_insn "fix_truncsfsi2_i4" + [(set (reg:SI 22) + (fix:SI (match_operand:SF 0 "arith_reg_operand" "f"))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "ftrc %0,fpul" + [(set_attr "type" "fp")]) + +(define_insn "fix_truncsfsi2_i4_2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (reg:SI 48)) + (clobber (reg:SI 22))] + "TARGET_SH4" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "TARGET_SH4" + [(parallel [(set (reg:SI 22) (fix:SI (match_dup 1))) + (use (match_dup 2))]) + (set (match_dup 0) (reg:SI 22))]) + +(define_insn "*fixsfsi" + [(set (reg:SI 22) + (fix:SI (match_operand:SF 0 "arith_reg_operand" "f")))] + "TARGET_SH3E && ! TARGET_SH4" + "ftrc %0,fpul" + [(set_attr "type" "fp")]) + +(define_insn "cmpgtsf_t" + [(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f")))] + "TARGET_SH3E && ! TARGET_SH4" + "fcmp/gt %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "cmpeqsf_t" + [(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f")))] + "TARGET_SH3E && ! TARGET_SH4" + "fcmp/eq %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "ieee_ccmpeqsf_t" + [(set (reg:SI 18) (ior:SI (reg:SI 18) + (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f"))))] + "TARGET_SH3E && TARGET_IEEE && ! 
TARGET_SH4" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4")]) + + +(define_insn "cmpgtsf_t_i4" + [(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/gt %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "cmpeqsf_t_i4" + [(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/eq %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "*ieee_ccmpeqsf_t_4" + [(set (reg:SI 18) (ior:SI (reg:SI 18) + (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f")))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_IEEE && TARGET_SH4" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4")]) + +(define_expand "cmpsf" + [(set (reg:SI 18) (compare (match_operand:SF 0 "arith_operand" "") + (match_operand:SF 1 "arith_operand" "")))] + "TARGET_SH3E" + " +{ + sh_compare_op0 = operands[0]; + sh_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "negsf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_unop (&gen_negsf2_i, operands); DONE; }") + +(define_insn "negsf2_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (neg:SF (match_operand:SF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fneg %0" + [(set_attr "type" "fmove")]) + +(define_expand "sqrtsf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_unop (&gen_sqrtsf2_i, operands); DONE; }") + +(define_insn "sqrtsf2_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (sqrt:SF (match_operand:SF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fsqrt %0" + [(set_attr "type" "fdiv")]) + +(define_expand "abssf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_unop (&gen_abssf2_i, operands); DONE; }") + +(define_insn "abssf2_i" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (abs:SF (match_operand:SF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fabs %0" + [(set_attr "type" "fmove")]) + +(define_expand "adddf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_adddf3_i, operands); DONE; }") + +(define_insn "adddf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (plus:DF (match_operand:DF 1 "arith_reg_operand" "%0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4" + "fadd %2,%0" + [(set_attr "type" "dfp_arith")]) + +(define_expand "subdf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_subdf3_i, operands); DONE; }") + +(define_insn "subdf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (minus:DF (match_operand:DF 1 "arith_reg_operand" "0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + 
"TARGET_SH4" + "fsub %2,%0" + [(set_attr "type" "dfp_arith")]) + +(define_expand "muldf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_muldf3_i, operands); DONE; }") + +(define_insn "muldf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (mult:DF (match_operand:DF 1 "arith_reg_operand" "%0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4" + "fmul %2,%0" + [(set_attr "type" "dfp_arith")]) + +(define_expand "divdf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_divdf3_i, operands); DONE; }") + +(define_insn "divdf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (div:DF (match_operand:DF 1 "arith_reg_operand" "0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4" + "fdiv %2,%0" + [(set_attr "type" "dfdiv")]) + +(define_expand "floatsidf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "")] + "TARGET_SH4" + " +{ + emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 22), operands[1])); + emit_df_insn (gen_floatsidf2_i (operands[0], get_fpscr_rtx ())); + DONE; +}") + +(define_insn "floatsidf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (float:DF (reg:SI 22))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "float fpul,%0" + [(set_attr "type" "dfp_conv")]) + +(define_expand "fix_truncdfsi2" + [(match_operand:SI 0 "arith_reg_operand" "=r") + (match_operand:DF 1 "arith_reg_operand" "f")] + "TARGET_SH4" + " +{ + emit_df_insn (gen_fix_truncdfsi2_i (operands[1], get_fpscr_rtx ())); + emit_insn (gen_rtx (SET, VOIDmode, operands[0], gen_rtx (REG, SImode, 22))); + DONE; +}") + +(define_insn "fix_truncdfsi2_i" + [(set (reg:SI 22) + (fix:SI (match_operand:DF 0 "arith_reg_operand" "f"))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "ftrc %0,fpul" + [(set_attr "type" "dfp_conv")]) + +(define_insn "fix_truncdfsi2_i4" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "TARGET_SH4" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "TARGET_SH4" + [(parallel [(set (reg:SI 22) (fix:SI (match_dup 1))) + (use (match_dup 2))]) + (set (match_dup 0) (reg:SI 22))]) + +(define_insn "cmpgtdf_t" + [(set (reg:SI 18) (gt:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/gt %1,%0" + [(set_attr "type" "dfp_cmp")]) + +(define_insn "cmpeqdf_t" + [(set (reg:SI 18) (eq:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/eq %1,%0" + [(set_attr "type" "dfp_cmp")]) + +(define_insn "*ieee_ccmpeqdf_t" + [(set (reg:SI 18) (ior:SI (reg:SI 18) + (eq:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f")))) + (use 
(match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_IEEE && TARGET_SH4" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4")]) + +(define_expand "cmpdf" + [(set (reg:SI 18) (compare (match_operand:DF 0 "arith_operand" "") + (match_operand:DF 1 "arith_operand" "")))] + "TARGET_SH4" + " +{ + sh_compare_op0 = operands[0]; + sh_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "negdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_unop (&gen_negdf2_i, operands); DONE; }") + +(define_insn "negdf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (neg:DF (match_operand:DF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fneg %0" + [(set_attr "type" "fmove")]) + +(define_expand "sqrtdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_unop (&gen_sqrtdf2_i, operands); DONE; }") + +(define_insn "sqrtdf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (sqrt:DF (match_operand:DF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fsqrt %0" + [(set_attr "type" "dfdiv")]) + +(define_expand "absdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_unop (&gen_absdf2_i, operands); DONE; }") + +(define_insn "absdf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (abs:DF (match_operand:DF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fabs %0" + [(set_attr "type" "fmove")]) + +(define_expand "extendsfdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH4" + " +{ + emit_sf_insn (gen_movsf_ie (gen_rtx (REG, SFmode, 22), operands[1], + get_fpscr_rtx ())); + emit_df_insn (gen_extendsfdf2_i4 (operands[0], get_fpscr_rtx ())); + DONE; +}") + +(define_insn "extendsfdf2_i4" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (float_extend:DF (reg:SF 22))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcnvsd fpul,%0" + [(set_attr "type" "fp")]) + +(define_expand "truncdfsf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + " +{ + emit_df_insn (gen_truncdfsf2_i4 (operands[1], get_fpscr_rtx ())); + emit_sf_insn (gen_movsf_ie (operands[0], gen_rtx (REG, SFmode, 22), + get_fpscr_rtx ())); + DONE; +}") + +(define_insn "truncdfsf2_i4" + [(set (reg:SF 22) + (float_truncate:SF (match_operand:DF 0 "arith_reg_operand" "f"))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcnvds %0,fpul" + [(set_attr "type" "fp")]) + +;; Bit field extract patterns. These give better code for packed bitfields, +;; because they allow auto-increment addresses to be generated. + +(define_expand "insv" + [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "") + (match_operand:SI 1 "immediate_operand" "") + (match_operand:SI 2 "immediate_operand" "")) + (match_operand:SI 3 "general_operand" ""))] + "! TARGET_LITTLE_ENDIAN" + " +{ + rtx addr_target, orig_address, shift_reg; + HOST_WIDE_INT size; + + /* ??? expmed doesn't care for non-register predicates. */ + if (! memory_operand (operands[0], VOIDmode) + || ! immediate_operand (operands[1], VOIDmode) + || ! immediate_operand (operands[2], VOIDmode) + || ! 
general_operand (operands[3], VOIDmode)) + FAIL; + /* If this isn't a 16 / 24 / 32 bit field, or if + it doesn't start on a byte boundary, then fail. */ + size = INTVAL (operands[1]); + if (size < 16 || size > 32 || size % 8 != 0 + || (INTVAL (operands[2]) % 8) != 0) + FAIL; + + size /= 8; + orig_address = XEXP (operands[0], 0); + addr_target = gen_reg_rtx (SImode); + shift_reg = gen_reg_rtx (SImode); + emit_insn (gen_movsi (shift_reg, operands[3])); + emit_insn (gen_addsi3 (addr_target, orig_address, GEN_INT (size - 1))); + + operands[0] = change_address (operands[0], QImode, addr_target); + emit_insn (gen_movqi (operands[0], gen_rtx (SUBREG, QImode, shift_reg, 0))); + + while (size -= 1) + { + emit_insn (gen_lshrsi3_k (shift_reg, shift_reg, GEN_INT (8))); + emit_insn (gen_addsi3 (addr_target, addr_target, GEN_INT (-1))); + emit_insn (gen_movqi (operands[0], + gen_rtx (SUBREG, QImode, shift_reg, 0))); + } + + DONE; +}") + +;; ------------------------------------------------------------------------- +;; Peepholes +;; ------------------------------------------------------------------------- + +;; This matches cases where a stack pointer increment at the start of the +;; epilogue combines with a stack slot read loading the return value. + +(define_peephole + [(set (match_operand:SI 0 "arith_reg_operand" "") + (mem:SI (match_operand:SI 1 "arith_reg_operand" ""))) + (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))] + "REGNO (operands[1]) != REGNO (operands[0])" + "mov.l @%1+,%0") + +;; See the comment on the dt combiner pattern above. + +(define_peephole + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (plus:SI (match_dup 0) + (const_int -1))) + (set (reg:SI 18) + (eq:SI (match_dup 0) + (const_int 0)))] + "TARGET_SH2" + "dt %0") + +;; These convert sequences such as `mov #k,r0; add r15,r0; mov.l @r0,rn' +;; to `mov #k,r0; mov.l @(r0,r15),rn'. These sequences are generated by +;; reload when the constant is too large for a reg+offset address. + +;; ??? We would get much better code if this was done in reload. This would +;; require modifying find_reloads_address to recognize that if the constant +;; is out-of-range for an immediate add, then we get better code by reloading +;; the constant into a register than by reloading the sum into a register, +;; since the former is one instruction shorter if the address does not need +;; to be offsettable. Unfortunately this does not work, because there is +;; only one register, r0, that can be used as an index register. This register +;; is also the function return value register. So, if we try to force reload +;; to use double-reg addresses, then we end up with some instructions that +;; need to use r0 twice. The only way to fix this is to change the calling +;; convention so that r0 is not used to return values. 
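A rough example of source code that produces such sequences (deep_frame is an illustrative name; the exact frame layout is up to the compiler): mov.l @(disp,Rn) only encodes a 4-bit displacement scaled by 4, i.e. offsets 0..60 bytes, so a stack slot beyond that window makes reload materialize the offset in r0 first, yielding exactly the mov #k,r0 / add r15,r0 sequences these peepholes clean up.

/* With enough other locals, "slot" can end up more than 60 bytes from
   r15, so its address no longer fits the @(disp,Rn) form and may be
   accessed via an r0 + r15 indexed address instead.  */
int
deep_frame (void)
{
  volatile int pad[32];    /* roughly 128 bytes of other locals */
  volatile int slot = 42;
  pad[0] = 0;
  return slot;
}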
+ +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 2 "general_movsrc_operand" ""))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.l %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 2 "general_movdst_operand" "") + (mem:SI (match_dup 0)))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.l @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:HI (match_dup 0)) + (match_operand:HI 2 "general_movsrc_operand" ""))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.w %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:HI 2 "general_movdst_operand" "") + (mem:HI (match_dup 0)))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.w @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:QI (match_dup 0)) + (match_operand:QI 2 "general_movsrc_operand" ""))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.b %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:QI 2 "general_movdst_operand" "") + (mem:QI (match_dup 0)))] + "REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)" + "mov.b @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 2 "general_movsrc_operand" ""))] + "REGNO (operands[0]) == 0 + && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) < 16) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) < 16)) + && reg_unused_after (operands[0], insn)" + "mov.l %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SF 2 "general_movdst_operand" "") + + (mem:SF (match_dup 0)))] + "REGNO (operands[0]) == 0 + && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) < 16) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) < 16)) + && reg_unused_after (operands[0], insn)" + "mov.l @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 2 "general_movsrc_operand" ""))] + "REGNO (operands[0]) == 0 + && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) >= FIRST_FP_REG) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) >= FIRST_FP_REG)) + && reg_unused_after (operands[0], insn)" + "fmov{.s|} %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SF 2 "general_movdst_operand" "") 
+ + (mem:SF (match_dup 0)))] + "REGNO (operands[0]) == 0 + && ((GET_CODE (operands[2]) == REG && REGNO (operands[2]) >= FIRST_FP_REG) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) >= FIRST_FP_REG)) + && reg_unused_after (operands[0], insn)" + "fmov{.s|} @(%0,%1),%2") + +;; Switch to a new stack with its address in sp_switch (a SYMBOL_REF). */ +(define_insn "sp_switch_1" + [(const_int 1)] + "" + "* +{ + rtx xoperands[1]; + + xoperands[0] = sp_switch; + output_asm_insn (\"mov.l r0,@-r15\;mov.l %0,r0\", xoperands); + output_asm_insn (\"mov.l @r0,r0\;mov.l r15,@-r0\", xoperands); + return \"mov r0,r15\"; +}" + [(set_attr "length" "10")]) + +;; Switch back to the original stack for interrupt functions with the +;; sp_switch attribute. */ +(define_insn "sp_switch_2" + [(const_int 2)] + "" + "mov.l @r15+,r15\;mov.l @r15+,r0" + [(set_attr "length" "4")]) diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh new file mode 100755 index 0000000..bfbf45e --- /dev/null +++ b/gcc/config/sh/t-sh @@ -0,0 +1,29 @@ +CROSS_LIBGCC1 = libgcc1-asm.a +LIB1ASMSRC = sh/lib1funcs.asm +LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movstr \ + _movstr_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr + +# These are really part of libgcc1, but this will cause them to be +# built correctly, so... + +LIB2FUNCS_EXTRA = fp-bit.c dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >>dp-bit.c + echo '#endif' >> dp-bit.c + cat $(srcdir)/config/fp-bit.c >> dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >>fp-bit.c + echo '#endif' >> fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +MULTILIB_OPTIONS= ml m2/m3e/m4-single-only/m4-single/m4 +MULTILIB_DIRNAMES= +MULTILIB_MATCHES = m2=m3 + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/sh/xm-sh.h b/gcc/config/sh/xm-sh.h new file mode 100755 index 0000000..f51b787 --- /dev/null +++ b/gcc/config/sh/xm-sh.h @@ -0,0 +1,42 @@ +/* Configuration for GNU C-compiler for Hitachi SH. + Copyright (C) 1993, 1997 Free Software Foundation, Inc. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* #defines that need visibility everywhere. */ +#define FALSE 0 +#define TRUE 1 + +/* This describes the machine the compiler is hosted on. */ +#define HOST_BITS_PER_CHAR 8 +#define HOST_BITS_PER_SHORT 16 +#define HOST_BITS_PER_INT 32 +#define HOST_BITS_PER_LONG 32 + +/* If compiled with GNU C, use the built-in alloca. */ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#endif + +/* target machine dependencies. + tm.h is a symbolic link to the actual target specific file. */ +#include "tm.h" + +/* Arguments to use with `exit'. 
*/ +#define SUCCESS_EXIT_CODE 0 +#define FATAL_EXIT_CODE 33