[PATCH 4/5] Simulator for nds32

Wei-cheng Wang cole945@gmail.com
Mon Jul 8 09:30:00 GMT 2013


2013-07-08  Wei-Cheng Wang  <cole945@gmail.com>

	* include/gdb/sim-nds32.h: New file.
	* configure.tgt: Add nds32 support.
	* configure: Regenerate.
	* nds32/Makefile.in: New file.
	* nds32/aclocal.m4: New file.
	* nds32/config.in: New file.
	* nds32/configure.ac: New file.
	* nds32/interp.c: New file.
	* nds32/mingw32-hdep.c: New file.
	* nds32/mingw32-hdep.h: New file.
	* nds32/nds32-cop0.c: New file.
	* nds32/nds32-gmon.c: New file.
	* nds32/nds32-gmon.h: New file.
	* nds32/nds32-load.c: New file.
	* nds32/nds32-mm.c: New file.
	* nds32/nds32-mm.h: New file.
	* nds32/nds32-pfm.c: New file.
	* nds32/nds32-pfm.h: New file.
	* nds32/nds32-sim.h: New file.
	* nds32/nds32-syscall.c: New file.
	* nds32/nds32-syscall.h: New file.
	* nds32/rbtree.c: New file.
	* nds32/rbtree.h: New file.
	* nds32/sim-main.h: New file.
	* nds32/tconfig.in: New file.

--- /dev/null
+++ b/include/gdb/sim-nds32.h
@@ -0,0 +1,46 @@
+/* This file defines the interface between the NDS32 simulator and GDB.
+
+   Copyright 2009-2013 Free Software Foundation, Inc.
+
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef SIM_NDS32_H
+#define SIM_NDS32_H
+
+enum sim_nds32_regs	/* Register numbers used between GDB and the sim.  */
+{
+  SIM_NDS32_TA_REGNUM = 15,	/* Named GPRs; presumably r15, r28-r31.  */
+  SIM_NDS32_FP_REGNUM = 28,
+  SIM_NDS32_GP_REGNUM = 29,
+  SIM_NDS32_LP_REGNUM = 30,
+  SIM_NDS32_SP_REGNUM = 31,
+
+  SIM_NDS32_PC_REGNUM = 32,	/* $pc, then d0/d1 halves, ITB, IFC_LP.  */
+  SIM_NDS32_D0LO_REGNUM = 33,
+  SIM_NDS32_D0HI_REGNUM = 34,
+  SIM_NDS32_D1LO_REGNUM = 35,
+  SIM_NDS32_D1HI_REGNUM = 36,
+  SIM_NDS32_ITB_REGNUM = 37,
+  SIM_NDS32_IFCLP_REGNUM = 38,
+
+  SIM_NDS32_PSW_REGNUM = 64,	/* Presumably the PSW system register.  */
+
+  SIM_NDS32_FD0_REGNUM = 128,	/* Presumably the first FPU register.  */
+};
+
+#endif
diff --git a/sim/configure b/sim/configure
index ab98231b..9859383 100755
--- a/sim/configure
+++ b/sim/configure
@@ -653,6 +653,7 @@ mips
 mn10300
 moxie
 msp430
+nds32
 rl78
 rx
 sh64
@@ -3776,6 +3777,14 @@ subdirs="$subdirs arm"


        ;;
+   nds32*)
+
+  sim_arch=nds32
+  subdirs="$subdirs nds32"
+
+
+       sim_testsuite=yes
+       ;;
    rl78-*-*)

   sim_arch=rl78
diff --git a/sim/configure.tgt b/sim/configure.tgt
index 39f92b6..021ab41 100644
--- a/sim/configure.tgt
+++ b/sim/configure.tgt
@@ -89,6 +89,10 @@ case "${target}" in
    msp430*-*-*)
        SIM_ARCH(msp430)
        ;;
+   nds32*)
+       SIM_ARCH(nds32)
+       sim_testsuite=yes
+       ;;
    rl78-*-*)
        SIM_ARCH(rl78)
        ;;
diff --git a/sim/nds32/Makefile.in b/sim/nds32/Makefile.in
new file mode 100644
index 0000000..645348f
--- /dev/null
+++ b/sim/nds32/Makefile.in
@@ -0,0 +1,47 @@
+#    Makefile template for Configure for the NDS32 sim library.
+#    Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+## COMMON_PRE_CONFIG_FRAG
+
+SIM_EXTRA_CFLAGS = -DHAVE_ENVIRON
+SIM_EXTRA_LIBS = -lm
+
+SIM_RUN_OBJS = nrun.o
+
+SIM_OBJS = \
+	$(SIM_NEW_COMMON_OBJS) \
+	interp.o \
+	nds32-cop0.o \
+	nds32-load.o \
+	nds32-mm.o \
+	nds32-syscall.o \
+	nds32-gmon.o \
+	nds32-pfm.o \
+	sim-cpu.o \
+	sim-engine.o \
+	sim-hrw.o \
+	sim-load.o \
+	sim-resume.o \
+	sim-stop.o \
+	sim-reason.o \
+	sim-reg.o \
+	sim-trace.o \
+	rbtree.o \
+	@NDS32_SIM_EXTRA_OBJS@ \
+	$(SIM_EXTRA_OBJS)
+## COMMON_POST_CONFIG_FRAG
+
+nds32-syscall.o: nds32-syscall.c targ-vals.h mingw32-hdep.h nds32-sim.h nds32-mm.h nds32-syscall.h
diff --git a/sim/nds32/aclocal.m4 b/sim/nds32/aclocal.m4
new file mode 100644
index 0000000..86fe2f8
--- /dev/null
+++ b/sim/nds32/aclocal.m4
@@ -0,0 +1,88 @@
+# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
+
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+# 2005, 2006, 2007, 2008, 2009  Free Software Foundation, Inc.
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+# AM_CONDITIONAL                                            -*- Autoconf -*-
+
+# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006, 2008
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 9
+
+# AM_CONDITIONAL(NAME, SHELL-CONDITION)
+# -------------------------------------
+# Define a conditional.
+AC_DEFUN([AM_CONDITIONAL],
+[AC_PREREQ(2.52)dnl
+ ifelse([$1], [TRUE],  [AC_FATAL([$0: invalid condition: $1])],
+	[$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
+AC_SUBST([$1_TRUE])dnl
+AC_SUBST([$1_FALSE])dnl
+_AM_SUBST_NOTMAKE([$1_TRUE])dnl
+_AM_SUBST_NOTMAKE([$1_FALSE])dnl
+m4_define([_AM_COND_VALUE_$1], [$2])dnl
+if $2; then
+  $1_TRUE=
+  $1_FALSE='#'
+else
+  $1_TRUE='#'
+  $1_FALSE=
+fi
+AC_CONFIG_COMMANDS_PRE(
+[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
+  AC_MSG_ERROR([[conditional "$1" was never defined.
+Usually this means the macro was only invoked conditionally.]])
+fi])])
+
+# Copyright (C) 2003, 2005  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 2
+
+# Check whether the underlying file-system supports filenames
+# with a leading dot.  For instance MS-DOS doesn't.
+AC_DEFUN([AM_SET_LEADING_DOT],
+[rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+if test -d .tst; then
+  am__leading_dot=.
+else
+  am__leading_dot=_
+fi
+rmdir .tst 2>/dev/null
+AC_SUBST([am__leading_dot])])
+
+# Copyright (C) 2006, 2008  Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 2
+
+# _AM_SUBST_NOTMAKE(VARIABLE)
+# ---------------------------
+# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
+# This macro is traced by Automake.
+AC_DEFUN([_AM_SUBST_NOTMAKE])
+
+# AM_SUBST_NOTMAKE(VARIABLE)
+# ---------------------------
+# Public sister of _AM_SUBST_NOTMAKE.
+AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
diff --git a/sim/nds32/config.in b/sim/nds32/config.in
new file mode 100644
index 0000000..f29d045
--- /dev/null
+++ b/sim/nds32/config.in
@@ -0,0 +1,104 @@
+/* config.in.  Generated from configure.ac by autoheader.  */
+
+/* Define to 1 if translation of program messages to the user's native
+   language is requested. */
+#undef ENABLE_NLS
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you have the <errno.h> header file. */
+#undef HAVE_ERRNO_H
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#undef HAVE_FCNTL_H
+
+/* Define to 1 if you have the <fpu_control.h> header file. */
+#undef HAVE_FPU_CONTROL_H
+
+/* Define to 1 if you have the `getrusage' function. */
+#undef HAVE_GETRUSAGE
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the `nsl' library (-lnsl). */
+#undef HAVE_LIBNSL
+
+/* Define to 1 if you have the `socket' library (-lsocket). */
+#undef HAVE_LIBSOCKET
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the `sigaction' function. */
+#undef HAVE_SIGACTION
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/resource.h> header file. */
+#undef HAVE_SYS_RESOURCE_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the `time' function. */
+#undef HAVE_TIME
+
+/* Define to 1 if you have the <time.h> header file. */
+#undef HAVE_TIME_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if you have the <zlib.h> header file. */
+#undef HAVE_ZLIB_H
+
+/* Define to 1 if you have the `__setfpucw' function. */
+#undef HAVE___SETFPUCW
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Additional package description */
+#undef PKGVERSION
+
+/* Bug reporting address */
+#undef REPORT_BUGS_TO
+
+/* Define as the return type of signal handlers (`int' or `void'). */
+#undef RETSIGTYPE
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
diff --git a/sim/nds32/configure.ac b/sim/nds32/configure.ac
new file mode 100644
index 0000000..80fca75
--- /dev/null
+++ b/sim/nds32/configure.ac
@@ -0,0 +1,23 @@
+dnl Process this file with autoconf to produce a configure script.
+AC_PREREQ(2.64)dnl
+AC_INIT(Makefile.in)
+sinclude(../common/acinclude.m4)
+
+SIM_AC_COMMON
+
+SIM_AC_OPTION_WARNINGS
+SIM_AC_OPTION_ALIGNMENT(NONSTRICT_ALIGNMENT)
+SIM_AC_OPTION_ENVIRONMENT
+
+NDS32_SIM_EXTRA_OBJS=
+
+case ${host} in
+  *mingw32*)
+    NDS32_SIM_EXTRA_OBJS="${NDS32_SIM_EXTRA_OBJS} mingw32-hdep.o"
+    ;;
+  *) ;;
+esac
+
+AC_SUBST([NDS32_SIM_EXTRA_OBJS], ${NDS32_SIM_EXTRA_OBJS})
+
+SIM_AC_OUTPUT
diff --git a/sim/nds32/interp.c b/sim/nds32/interp.c
new file mode 100644
index 0000000..a358cf6
--- /dev/null
+++ b/sim/nds32/interp.c
@@ -0,0 +1,2507 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#include "bfd.h"
+#include "elf-bfd.h"
+#include "gdb/callback.h"
+#include "gdb/signals.h"
+#include "libiberty.h"
+#include "gdb/remote-sim.h"
+#include "gdb/sim-nds32.h"
+#include "dis-asm.h"
+#include "sim-main.h"
+#include "nds32-sim.h"
+#include "sim-utils.h"
+#include "sim-fpu.h"
+#include "sim-trace.h"
+#include "sim-options.h"
+
+#include "opcode/nds32.h"
+#include "nds32-sim.h"
+#include "nds32-mm.h"
+#include "nds32-syscall.h"
+#include "nds32-gmon.h"
+#include "nds32-pfm.h"
+
+#if defined (__linux__) || defined (__CYGWIN__)
+/* FIXME */
+#include <sys/types.h>
+#elif defined (__WIN32__)
+#include "mingw32-hdep.h"
+#endif
+#include <unistd.h>
+#include <time.h>
+
+struct disassemble_info dis_info; /* Handed to print_insn_nds32.  */
+
+static void nds32_set_nia (sim_cpu *cpu, sim_cia nia);
+
+enum {
+  OPTION_GPROF = OPTION_START,	/* Presumably the gprof option id; confirm.  */
+};
+
+/* Ring of recent $pc values, printed by nds32_dump_registers.  */
+#define RECENT_CIA_MASK	0xf	/* Index mask; the ring has mask + 1 slots.  */
+static sim_cia recent_cia[RECENT_CIA_MASK + 1];
+static int recent_cia_idx = 0;	/* First slot printed by the dump.  */
+
+/* Return the LEN-byte unsigned integer stored at ADDR in BYTE_ORDER.
+   Bytes are addressed by index so that the little-endian scan never
+   forms a pointer below ADDR.  */
+static ulongest_t
+extract_unsigned_integer (unsigned char *addr, int len, int byte_order)
+{
+  ulongest_t retval = 0;
+  int i;
+
+  if (byte_order == BIG_ENDIAN)
+    {
+      for (i = 0; i < len; i++)
+	retval = (retval << 8) | addr[i];
+    }
+  else
+    {
+      for (i = len - 1; i >= 0; i--)
+	retval = (retval << 8) | addr[i];
+    }
+  return retval;
+}
+
+/* Store the low LEN bytes of VAL at ADDR in BYTE_ORDER.  Bytes are
+   addressed by index so no pointer below ADDR is ever formed.  */
+static void
+store_unsigned_integer (unsigned char *addr, int len,
+			int byte_order, ulongest_t val)
+{
+  int i;
+
+  /* Start at the least significant end of the integer,
+     and work towards the most significant.  */
+  if (byte_order == BIG_ENDIAN)
+    {
+      for (i = len - 1; i >= 0; i--)
+	{
+	  addr[i] = val & 0xff;
+	  val >>= 8;
+	}
+    }
+  else
+    {
+      for (i = 0; i < len; i++)
+	{
+	  addr[i] = val & 0xff;
+	  val >>= 8;
+	}
+    }
+}
+
+/* Scan the four bytes at B1 against B2 for the first position that holds
+   a zero byte in B1 or differs between the two.  Return that position
+   minus four (as uint32_t), or 0 when no such byte is found.  */
+
+static uint32_t
+find_null_mism (unsigned char *b1, unsigned char *b2)
+{
+  int pos = 0;
+
+  /* Skip bytes that are non-zero and equal in both buffers.  */
+  while (pos < 4 && b1[pos] != '\0' && b1[pos] == b2[pos])
+    pos++;
+
+  return (pos < 4) ? -4 + pos : 0;
+}
+
+/* Compare four bytes of B1 and B2 and report the first difference.
+   INC selects the scan direction: 1 walks from byte 0 up to byte 3,
+   -1 walks from byte 3 down to byte 0.
+   Return the index of the first differing byte minus four (as uint32_t),
+   or 0 when the four bytes are equal.  */
+
+static uint32_t
+find_mism (unsigned char *b1, unsigned char *b2, int inc)
+{
+  const int last = (inc == 1) ? 3 : 0;
+  int pos;
+
+  for (pos = (inc == 1) ? 0 : 3; ; pos += inc)
+    {
+      if (b1[pos] != b2[pos])
+	return -4 + pos;
+      if (pos == last)
+	return 0;
+    }
+}
+
+static void
+nds32_dump_registers (SIM_DESC sd)
+{
+  /* Two-character labels for the 32 GPRs, indexed by register number.  */
+  static char *gpr_label[] = {
+	"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+	"r8", "r9", "10", "11", "12", "13", "14", "ta",
+	"16", "17", "18", "19", "20", "21", "22", "23",
+	"24", "25", "p0", "p1", "fp", "gp", "lp", "sp"
+	};
+  int n;
+
+  /* For every CPU: $pc, the GPRs six per row, then the USRs and PSW.  */
+  for (n = 0; n < MAX_NR_PROCESSORS; ++n)
+    {
+      sim_cpu *cpu = STATE_CPU (sd, n);
+      int r = 0;
+
+      sim_io_eprintf (sd, "pc  %08x\n", CCPU_USR[USR0_PC].u);
+      while (r < 32)
+	{
+	  sim_io_eprintf (sd, "%s  %08x  ", gpr_label[r], CCPU_GPR[r].u);
+	  if (++r % 6 == 0)
+	    sim_io_eprintf (sd, "\n");
+	}
+      sim_io_eprintf (sd, "\n");
+
+      sim_io_eprintf (sd, "itb %08x  ", CCPU_USR[USR0_ITB].u);
+      sim_io_eprintf (sd, "ifc %08x  ", CCPU_USR[USR0_IFCLP].u);
+      sim_io_eprintf (sd, "d0  %08x  ", CCPU_USR[USR0_D0LO].u);
+      sim_io_eprintf (sd, "hi  %08x  ", CCPU_USR[USR0_D0HI].u);
+      sim_io_eprintf (sd, "d1  %08x  ", CCPU_USR[USR0_D1LO].u);
+      sim_io_eprintf (sd, "hi  %08x  ", CCPU_USR[USR0_D1HI].u);
+      sim_io_eprintf (sd, "\n");
+
+      sim_io_eprintf (sd, "psw %08x  ", CCPU_SR[SRIDX_PSW].u);
+      sim_io_eprintf (sd, "\n");
+    }
+
+  /* Recorded $pc values, starting at slot recent_cia_idx.  */
+  sim_io_eprintf (sd, "Recent $pc:\n");
+  for (n = 0; n <= RECENT_CIA_MASK; n++)
+    {
+      int slot = (n + recent_cia_idx) & RECENT_CIA_MASK;
+
+      sim_io_eprintf (sd, "  0x%x", recent_cia[slot]);
+      if (n % 6 == 5)
+	sim_io_eprintf (sd, "\n");
+    }
+  sim_io_eprintf (sd, "\n");
+}
+
+/* Print MSG (printf-style) if non-NULL, then halt the engine with SIG.  */
+uint32_t
+nds32_raise_exception (sim_cpu *cpu, enum nds32_exceptions e, int sig,
+		       char *msg, ...)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  const uint32_t pc = CCPU_USR[USR0_PC].u;
+
+  /* TODO: Show message only if it is not handled by user.  */
+  if (msg != NULL)
+    {
+      va_list ap;
+      va_start (ap, msg);
+      sim_io_evprintf (sd, msg, ap);
+      va_end (ap);
+    }
+
+  /* Unless opened with SIM_OPEN_DEBUG, show the insn at PC and registers.  */
+  if (STATE_OPEN_KIND (sd) != SIM_OPEN_DEBUG)
+    {
+      fprintf (stderr, "  ");
+      print_insn_nds32 (pc, &dis_info);
+      fprintf (stderr, "\n");
+      nds32_dump_registers (sd);
+    }
+
+  /* Stop the engine at PC with SIG.  */
+  sim_engine_halt (sd, cpu, NULL, pc, sim_stopped, sig);
+  return pc;
+}
+
+void
+nds32_bad_op (sim_cpu *cpu, uint32_t cia, uint32_t insn, char *tag)
+{
+  /* TAG names the decoder group; a NULL tag prints as an empty string.  */
+  const char *group = (tag != NULL) ? tag : "";
+
+  nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGILL,
+			 "Illegal/Unhandled %s instruction (%08x)\n", group, insn);
+}
+
+/* Read a SIZE-byte value at ADDR; ALIGNED_P requests an alignment check.  */
+ulongest_t
+__nds32_ld (sim_cpu *cpu, SIM_ADDR addr, int size, int aligned_p)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  ulongest_t data = 0;
+  int endian;
+  int nread;
+
+  SIM_ASSERT (size <= sizeof (ulongest_t));
+
+  /* The mask test below assumes SIZE is a power of two.  */
+  if (aligned_p && (addr & (size - 1)) != 0)
+    nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGSEGV,
+			   "Alignment check exception. "
+			   "Read of address 0x%08x in size of %d.\n",
+			   addr, size);
+
+  /* Fetch the raw bytes, then decode them in the order PSW.BE selects.  */
+  nread = sim_read (sd, addr, (unsigned char *) &data, size);
+  endian = CCPU_SR_TEST (PSW, PSW_BE) ? BIG_ENDIAN : LITTLE_ENDIAN;
+  data = extract_unsigned_integer ((unsigned char *) &data, size, endian);
+
+  if (nread != size)
+    nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGSEGV,
+			   "Access violation. Read of address 0x%08x.\n", addr);
+  return data;
+}
+
+/* Write the low SIZE bytes of VAL to ADDR in the byte order PSW.BE selects;
+   ALIGNED_P requests an alignment check.  */
+void
+__nds32_st (sim_cpu *cpu, SIM_ADDR addr, int size, ulongest_t val,
+	    int aligned_p)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  int endian;
+  int nwritten;
+
+  SIM_ASSERT (size <= sizeof (ulongest_t));
+
+  /* The mask test below assumes SIZE is a power of two.  */
+  if (aligned_p && (addr & (size - 1)) != 0)
+    nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGSEGV,
+			   "Alignment check exception. "
+			   "Write of address 0x%08x in size of %d.\n",
+			   addr, size);
+
+  /* VAL is re-encoded in place so its first SIZE bytes can be written.  */
+  endian = CCPU_SR_TEST (PSW, PSW_BE) ? BIG_ENDIAN : LITTLE_ENDIAN;
+  store_unsigned_integer ((unsigned char *) &val, size, endian, val);
+  nwritten = sim_write (sd, addr, (unsigned char *) &val, size);
+
+  if (nwritten != size)
+    nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGSEGV,
+			   "Access violation. Write of address 0x%08x\n", addr);
+}
+
+static void
+nds32_free_state (SIM_DESC sd)
+{
+  if (STATE_MODULES (sd))	/* Skip when no module list is set.  */
+    sim_module_uninstall (sd);
+  sim_cpu_free_all (sd);	/* Then the CPUs attached to SD ...  */
+  sim_state_free (sd);		/* ... and finally SD itself.  */
+}
+
+void
+sim_size (int s)
+{				/* No-op: S is ignored.  */
+}
+
+/* Set next-instruction-address, so sim_engine_run () fetches `nia'
+   instead of ($pc + 4) or ($pc + 2) for the next instruction, based on
+   the current instruction size.  */
+
+static void
+nds32_set_nia (sim_cpu *cpu, sim_cia nia)
+{
+  cpu->baddr = nia;
+  cpu->iflags |= NIF_BRANCH;
+}
+
+/* Execute one MEM-group instruction (register-indexed loads and stores).
+   Unimplemented sub-opcodes are reported through nds32_bad_op.  */
+static void
+nds32_decode32_mem (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  const int rt = N32_RT5 (insn);
+  const int ra = N32_RA5 (insn);
+  const int rb = N32_RB5 (insn);
+  const int sv = __GF (insn, 8, 2);
+  const int op = insn & 0xFF;
+  const int size = 1 << (op & 0x3);	/* Access width in bytes.  */
+  uint32_t addr;
+
+  switch (op)
+    {
+    case 0x0:			/* lb */
+    case 0x1:			/* lh */
+    case 0x2:			/* lw */
+    /* case 0x3: */		/* ld (doubleword: unimplemented) */
+      addr = CCPU_GPR[ra].u + (CCPU_GPR[rb].u << sv);
+      CCPU_GPR[rt].u = nds32_ld_aligned (cpu, addr, size);
+      break;
+    case 0x4:			/* lb.bi */
+    case 0x5:			/* lh.bi */
+    case 0x6:			/* lw.bi */
+    /* case 0x7: */		/* ld.bi */
+      /* UNPREDICTABLE if rt is equal to ra.
+	 Compute address before load, because rb could be rt.  */
+      addr = CCPU_GPR[ra].u + (CCPU_GPR[rb].u << sv);
+      CCPU_GPR[rt].u = nds32_ld_aligned (cpu, CCPU_GPR[ra].u, size);
+      CCPU_GPR[ra].u = addr;
+      break;
+    case 0x8:			/* sb */
+    case 0x9:			/* sh */
+    case 0xa:			/* sw */
+    /* case 0xb: */		/* sd */
+      addr = CCPU_GPR[ra].u + (CCPU_GPR[rb].u << sv);
+      nds32_st_aligned (cpu, addr, size, CCPU_GPR[rt].u);
+      break;
+    case 0xc:			/* sb.bi */
+    case 0xd:			/* sh.bi */
+    case 0xe:			/* sw.bi */
+    /* case 0xf: */		/* sd.bi */
+      nds32_st_aligned (cpu, CCPU_GPR[ra].u, size, CCPU_GPR[rt].u);
+      CCPU_GPR[ra].u += (CCPU_GPR[rb].u << sv);
+      break;
+    case 0x10:			/* lbs */
+    case 0x11:			/* lhs */
+    /* case 0x12: */		/* lws */
+      addr = CCPU_GPR[ra].u + (CCPU_GPR[rb].u << sv);
+      CCPU_GPR[rt].u = nds32_ld_aligned (cpu, addr, size);
+      CCPU_GPR[rt].u = __SEXT (CCPU_GPR[rt].u, size * 8);
+      break;
+    case 0x13:			/* dpref */
+      /* do nothing */
+      break;
+    case 0x14:			/* lbs.bi */
+    case 0x15:			/* lhs.bi */
+    /* case 0x16: */		/* lws.bi */
+      /* UNPREDICTABLE if rt is equal to ra.
+	 Compute address before load, because rb could be rt.  */
+      addr = CCPU_GPR[ra].u + (CCPU_GPR[rb].u << sv);
+      CCPU_GPR[rt].u = nds32_ld_aligned (cpu, CCPU_GPR[ra].u, size);
+      CCPU_GPR[rt].u = __SEXT (CCPU_GPR[rt].u, size * 8);
+      CCPU_GPR[ra].u = addr;
+      break;
+    case 0x18:			/* llw */
+      CCPU_GPR[rt].u =
+	nds32_ld_aligned (cpu, CCPU_GPR[ra].u + (CCPU_GPR[rb].u << sv), 4);
+      break;
+    case 0x19:			/* scw */
+      /* SCW always succeeds.  */
+      nds32_st_aligned (cpu, CCPU_GPR[ra].u + (CCPU_GPR[rb].u << sv), 4,
+			CCPU_GPR[rt].u);
+      CCPU_GPR[rt].u = 1;
+      break;
+    case 0x20:			/* lbup */
+    case 0x22:			/* lwup */
+    case 0x28:			/* sbup */
+    case 0x2a:			/* swup */
+    default:
+      nds32_bad_op (cpu, cia, insn, "MEM");
+      return;
+    }
+}
+
+/* Execute one LSMW-group insn: load (LMW) or store (SMW) multiple words.  */
+static void
+nds32_decode32_lsmw (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  int rb, re, ra, enable4, i;
+  int wac;			/* With Alignment Check ?  */
+  int reg_cnt = 0;		/* Total number of registers count.  */
+  int di;			/* dec=-1 or inc=1  */
+  int order = CCPU_SR_TEST (PSW, PSW_BE) ? BIG_ENDIAN : LITTLE_ENDIAN;
+  int size = 4;			/* The load/store bytes.  */
+  int len = 4;			/* The length of a fixed-size string.  */
+  int ret;
+  char enb4map[2][4] =
+    { {3, 2, 1, 0}, /* With Alignment Check.  */ {0, 1, 2, 3} };
+  uint32_t val = 0;
+  SIM_ADDR base = -1;
+  char buf[4];
+
+  /* Filter out undefined opcode.  */
+  if ((insn & 0x3) == 0x3)
+    {
+      nds32_bad_op (cpu, cia, insn, "LSMW");
+      return;
+    }
+  /* Filter out invalid opcode.  */
+  if ((insn & 0xB) == 0xA)
+    {
+      nds32_bad_op (cpu, cia, insn, "LSMW");
+      return;
+    }
+
+  /* Decode instruction.  */
+  rb = N32_RT5 (insn);
+  ra = N32_RA5 (insn);
+  re = N32_RB5 (insn);
+  enable4 = (insn >> 6) & 0x0F;
+  wac = (insn & 1) ? 1 : 0;
+  di = (insn & __BIT (3)) ? -1 : 1;
+
+  base = CCPU_GPR[ra].u;	/* Get the first memory address  */
+
+  /* Do the alignment check. */
+  if (wac && (base & 0x3))
+    {
+      nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGSEGV,
+			     (insn & 0x20)
+			     ? "Alignment check exception (SMWA). "
+			       "Write of address 0x%08x.\n"
+			     : "Alignment check exception (LMWA). "
+			       "Read of address 0x%08x.\n",
+			     base);
+      return;
+    }
+
+  /* Sum up the registers count.  */
+  reg_cnt += (enable4 & 0x1) ? 1 : 0;
+  reg_cnt += (enable4 & 0x2) ? 1 : 0;
+  reg_cnt += (enable4 & 0x4) ? 1 : 0;
+  reg_cnt += (enable4 & 0x8) ? 1 : 0;
+  if (rb < GPR_FP && re < GPR_FP)
+    {
+      reg_cnt += (re - rb) + 1;
+    }
+
+  /* Generate the first memory address.  */
+  if (insn & __BIT (4))
+    base += 4 * di;
+  /* Adjust the first memory address
+     due to operating from low address memory.  */
+  if (insn & __BIT (3))
+    base -= (reg_cnt - 1) * 4;
+
+  /* Operating from low address memory to high address memory.  */
+  for (i = rb; i <= re && rb < GPR_FP; i++)
+    {
+      if (insn & 0x20)
+	{
+	  /* store */
+
+	  val = CCPU_GPR[i].u;
+	  store_unsigned_integer ((unsigned char *) buf, 4, order, val);
+	  if ((insn & 0x3) == 0x2)
+	    {
+	      /* Until zero byte case.  */
+	      len = strnlen (buf, 4);
+	      size = (len == 4) ? 4 : len + 1;	/* Include zero byte.  */
+	    }
+	  ret = sim_write (sd, base, (unsigned char *) buf, size);
+	  if (ret != size)
+	    {
+	      nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGSEGV,
+				     "Access violation. Write of address %#x\n",
+				     base);
+	    }
+	  if (len < 4)
+	    goto zero_byte_exist;
+	}
+      else
+	{
+	  /* load */
+
+	  ret = sim_read (sd, base, (unsigned char *) buf, 4);
+	  if (ret != 4)
+	    {
+	      nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGSEGV,
+				     "Access violation. Read of address %#x\n",
+				     base);
+	    }
+	  val = extract_unsigned_integer ((unsigned char *) buf, 4, order);
+	  CCPU_GPR[i].u = val;
+	  if ((insn & 0x3) == 0x2)
+	    {
+	      /* Until zero byte case.  */
+	      len = strnlen (buf, 4);
+	      if (len < 4)
+		goto zero_byte_exist;
+	    }
+	}
+      base += 4;
+    }
+
+  /* Operating the 4 individual registers
+     from low address memory to high address memory. */
+  for (i = 0; i < 4; i++)
+    {
+      if (enable4 & (__BIT (enb4map[wac][i])))
+	{
+	  if (insn & 0x20)
+	    {
+	      /* store */
+
+	      val = CCPU_GPR[GPR_SP - (enb4map[wac][i])].u;
+	      store_unsigned_integer ((unsigned char *) buf, 4, order, val);
+	      if ((insn & 0x3) == 0x2)	/* Until zero byte case.  */
+		{
+		  len = strnlen (buf, 4);
+		  size = (len == 4) ? 4 : len + 1;	/* Include zero byte.  */
+		}
+	      ret = sim_write (sd, base, (unsigned char *) buf, size);
+	      if (ret != size)
+		{
+		  nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGSEGV,
+					 "Access violation. Write of address %#x\n",
+					 base);
+		}
+	      if (len < 4)
+		goto zero_byte_exist;
+	    }
+	  else
+	    {
+	      /* load */
+
+	      ret = sim_read (sd, base, (unsigned char *) buf, 4);
+	      if (ret != 4)
+		{
+		  nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGSEGV,
+					 "Access violation. Read of address %#x\n",
+					 base);
+		}
+	      val =
+		extract_unsigned_integer ((unsigned char *) buf, 4, order);
+	      CCPU_GPR[GPR_SP - (enb4map[wac][i])].u = val;
+	      if ((insn & 0x3) == 0x2)	/* until zero byte ? */
+		{
+		  len = strnlen (buf, 4);
+		  if (len < 4)
+		    goto zero_byte_exist;
+		}
+	    }
+	  base += 4;
+	}
+    }
+
+zero_byte_exist:
+  /* Update the base address register.  */
+  if (insn & __BIT (2))
+    CCPU_GPR[ra].u += reg_cnt * 4 * di;
+
+  return;
+}
+
+/* Execute one ALU1-group instruction; unknown sub-opcodes raise SIGILL.  */
+static void
+nds32_decode32_alu1 (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  int rt = N32_RT5 (insn);
+  int ra = N32_RA5 (insn);
+  int rb = N32_RB5 (insn);
+  const int rd = N32_RD5 (insn);
+  const int imm5u = rb;
+  const int sh5 = N32_SH5 (insn);
+
+  switch (insn & 0x1f)
+    {
+    case 0x0:			/* add, add_slli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u + (CCPU_GPR[rb].u << sh5);
+      break;
+    case 0x1:			/* sub, sub_slli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u - (CCPU_GPR[rb].u << sh5);
+      break;
+    case 0x2:			/* and, and_slli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u & (CCPU_GPR[rb].u << sh5);
+      break;
+    case 0x3:			/* xor, xor_slli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u ^ (CCPU_GPR[rb].u << sh5);
+      break;
+    case 0x4:			/* or, or_slli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u | (CCPU_GPR[rb].u << sh5);
+      break;
+    case 0x5:			/* nor */
+      CCPU_GPR[rt].u = ~(CCPU_GPR[ra].u | CCPU_GPR[rb].u);
+      break;
+    case 0x6:			/* slt */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u < CCPU_GPR[rb].u ? 1 : 0;
+      break;
+    case 0x7:			/* slts */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].s < CCPU_GPR[rb].s ? 1 : 0;
+      break;
+    /* Register shift/rotate amounts are masked to five bits, as sll does.  */
+    case 0x8:			/* slli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u << imm5u;
+      break;
+    case 0x9:			/* srli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u >> imm5u;
+      break;
+    case 0xa:			/* srai */
+      CCPU_GPR[rt].s = CCPU_GPR[ra].s >> imm5u;
+      break;
+    case 0xc:			/* sll */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u << (CCPU_GPR[rb].u & 0x1f);
+      break;
+    case 0xd:			/* srl */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u >> (CCPU_GPR[rb].u & 0x1f);
+      break;
+    case 0xe:			/* sra */
+      CCPU_GPR[rt].s = CCPU_GPR[ra].s >> (CCPU_GPR[rb].u & 0x1f);
+      break;
+    case 0xb:			/* rotri */
+    case 0xf:			/* rotr */
+      {
+	uint32_t shift = ((insn & 0x1f) == 0xb) ? imm5u : CCPU_GPR[rb].u;
+	uint32_t v = CCPU_GPR[ra].u;
+	shift &= 0x1f;		/* A zero amount must not shift by 32.  */
+	CCPU_GPR[rt].u = shift ? (v >> shift) | (v << (32 - shift)) : v;
+      }
+      break;
+    case 0x10:			/* seb */
+      CCPU_GPR[rt].s = __SEXT (CCPU_GPR[ra].s, 8);
+      break;
+    case 0x11:			/* seh */
+      CCPU_GPR[rt].s = __SEXT (CCPU_GPR[ra].s, 16);
+      break;
+    case 0x12:			/* bitc */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u & ~(CCPU_GPR[rb].u);
+      break;
+    case 0x13:			/* zeh */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u & 0xffff;
+      break;
+    case 0x14:			/* wsbh */
+      CCPU_GPR[rt].u = ((CCPU_GPR[ra].u & 0xFF00FF00) >> 8)
+		       | ((CCPU_GPR[ra].u & 0x00FF00FF) << 8);
+      break;
+    case 0x15:			/* or_srli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u | (CCPU_GPR[rb].u >> sh5);
+      break;
+    case 0x16:			/* divsr */
+      {
+	/* FIXME: Positive quotient exception.  */
+	int64_t n = CCPU_GPR[ra].s;	/* Widen: INT32_MIN / -1 must not trap.  */
+	int64_t d = CCPU_GPR[rb].s;
+
+	if (d == 0)
+	  nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGFPE,
+				 "Divide-by-zero exception.\n");
+	CCPU_GPR[rt].s = n / d;
+	if (rt != rd)
+	  CCPU_GPR[rd].s = n % d;
+      }
+      break;
+    case 0x17:			/* divr */
+      {
+	uint32_t n = CCPU_GPR[ra].u;
+	uint32_t d = CCPU_GPR[rb].u;
+
+	if (d == 0)
+	  nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGFPE,
+				 "Divide-by-zero exception.\n");
+	CCPU_GPR[rt].u = n / d;
+	if (rt != rd)
+	  CCPU_GPR[rd].u = n % d;
+      }
+      break;
+    case 0x18:			/* sva */
+      {
+	uint64_t s = (uint64_t) CCPU_GPR[ra].u + (uint64_t) CCPU_GPR[rb].u;
+	s = (s >> 31) & 0x3;
+	CCPU_GPR[rt].u = (s == 0 || s == 3);
+      }
+      break;
+    case 0x19:			/* svs */
+      nds32_bad_op (cpu, cia, insn, "ALU1/svs");
+      break;
+    case 0x1a:			/* cmovz */
+      if (CCPU_GPR[rb].u == 0)
+	CCPU_GPR[rt].u = CCPU_GPR[ra].u;
+      break;
+    case 0x1b:			/* cmovn */
+      if (CCPU_GPR[rb].u != 0)
+	CCPU_GPR[rt].u = CCPU_GPR[ra].u;
+      break;
+    case 0x1c:			/* add_srli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u + (CCPU_GPR[rb].u >> sh5);
+      break;
+    case 0x1d:			/* sub_srli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u - (CCPU_GPR[rb].u >> sh5);
+      break;
+    case 0x1e:			/* and_srli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u & (CCPU_GPR[rb].u >> sh5);
+      break;
+    case 0x1f:			/* xor_srli */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u ^ (CCPU_GPR[rb].u >> sh5);
+      break;
+    default:
+      nds32_bad_op (cpu, cia, insn, "ALU1");
+      break;
+    }
+}
+
+/* Execute one instruction of the ALU_2 group.
+
+   INSN is the 32-bit instruction word and CIA the address it was fetched
+   from.  FFBI is recognised first by its low seven bits, because bits
+   7..14 of that encoding carry an immediate byte; every other instruction
+   is selected by the low ten bits of INSN.  Bit 21 of INSN selects which
+   of the two accumulator pairs (USR0_D0LO or USR0_D1LO) the multiply and
+   divide instructions use.  Sub-opcodes not handled here are reported
+   through nds32_bad_op.  */
+static void
+nds32_decode32_alu2 (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  int rt = N32_RT5 (insn);
+  int ra = N32_RA5 (insn);
+  int rb = N32_RB5 (insn);
+  const int imm5u = rb;
+  const int dt = (insn & __BIT (21)) ? USR0_D1LO : USR0_D0LO;
+
+  if ((insn & 0x7f) == 0x4e)	/* ffbi */
+    {
+      unsigned char buff[4];
+      int order = CCPU_SR_TEST (PSW, PSW_BE) ? BIG_ENDIAN : LITTLE_ENDIAN;
+      int imm8 = ((insn >> 7) & 0xff);
+      unsigned char *ret;
+
+      /* Lay the register out in target byte order and search it.  The
+	 result is 0 when the byte is absent, otherwise the (negative)
+	 offset of the match from the end of the word.  */
+      store_unsigned_integer (buff, 4, order, CCPU_GPR[ra].u);
+      ret = memchr (buff, imm8, 4);
+      if (NULL == ret)
+	CCPU_GPR[rt].u = 0;
+      else
+	CCPU_GPR[rt].u = ret - buff - 4;
+      return;
+    }
+
+  switch (insn & 0x3ff)
+    {
+    case 0x0:			/* max */
+      CCPU_GPR[rt].s = (CCPU_GPR[ra].s > CCPU_GPR[rb].s)
+		       ? CCPU_GPR[ra].s : CCPU_GPR[rb].s;
+      break;
+    case 0x1:			/* min */
+      CCPU_GPR[rt].s = (CCPU_GPR[ra].s < CCPU_GPR[rb].s)
+		       ? CCPU_GPR[ra].s : CCPU_GPR[rb].s;
+      break;
+    case 0x2:			/* ave */
+      {
+	/* Rounded average, (ra + rb + 1) >> 1.  The sum is formed in 64
+	   bits so that it cannot overflow.  */
+	int64_t r = (int64_t) CCPU_GPR[ra].s + (int64_t) CCPU_GPR[rb].s + 1;
+
+	CCPU_GPR[rt].u = (r >> 1) & 0xFFFFFFFF;
+      }
+      break;
+    case 0x3:			/* abs */
+      if (CCPU_GPR[ra].s >= 0)
+	CCPU_GPR[rt].s = CCPU_GPR[ra].s;
+      else if (CCPU_GPR[ra].u == 0x80000000)
+	CCPU_GPR[rt].u = 0x7fffffff;	/* Saturate: -INT_MIN does not fit.  */
+      else
+	CCPU_GPR[rt].s = -CCPU_GPR[ra].s;
+      break;
+    case 0x4:			/* clips */
+      {
+	/* The bounds are built in 64 bits because 1 << 31 does not fit in
+	   an int.  */
+	const int64_t hi = ((int64_t) 1 << imm5u) - 1;
+	const int64_t lo = -((int64_t) 1 << imm5u);
+
+	if (CCPU_GPR[ra].s > hi)
+	  CCPU_GPR[rt].s = hi;
+	else if (CCPU_GPR[ra].s < lo)
+	  CCPU_GPR[rt].s = lo;
+	else
+	  CCPU_GPR[rt].s = CCPU_GPR[ra].s;
+      }
+      break;
+    case 0x5:			/* clip */
+      {
+	/* See clips for why the bound is 64-bit.  */
+	const int64_t hi = ((int64_t) 1 << imm5u) - 1;
+
+	if (CCPU_GPR[ra].s > hi)
+	  CCPU_GPR[rt].s = hi;
+	else if (CCPU_GPR[ra].s < 0)
+	  CCPU_GPR[rt].s = 0;
+	else
+	  CCPU_GPR[rt].s = CCPU_GPR[ra].s;
+      }
+      break;
+    case 0x6:			/* clo */
+      {
+	int i, cnt = 0;
+
+	/* Count leading one bits, starting from bit 31.  */
+	for (i = 31; i >= 0; i--)
+	  {
+	    if ((CCPU_GPR[ra].u >> i) & 1)
+	      cnt++;
+	    else
+	      break;
+	  }
+	CCPU_GPR[rt].u = cnt;
+      }
+      break;
+    case 0x7:			/* clz */
+      {
+	int i, cnt = 0;
+
+	/* Count leading zero bits, starting from bit 31.  */
+	for (i = 31; i >= 0; i--)
+	  {
+	    if (((CCPU_GPR[ra].u >> i) & 1) == 0)
+	      cnt++;
+	    else
+	      break;
+	  }
+	CCPU_GPR[rt].u = cnt;
+      }
+      break;
+    case 0x8:			/* bset */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u | (1u << imm5u);
+      break;
+    case 0x9:			/* bclr */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u & ~(1u << imm5u);
+      break;
+    case 0xa:			/* btgl */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u ^ (1u << imm5u);
+      break;
+    case 0xb:			/* btst */
+      CCPU_GPR[rt].u = (CCPU_GPR[ra].u & (1u << imm5u)) != 0;
+      break;
+    case 0xc:			/* bse */
+      {
+	/* Rb holds the control word: a position in bits 0..4, a length
+	   minus one in bits 8..12, a saved length in bits 16..20, and
+	   two status flags in bits 30 and 31.  */
+	int n = __GF (CCPU_GPR[rb].u, 0, 5);
+	int m = __GF (CCPU_GPR[rb].u, 8, 5);
+	int underflow = CCPU_GPR[rb].u & __BIT (30);
+	int len = m + 1;
+	int dist = 32 - len - n;	/* From LSB.  */
+	int val;
+	int d = n + m;
+	uint32_t ora = CCPU_GPR[ra].u;
+
+	/* Clear non-occupied.  */
+	if (!underflow)
+	  CCPU_GPR[rt].u = __GF (CCPU_GPR[rt].u, 0, len);
+
+	/* Normal condition.  */
+	if (31 > d)
+	  {
+	    __put_field (&CCPU_GPR[rb].u, 0, 5, d + 1);
+	    val = __GF (ora, dist, len);
+
+	    __put_field (&CCPU_GPR[rt].u, 0, len, val);
+
+	    if (underflow)
+	      {
+		/* Restore old length.  */
+		__put_field (&CCPU_GPR[rb].u, 8, 5,
+			     __GF (CCPU_GPR[rb].u, 16, 5));
+		/* Why?  */
+		__put_field (&CCPU_GPR[rb].u, 13, 3, 0);
+	      }
+
+	    CCPU_GPR[rb].u &= ~__BIT (30);
+	    CCPU_GPR[rb].u &= ~__BIT (31);
+	  }
+	/* Empty condition.  */
+	else if (31 == d)
+	  {
+	    CCPU_GPR[rb].u &= ~0x1f;
+	    val = __GF (ora, dist, len);
+
+	    __put_field (&CCPU_GPR[rt].u, 0, len, val);
+
+	    CCPU_GPR[rb].u &= ~__BIT (30);
+	    CCPU_GPR[rb].u |= __BIT (31);
+	  }
+	/* Underflow condition.  */
+	else /* 31 < d */
+	  {
+	    __put_field (&CCPU_GPR[rb].u, 16, 5, m);
+	    __put_field (&CCPU_GPR[rb].u, 8, 5, d - 32);
+	    CCPU_GPR[rb].u &= ~0x1f;
+	    CCPU_GPR[rb].u |= __BIT (30);
+	    CCPU_GPR[rb].u |= __BIT (31);
+	    val = __GF (ora, 0, 32 - n);
+	    __put_field (&CCPU_GPR[rt].u, 0, len, val << (d - 31));
+	  }
+      }
+      break;
+    case 0xd:			/* bsp */
+      {
+	/* Rb is the same kind of control word as for bse.  */
+	int n = __GF (CCPU_GPR[rb].u, 0, 5);
+	int m = __GF (CCPU_GPR[rb].u, 8, 5);
+	int underflow = CCPU_GPR[rb].u & __BIT (30);
+	int len = m + 1;
+	int dist = 32 - len - n;	/* From LSB.  */
+	int val;
+	int d = n + m;
+	uint32_t ora = CCPU_GPR[ra].u;
+
+	/* Normal condition.  */
+	if (31 > d)
+	  {
+	    __put_field (&CCPU_GPR[rb].u, 0, 5, d + 1);
+	    val = __GF (ora, 0, len);
+
+	    __put_field (&CCPU_GPR[rt].u, dist, len, val);
+
+	    if (underflow)
+	      {
+		/* Restore old length.  */
+		__put_field (&CCPU_GPR[rb].u, 8, 5,
+			     __GF (CCPU_GPR[rb].u, 16, 5));
+		/* Why?  */
+		__put_field (&CCPU_GPR[rb].u, 13, 3, 0);
+	      }
+
+	    CCPU_GPR[rb].u &= ~__BIT (30);
+	    CCPU_GPR[rb].u &= ~__BIT (31);
+	  }
+	/* Empty condition.  */
+	else if (31 == d)
+	  {
+	    CCPU_GPR[rb].u &= ~0x1f;
+	    val = __GF (ora, 0, len);
+
+	    __put_field (&CCPU_GPR[rt].u, dist, len, val);
+
+	    CCPU_GPR[rb].u &= ~__BIT (30);
+	    CCPU_GPR[rb].u |= __BIT (31);
+	  }
+	/* Underflow condition.  */
+	else /* 31 < d */
+	  {
+	    __put_field (&CCPU_GPR[rb].u, 16, 5, m);
+	    __put_field (&CCPU_GPR[rb].u, 8, 5, d - 32);
+	    CCPU_GPR[rb].u &= ~0x1f;
+	    CCPU_GPR[rb].u |= __BIT (30);
+	    CCPU_GPR[rb].u |= __BIT (31);
+	    val = __GF (ora, 0, len) >> (d - 31);
+	    __put_field (&CCPU_GPR[rt].u, 0, 32 - n, val);
+	  }
+      }
+      break;
+    case 0xe:			/* ffb */
+      {
+	char buff[4];
+	int order = CCPU_SR_TEST (PSW, PSW_BE) ? BIG_ENDIAN : LITTLE_ENDIAN;
+	void *ret;
+
+	/* Same search as ffbi, with the byte taken from Rb.  */
+	store_unsigned_integer ((unsigned char *) &buff, 4, order,
+				CCPU_GPR[ra].u);
+	ret = memchr (buff, CCPU_GPR[rb].u, 4);
+	if (NULL == ret)
+	  CCPU_GPR[rt].u = 0;
+	else
+	  CCPU_GPR[rt].u = (char *) ret - (char *) buff - 4;
+      }
+      break;
+    case 0xf:			/* ffmism */
+      {
+	char a[4];
+	char b[4];
+	int order = CCPU_SR_TEST (PSW, PSW_BE) ? BIG_ENDIAN : LITTLE_ENDIAN;
+	int ret;
+
+	store_unsigned_integer ((unsigned char *) &a, 4, order,
+				CCPU_GPR[ra].u);
+	store_unsigned_integer ((unsigned char *) &b, 4, order,
+				CCPU_GPR[rb].u);
+	ret = find_mism ((unsigned char *) &a, (unsigned char *) &b, 1);
+	CCPU_GPR[rt].u = ret;
+      }
+      break;
+    case 0x17:			/* ffzmism */
+      {
+	char a[4];
+	char b[4];
+	int order = CCPU_SR_TEST (PSW, PSW_BE) ? BIG_ENDIAN : LITTLE_ENDIAN;
+	int ret;
+
+	store_unsigned_integer ((unsigned char *) &a, 4, order,
+				CCPU_GPR[ra].u);
+	store_unsigned_integer ((unsigned char *) &b, 4, order,
+				CCPU_GPR[rb].u);
+	ret = find_null_mism ((unsigned char *) &a, (unsigned char *) &b);
+	CCPU_GPR[rt].u = ret;
+      }
+      break;
+    case 0x24:			/* mul */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u * CCPU_GPR[rb].u;
+      break;
+    case 0x20:			/* mfusr */
+      CCPU_GPR[rt].u = CCPU_USR[rb << 5 | ra].u;
+      /* Index 31 reads back the address of this instruction.  */
+      if (((rb << 5) | ra) == 31)	/* PC */
+	CCPU_GPR[rt].u = cia;
+      break;
+    case 0x21:			/* mtusr */
+      CCPU_USR[(rb << 5) | ra].u = CCPU_GPR[rt].u;
+      break;
+    case 0x28:			/* mults64 */
+      {
+	int64_t d = (int64_t) CCPU_GPR[ra].s * (int64_t) CCPU_GPR[rb].s;
+
+	CCPU_USR[dt].s = d & 0xFFFFFFFF;
+	CCPU_USR[dt + 1].s = (d >> 32) & 0xFFFFFFFF;
+      }
+      break;
+    case 0x29:			/* mult64 */
+      {
+	uint64_t d = (uint64_t) CCPU_GPR[ra].u * (uint64_t) CCPU_GPR[rb].u;
+
+	CCPU_USR[dt].u = d & 0xFFFFFFFF;
+	CCPU_USR[dt + 1].u = (d >> 32) & 0xFFFFFFFF;
+      }
+      break;
+    case 0x2a:			/* madds64 */
+      {
+	int64_t mr = (int64_t) CCPU_GPR[ra].s * (int64_t) CCPU_GPR[rb].s;
+	int64_t d = ((int64_t) CCPU_USR[dt + 1].s << 32)
+		    | ((int64_t) CCPU_USR[dt].s & 0xFFFFFFFF);
+
+	d += mr;
+	CCPU_USR[dt].u = d & 0xFFFFFFFF;
+	CCPU_USR[dt + 1].u = (d >> 32) & 0xFFFFFFFF;
+      }
+      break;
+    case 0x2b:			/* madd64 */
+      {
+	uint64_t mr = (uint64_t) CCPU_GPR[ra].u * (uint64_t) CCPU_GPR[rb].u;
+	uint64_t d = ((uint64_t) CCPU_USR[dt + 1].u << 32)
+		     | ((uint64_t) CCPU_USR[dt].u & 0xFFFFFFFF);
+
+	d += mr;
+	CCPU_USR[dt].u = d & 0xFFFFFFFF;
+	CCPU_USR[dt + 1].u = (d >> 32) & 0xFFFFFFFF;
+      }
+      break;
+    case 0x2c:			/* msubs64 */
+      {
+	int64_t mr = (int64_t) CCPU_GPR[ra].s * (int64_t) CCPU_GPR[rb].s;
+	int64_t d = ((int64_t) CCPU_USR[dt + 1].s << 32)
+		    | ((int64_t) CCPU_USR[dt].s & 0xFFFFFFFF);
+
+	d -= mr;
+	CCPU_USR[dt].u = d & 0xFFFFFFFF;
+	CCPU_USR[dt + 1].u = (d >> 32) & 0xFFFFFFFF;
+      }
+      break;
+    case 0x2d:			/* msub64 */
+      {
+	uint64_t mr = (uint64_t) CCPU_GPR[ra].u * (uint64_t) CCPU_GPR[rb].u;
+	uint64_t d = ((uint64_t) CCPU_USR[dt + 1].u << 32)
+		     | ((uint64_t) CCPU_USR[dt].u & 0xFFFFFFFF);
+
+	d -= mr;
+	CCPU_USR[dt].u = d & 0xFFFFFFFF;
+	CCPU_USR[dt + 1].u = (d >> 32) & 0xFFFFFFFF;
+      }
+      break;
+    case 0x2e:			/* divs */
+      {
+	/* FIXME: A zero divisor, or INT_MIN / -1, is not checked for and
+	   is undefined behavior on the host.  */
+	int32_t q;
+	int32_t r;
+
+	q = CCPU_GPR[ra].s / CCPU_GPR[rb].s;
+	r = CCPU_GPR[ra].s % CCPU_GPR[rb].s;
+	CCPU_USR[dt].s = q;
+	CCPU_USR[dt + 1].s = r;
+      }
+      break;
+    case 0x2f:			/* div */
+      {
+	/* FIXME: A zero divisor is not checked for and is undefined
+	   behavior on the host.  */
+	uint32_t q;
+	uint32_t r;
+
+	q = CCPU_GPR[ra].u / CCPU_GPR[rb].u;
+	r = CCPU_GPR[ra].u % CCPU_GPR[rb].u;
+	CCPU_USR[dt].u = q;
+	CCPU_USR[dt + 1].u = r;
+      }
+      break;
+    case 0x31:			/* mult32 */
+      CCPU_USR[dt].s = CCPU_GPR[ra].s * CCPU_GPR[rb].s;
+      break;
+    case 0x33:			/* madd32 */
+      CCPU_USR[dt].s += CCPU_GPR[ra].s * CCPU_GPR[rb].s;
+      break;
+    case 0x35:			/* msub32 */
+      CCPU_USR[dt].s -= CCPU_GPR[ra].s * CCPU_GPR[rb].s;
+      break;
+    case 0x4f:			/* flmism */
+      {
+	char a[4];
+	char b[4];
+	int order = CCPU_SR_TEST (PSW, PSW_BE) ? BIG_ENDIAN : LITTLE_ENDIAN;
+	int ret;
+
+	store_unsigned_integer ((unsigned char *) &a, 4, order,
+				CCPU_GPR[ra].u);
+	store_unsigned_integer ((unsigned char *) &b, 4, order,
+				CCPU_GPR[rb].u);
+	ret = find_mism ((unsigned char *) &a, (unsigned char *) &b, -1);
+	CCPU_GPR[rt].u = ret;
+      }
+      break;
+    case 0x68:			/* mulsr64 */
+      {
+	int64_t r = (int64_t) CCPU_GPR[ra].s * (int64_t) CCPU_GPR[rb].s;
+	int d = rt & ~1;	/* Even register of the destination pair.  */
+
+	/* The register receiving the high word depends on PSW.BE.  */
+	if (CCPU_SR_TEST (PSW, PSW_BE))
+	  {
+	    CCPU_GPR[d].u = (r >> 32) & 0xFFFFFFFF;
+	    CCPU_GPR[d + 1].u = r & 0xFFFFFFFF;
+	  }
+	else
+	  {
+	    CCPU_GPR[d + 1].u = (r >> 32) & 0xFFFFFFFF;
+	    CCPU_GPR[d].u = r & 0xFFFFFFFF;
+	  }
+      }
+      break;
+    case 0x69:			/* mulr64 */
+      {
+	uint64_t r = (uint64_t) CCPU_GPR[ra].u * (uint64_t) CCPU_GPR[rb].u;
+	int d = rt & ~1;	/* Even register of the destination pair.  */
+
+	/* The register receiving the high word depends on PSW.BE.  */
+	if (CCPU_SR_TEST (PSW, PSW_BE))
+	  {
+	    CCPU_GPR[d].u = (r >> 32) & 0xFFFFFFFF;
+	    CCPU_GPR[d + 1].u = r & 0xFFFFFFFF;
+	  }
+	else
+	  {
+	    CCPU_GPR[d + 1].u = (r >> 32) & 0xFFFFFFFF;
+	    CCPU_GPR[d].u = r & 0xFFFFFFFF;
+	  }
+      }
+      break;
+    case 0x73:			/* maddr32 */
+      CCPU_GPR[rt].u += (CCPU_GPR[ra].u * CCPU_GPR[rb].u) & 0xFFFFFFFF;
+      break;
+    case 0x75:			/* msubr32 */
+      CCPU_GPR[rt].u -= (CCPU_GPR[ra].u * CCPU_GPR[rb].u) & 0xFFFFFFFF;
+      break;
+    default:
+      nds32_bad_op (cpu, cia, insn, "ALU2");
+      return;
+    }
+
+  return;
+}
+
+/* Execute one instruction of the JREG group (register-indirect jumps):
+   jr/ret/ifret, jral, jrnez and jralnez.
+
+   INSN is the 32-bit instruction word and CIA its address.  A non-zero
+   RA field or a non-zero DT/IT field (bits 8..9) is reported through
+   sim_io_error.  The linking forms store the return address in Rt; it is
+   CIA + 2 when the instruction is being executed on behalf of ex9.it
+   (NIF_EX9 set) and CIA + 4 otherwise.  */
+static void
+nds32_decode32_jreg (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  int rt = N32_RT5 (insn);
+  int ra = N32_RA5 (insn);
+  int rb = N32_RB5 (insn);
+  sim_cia nia;
+
+  if (ra != 0)
+    sim_io_error (sd, "JREG RA == %d at pc=0x%x, code=0x%08x\n",
+		  ra, cia, insn);
+
+  if (__GF (insn, 8, 2) != 0)
+    sim_io_error (sd, "JREG DT/IT not supported at pc=0x%x, code=0x%08x\n",
+		  cia, insn);
+
+  switch (insn & 0x1f)
+    {
+    case 0:			/* jr, ifret, ret */
+      if (__GF (insn, 5, 2) == 0x3)
+	{
+	  /* ifret. IFC + RET */
+	  if (CCPU_SR_TEST (PSW, PSW_IFCON))
+	    cia = CCPU_USR[USR0_IFCLP].u;
+	  else
+	    return;		/* Do nothing. (ifret) */
+	}
+      else
+	/* jr or ret */
+	cia = CCPU_GPR[rb].u;
+
+      CCPU_SR_CLEAR (PSW, PSW_IFCON);
+      nds32_set_nia (cpu, cia);
+      return;
+
+    case 1:			/* jral */
+      /* Fetch the target before Rt is written; otherwise the jump would
+	 go to the return address whenever rt == rb.  */
+      nia = CCPU_GPR[rb].u;
+
+      if (sd->gprof)
+	nds32_gmon_mcount (cia, nia);
+      if (cpu->iflags & NIF_EX9)
+	CCPU_GPR[rt].u = cia + 2;
+      else
+	CCPU_GPR[rt].u = cia + 4;
+
+      /* If PSW.IFCON, it returns to $ifclp instead.  */
+      if (CCPU_SR_TEST (PSW, PSW_IFCON))
+	CCPU_GPR[rt] = CCPU_USR[USR0_IFCLP];
+
+      CCPU_SR_CLEAR (PSW, PSW_IFCON);
+      nds32_set_nia (cpu, nia);
+      return;
+
+    case 2:			/* jrnez */
+      if (CCPU_GPR[rb].u == 0)
+	return;			/* NOT taken */
+
+      /* PSW.IFCON is only cleared when taken.  */
+      CCPU_SR_CLEAR (PSW, PSW_IFCON);
+      nds32_set_nia (cpu, CCPU_GPR[rb].u);
+      return;
+
+    case 3:			/* jralnez */
+      /* Prevent early clobbering of rb (rt == rb).  */
+      nia = CCPU_GPR[rb].u;
+
+      /* Rt is always set according to spec.  */
+      if (cpu->iflags & NIF_EX9)
+	CCPU_GPR[rt].u = cia + 2;
+      else
+	CCPU_GPR[rt].u = cia + 4;
+
+      /* By spec, PSW.IFCON is always cleared no matter it takes or not.  */
+      if (CCPU_SR_TEST (PSW, PSW_IFCON))
+	CCPU_GPR[rt] = CCPU_USR[USR0_IFCLP];
+      CCPU_SR_CLEAR (PSW, PSW_IFCON);
+
+      if (nia != 0)		/* taken branch */
+	nds32_set_nia (cpu, nia);
+
+      return;
+
+    default:
+      nds32_bad_op (cpu, cia, insn, "JREG");
+      return;
+    }
+}
+
+/* Execute a BR1 instruction: beq (bit 14 clear) or bne (bit 14 set).
+
+   Rt and Ra are compared as unsigned words.  When the branch is taken,
+   PSW.IFCON is cleared and the next instruction address becomes CIA plus
+   the signed 14-bit immediate scaled by two.  A branch that is not taken
+   changes nothing.  */
+static void
+nds32_decode32_br1 (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  const int rt = N32_RT5 (insn);
+  const int ra = N32_RA5 (insn);
+  const int imm14s = N32_IMM14S (insn);
+  const int is_bne = (insn >> 14) & 1;
+  const int equal = CCPU_GPR[rt].u == CCPU_GPR[ra].u;
+
+  /* beq falls through on inequality, bne on equality.  */
+  if (is_bne == equal)
+    return;
+
+  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+  nds32_set_nia (cpu, cia + (imm14s << 1));
+}
+
+/* Execute one instruction of the BR2 group: ifcall, the compare-with-zero
+   branches, and the branch-and-link forms bgezal/bltzal.
+
+   INSN is the 32-bit instruction word and CIA its address.  The
+   sub-opcode is the 4-bit field at bits 16..19, so every case label must
+   lie in the range 0x0..0xf.  The branch displacement is the signed
+   16-bit immediate scaled by two.  Return addresses are CIA + 2 when
+   running on behalf of ex9.it (NIF_EX9 set) and CIA + 4 otherwise.  */
+static void
+nds32_decode32_br2 (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  int rt = N32_RT5 (insn);
+  int imm16s1 = N32_IMM16S (insn) << 1;
+
+  switch (__GF (insn, 16, 4))
+    {
+    case 0x0:			/* ifcall */
+      /* Do not set $ifclp when chaining ifcall.  */
+      if (!CCPU_SR_TEST (PSW, PSW_IFCON))
+	{
+	  if (cpu->iflags & NIF_EX9)
+	    CCPU_USR[USR0_IFCLP].u = cia + 2;
+	  else
+	    CCPU_USR[USR0_IFCLP].u = cia + 4;
+	}
+      nds32_set_nia (cpu, cia + imm16s1);
+      CCPU_SR_SET (PSW, PSW_IFCON);
+      break;
+    case 0x2:			/* beqz */
+      if (CCPU_GPR[rt].s == 0)
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + imm16s1);
+	}
+      break;
+    case 0x3:			/* bnez */
+      if (CCPU_GPR[rt].s != 0)
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + imm16s1);
+	}
+      break;
+    case 0x4:			/* bgez */
+      if (CCPU_GPR[rt].s >= 0)
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + imm16s1);
+	}
+      break;
+    case 0x5:			/* bltz */
+      if (CCPU_GPR[rt].s < 0)
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + imm16s1);
+	}
+      break;
+    case 0x6:			/* bgtz */
+      if (CCPU_GPR[rt].s > 0)
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + imm16s1);
+	}
+      break;
+    case 0x7:			/* blez */
+      if (CCPU_GPR[rt].s <= 0)
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + imm16s1);
+	}
+      break;
+    case 0xc:			/* bgezal */
+      /* Always clobber $lp.  */
+      if (cpu->iflags & NIF_EX9)
+	CCPU_GPR[GPR_LP].u = cia + 2;
+      else
+	CCPU_GPR[GPR_LP].u = cia + 4;
+
+      /* Always set $lp = $ifc_lp whether or not the branch is taken.  */
+      if (CCPU_SR_TEST (PSW, PSW_IFCON))
+	CCPU_GPR[GPR_LP].u = CCPU_USR[USR0_IFCLP].u;
+
+      /* PSW.IFCON is only cleared when the branch is taken.  */
+      if (!(CCPU_GPR[rt].s >= 0))
+	return;
+
+      if (sd->gprof)
+	nds32_gmon_mcount (cia, cia + imm16s1);
+      CCPU_SR_CLEAR (PSW, PSW_IFCON);
+      nds32_set_nia (cpu, cia + imm16s1);
+      return;
+    case 0xd:			/* bltzal */
+      /* Always clobber $lp.  */
+      if (cpu->iflags & NIF_EX9)
+	CCPU_GPR[GPR_LP].u = cia + 2;
+      else
+	CCPU_GPR[GPR_LP].u = cia + 4;
+
+      /* Always set $lp = $ifc_lp whether or not the branch is taken.  */
+      if (CCPU_SR_TEST (PSW, PSW_IFCON))
+	CCPU_GPR[GPR_LP].u = CCPU_USR[USR0_IFCLP].u;
+
+      /* PSW.IFCON is only cleared when the branch is taken.  */
+      if (!(CCPU_GPR[rt].s < 0))
+	return;
+
+      if (sd->gprof)
+	nds32_gmon_mcount (cia, cia + imm16s1);
+      CCPU_SR_CLEAR (PSW, PSW_IFCON);
+      nds32_set_nia (cpu, cia + imm16s1);
+      break;
+    default:
+      nds32_bad_op (cpu, cia, insn, "BR2");
+      break;
+    }
+}
+
+/* Execute one instruction of the MISC group.
+
+   INSN is the 32-bit instruction word and CIA its address; the
+   sub-opcode is the low five bits of INSN.  standby, cctl, dsb, isb,
+   isync and msync are treated as no-ops.  trap and break raise a debug
+   exception.  mfsr/mtsr move between Rt and the system register selected
+   by the 10-bit field at bits 10..19; setend and setgie reuse the mtsr
+   encoding and take their operand from the Rt field itself.  iret, teqz,
+   tnez, tlbop and anything unrecognised are reported through
+   nds32_bad_op.  */
+static void
+nds32_decode32_misc (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  int rt = N32_RT5 (insn);
+
+  switch (insn & 0x1F)
+    {
+    case 0x0:			/* standby */
+    case 0x1:			/* cctl */
+    case 0x8:			/* dsb */
+    case 0x9:			/* isb */
+    case 0xd:			/* isync */
+    case 0xc:			/* msync */
+      break;
+    case 0x5:			/* trap */
+    case 0xa:			/* break */
+      nds32_raise_exception (cpu, EXP_DEBUG, SIM_SIGTRAP, NULL);
+      return; /* FIXME dispatch exception?  */
+    case 0x2:			/* mfsr */
+      CCPU_GPR[rt] = CCPU_SR[__GF (insn, 10, 10)];
+      break;
+    case 0x3:			/* mtsr */
+      {
+	int sridx = __GF (insn, 10, 10);
+
+	switch (__GF (insn, 5, 5))
+	  {
+	  case 0:		/* mtsr */
+	    CCPU_SR[sridx] = CCPU_GPR[rt];
+	    /* Writing the performance-monitor control register takes
+	       effect immediately.  */
+	    switch (sridx)
+	      {
+	      case SRIDX_PFM_CTL:
+		nds32_pfm_ctl (cpu);
+		break;
+	      }
+	    break;
+	  case 1:		/* setend */
+	    if (sridx != 0x80)
+	      nds32_bad_op (cpu, cia, insn, "SETEND (sridx)");
+
+	    /* The Rt field is the operand here, not a register number.  */
+	    if (rt == 1)
+	      CCPU_SR_SET (PSW, PSW_BE);
+	    else if (rt == 0)
+	      CCPU_SR_CLEAR (PSW, PSW_BE);
+	    else
+	      nds32_bad_op (cpu, cia, insn, "SETEND (BE/LE)");
+	    break;
+	  case 2:		/* setgie */
+	    if (sridx != 0x80)
+	      nds32_bad_op (cpu, cia, insn, "SETGIE (sridx)");
+
+	    /* The Rt field is the operand here, not a register number.  */
+	    if (rt == 1)
+	      CCPU_SR_SET (PSW, PSW_GIE);
+	    else if (rt == 0)
+	      CCPU_SR_CLEAR (PSW, PSW_GIE);
+	    else
+	      nds32_bad_op (cpu, cia, insn, "SETGIE (E/D)");
+	    break;
+	  }
+      }
+      break;
+    case 0xb:			/* syscall */
+      nds32_syscall (cpu, __GF (insn, 5, 15), cia);
+      break;
+    case 0x4:			/* iret */
+      nds32_bad_op (cpu, cia, insn, "iret (MISC)");
+      break;
+    case 0x6:			/* teqz */
+      nds32_bad_op (cpu, cia, insn, "teqz (MISC)");
+      break;
+    case 0x7:			/* tnez */
+      nds32_bad_op (cpu, cia, insn, "tnez (MISC)");
+      break;
+    case 0xe:			/* tlbop */
+      nds32_bad_op (cpu, cia, insn, "tlbop (MISC)");
+      break;
+    default:
+      nds32_bad_op (cpu, cia, insn, "MISC");
+      break;
+    }
+}
+
+/* Decode and execute the 32-bit instruction INSN located at CIA.
+
+   Immediate-offset loads and stores, the gp-relative accesses, the
+   immediate ALU operations, ji/jal and beqc/bnec are executed inline.
+   All other major opcodes are handed to the per-group decoders.  A major
+   opcode with no handler is reported through nds32_bad_op.  */
+static void
+nds32_decode32 (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  const int op = N32_OP6 (insn);
+  const int rt = N32_RT5 (insn);
+  const int ra = N32_RA5 (insn);
+  const int imm15s = N32_IMM15S (insn);
+  const int imm15u = N32_IMM15U (insn);
+  uint32_t log2sz;		/* log2 of the access size in bytes.  */
+  uint32_t ea;			/* Effective address.  */
+  sim_cia target;
+
+  switch (op)
+    {
+    /* In the next six groups the low two opcode bits give log2 of the
+       access size, which also scales the immediate.  */
+    case 0x0:			/* lbi */
+    case 0x1:			/* lhi */
+    case 0x2:			/* lwi */
+    /* case 0x3: */		/* ldi */
+      log2sz = op & 0x3;
+      ea = CCPU_GPR[ra].u + (imm15s << log2sz);
+      CCPU_GPR[rt].u = nds32_ld_aligned (cpu, ea, 1 << log2sz);
+      break;
+
+    case 0x4:			/* lbi.bi */
+    case 0x5:			/* lhi.bi */
+    case 0x6:			/* lwi.bi */
+    /* case 0x7: */		/* ldi.bi */
+      /* Access at the base address first, then update the base.  */
+      log2sz = op & 0x3;
+      ea = CCPU_GPR[ra].u;
+      CCPU_GPR[rt].u = nds32_ld_aligned (cpu, ea, 1 << log2sz);
+      CCPU_GPR[ra].u += (imm15s << log2sz);
+      break;
+
+    case 0x8:			/* sbi */
+    case 0x9:			/* shi */
+    case 0xa:			/* swi */
+    /* case 0xb: */		/* sdi */
+      log2sz = op & 0x3;
+      ea = CCPU_GPR[ra].u + (imm15s << log2sz);
+      nds32_st_aligned (cpu, ea, 1 << log2sz, CCPU_GPR[rt].u);
+      break;
+
+    case 0xc:			/* sbi.bi */
+    case 0xd:			/* shi.bi */
+    case 0xe:			/* swi.bi */
+    /* case 0xf: */		/* sdi.bi */
+      log2sz = op & 0x3;
+      ea = CCPU_GPR[ra].u;
+      nds32_st_aligned (cpu, ea, 1 << log2sz, CCPU_GPR[rt].u);
+      CCPU_GPR[ra].u += (imm15s << log2sz);
+      break;
+
+    case 0x10:			/* lbsi */
+    case 0x11:			/* lhsi */
+    /* case 0x12: */		/* lwsi */
+      log2sz = op & 0x3;
+      ea = CCPU_GPR[ra].u + (imm15s << log2sz);
+      CCPU_GPR[rt].u = nds32_ld_aligned (cpu, ea, 1 << log2sz);
+      /* Sign-extend from the width that was loaded.  */
+      CCPU_GPR[rt].u = __SEXT (CCPU_GPR[rt].u, (1 << log2sz) * 8);
+      break;
+
+    case 0x13:			/* dprefi */
+      /* Prefetch hint: nothing to simulate.  */
+      break;
+
+    case 0x14:			/* lbsi.bi */
+    case 0x15:			/* lhsi.bi */
+    /* case 0x16: */		/* lwsi.bi */
+      log2sz = op & 0x3;
+      ea = CCPU_GPR[ra].u;
+      CCPU_GPR[rt].u = nds32_ld_aligned (cpu, ea, 1 << log2sz);
+      CCPU_GPR[rt].u = __SEXT (CCPU_GPR[rt].u, (1 << log2sz) * 8);
+      CCPU_GPR[ra].u += (imm15s << log2sz);
+      break;
+
+    case 0x17:			/* LBGP */
+      /* lbi.gp and lbsi.gp share the load; bit 19 asks for sign
+	 extension.  */
+      ea = CCPU_GPR[GPR_GP].u + N32_IMMS (insn, 19);
+      CCPU_GPR[rt].u = nds32_ld_aligned (cpu, ea, 1);
+      if (insn & __BIT (19))	/* lbsi.gp */
+	CCPU_GPR[rt].u = __SEXT (CCPU_GPR[rt].u, 1 * 8);
+      break;
+
+    case 0x18:			/* LWC */
+      nds32_decode32_lwc (cpu, insn, cia);
+      break;
+    case 0x19:			/* SWC */
+      nds32_decode32_swc (cpu, insn, cia);
+      break;
+    case 0x1a:			/* LDC */
+      nds32_decode32_ldc (cpu, insn, cia);
+      break;
+    case 0x1b:			/* SDC */
+      nds32_decode32_sdc (cpu, insn, cia);
+      break;
+    case 0x1c:			/* MEM */
+      nds32_decode32_mem (cpu, insn, cia);
+      break;
+    case 0x1d:			/* LSMW */
+      nds32_decode32_lsmw (cpu, insn, cia);
+      break;
+
+    case 0x1e:			/* HWGP */
+      {
+	const int sub = __GF (insn, 17, 3);
+
+	switch (sub)
+	  {
+	  case 0: case 1:	/* lhi.gp */
+	    ea = CCPU_GPR[GPR_GP].u + (N32_IMMS (insn, 18) << 1);
+	    CCPU_GPR[rt].u = nds32_ld_aligned (cpu, ea, 2);
+	    break;
+	  case 2: case 3:	/* lhsi.gp */
+	    ea = CCPU_GPR[GPR_GP].u + (N32_IMMS (insn, 18) << 1);
+	    CCPU_GPR[rt].u = nds32_ld_aligned (cpu, ea, 2);
+	    CCPU_GPR[rt].u = __SEXT (CCPU_GPR[rt].u, 2 * 8);
+	    break;
+	  case 4: case 5:	/* shi.gp */
+	    ea = CCPU_GPR[GPR_GP].u + (N32_IMMS (insn, 18) << 1);
+	    nds32_st_aligned (cpu, ea, 2, CCPU_GPR[rt].u);
+	    break;
+	  case 6:		/* lwi.gp */
+	    ea = CCPU_GPR[GPR_GP].u + (N32_IMMS (insn, 17) << 2);
+	    CCPU_GPR[rt].u = nds32_ld_aligned (cpu, ea, 4);
+	    break;
+	  case 7:		/* swi.gp */
+	    ea = CCPU_GPR[GPR_GP].u + (N32_IMMS (insn, 17) << 2);
+	    nds32_st_aligned (cpu, ea, 4, CCPU_GPR[rt].u);
+	    break;
+	  }
+      }
+      break;
+
+    case 0x1f:			/* SBGP */
+      if (insn & __BIT (19))	/* addi.gp */
+	{
+	  CCPU_GPR[rt].s = CCPU_GPR[GPR_GP].u + N32_IMMS (insn, 19);
+	}
+      else			/* sbi.gp */
+	{
+	  ea = CCPU_GPR[GPR_GP].u + N32_IMMS (insn, 19);
+	  nds32_st_aligned (cpu, ea, 1, CCPU_GPR[rt].u & 0xFF);
+	}
+      break;
+
+    case 0x20:			/* ALU_1 */
+      nds32_decode32_alu1 (cpu, insn, cia);
+      break;
+    case 0x21:			/* ALU_2 */
+      nds32_decode32_alu2 (cpu, insn, cia);
+      break;
+
+    case 0x22:			/* movi */
+      CCPU_GPR[rt].s = N32_IMM20S (insn);
+      break;
+    case 0x23:			/* sethi */
+      CCPU_GPR[rt].u = N32_IMM20U (insn) << 12;
+      break;
+
+    case 0x24:			/* ji, jal */
+      {
+	const int in_ex9 = (cpu->iflags & NIF_EX9) != 0;
+	const int is_jal = (insn & __BIT (24)) != 0;
+
+	/* Under ex9 the immediate is an absolute address within the
+	   current 16MB region; otherwise it is relative to CIA.  */
+	if (in_ex9)
+	  target = (cia & 0xff000000) | (N32_IMMU (insn, 24) << 1);
+	else
+	  target = cia + (N32_IMMS (insn, 24) << 1);
+
+	if (is_jal)
+	  {
+	    if (in_ex9)
+	      CCPU_GPR[GPR_LP].u = cia + 2;
+	    else
+	      CCPU_GPR[GPR_LP].u = cia + 4;
+
+	    /* With PSW.IFCON set the link register gets $ifclp.  */
+	    if (CCPU_SR_TEST (PSW, PSW_IFCON))
+	      CCPU_GPR[GPR_LP] = CCPU_USR[USR0_IFCLP];
+
+	    if (sd->gprof)
+	      nds32_gmon_mcount (cia, target);
+	  }
+
+	CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	nds32_set_nia (cpu, target);
+      }
+      break;
+
+    case 0x25:			/* jreg */
+      nds32_decode32_jreg (cpu, insn, cia);
+      break;
+    case 0x26:			/* br1 */
+      nds32_decode32_br1 (cpu, insn, cia);
+      break;
+    case 0x27:			/* br2 */
+      nds32_decode32_br2 (cpu, insn, cia);
+      break;
+
+    case 0x28:			/* addi rt, ra, imm15s */
+      CCPU_GPR[rt].s = CCPU_GPR[ra].s + imm15s;
+      break;
+    case 0x29:			/* subri */
+      CCPU_GPR[rt].s = imm15s - CCPU_GPR[ra].s;
+      break;
+    case 0x2a:			/* andi */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u & imm15u;
+      break;
+    case 0x2b:			/* xori */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u ^ imm15u;
+      break;
+    case 0x2c:			/* ori */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u | imm15u;
+      break;
+
+    case 0x2d:			/* br3, beqc, bnec */
+      {
+	const int imm11s = __SEXT (__GF (insn, 8, 11), 11);
+	const int is_beqc = (insn & __BIT (19)) == 0;
+	const int differs = CCPU_GPR[rt].s != imm11s;
+
+	/* beqc is taken when Rt equals the constant, bnec when it
+	   differs.  */
+	if (is_beqc != differs)
+	  {
+	    CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	    nds32_set_nia (cpu, cia + (N32_IMMS (insn, 8) << 1));
+	  }
+      }
+      break;
+
+    case 0x2e:			/* slti */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u < (uint32_t) imm15s;
+      break;
+    case 0x2f:			/* sltsi */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].s < imm15s;
+      break;
+
+    case 0x32:			/* misc */
+      nds32_decode32_misc (cpu, insn, cia);
+      break;
+    case 0x33:			/* bitci */
+      CCPU_GPR[rt].u = CCPU_GPR[ra].u & ~imm15u;
+      break;
+    case 0x35:			/* COP */
+      nds32_decode32_cop (cpu, insn, cia);
+      break;
+
+    default:
+      nds32_bad_op (cpu, cia, insn, "32-bit");
+      break;
+    }
+}
+
+/* Execute INSN, a 32-bit instruction, on behalf of ex9.it at CIA.
+
+   NIF_EX9 is raised in cpu->iflags for the duration of the call so that
+   nds32_decode32 treats ji/jal targets as absolute addresses and uses
+   CIA + 2 as the return address.  The flag is cleared again afterwards.  */
+static void
+nds32_decode16_ex9 (sim_cpu *cpu, uint32_t insn, sim_cia cia)
+{
+  cpu->iflags = cpu->iflags | NIF_EX9;
+
+  nds32_decode32 (cpu, insn, cia);
+
+  cpu->iflags = cpu->iflags & ~NIF_EX9;
+}
+
+static void
+nds32_decode16 (sim_cpu *cpu, uint32_t insn, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  const int rt5 = N16_RT5 (insn);
+  const int ra5 = N16_RA5 (insn);
+  const int rt4 = N16_RT4 (insn);
+  const int imm5u = N16_IMM5U (insn);
+  const int imm5s = N16_IMM5S (insn);
+  const int imm9u = N16_IMM9U (insn);
+  const int rt3 = N16_RT3 (insn);
+  const int ra3 = N16_RA3 (insn);
+  const int rb3 = N16_RB3 (insn);
+  const int rt38 = N16_RT38 (insn);
+  const int imm3u = rb3;
+  uint32_t shift;
+  uint32_t addr;
+
+  switch (__GF (insn, 7, 8))
+    {
+    case 0xf8:			/* push25 */
+      {
+	uint32_t smw_adm = 0x3A6F83BC;
+	uint32_t res[] = { 6, 8, 10, 14 };
+	uint32_t re = __GF (insn, 5, 2);
+
+	smw_adm |= res[re] << 10;
+	nds32_decode32_lsmw (cpu, smw_adm, cia);
+	CCPU_GPR[GPR_SP].u -= (imm5u << 3);
+	if (re >= 1)
+	  CCPU_GPR[8].u = cia & 0xFFFFFFFC;
+      }
+      return;
+    case 0xf9:			/* pop25 */
+      {
+	uint32_t lmw_bim = 0x3A6F8384;
+	uint32_t res[] = { 6, 8, 10, 14 };
+	uint32_t re = __GF (insn, 5, 2);
+
+	lmw_bim |= res[re] << 10;
+	CCPU_GPR[GPR_SP].u += (imm5u << 3);
+	nds32_decode32_lsmw (cpu, lmw_bim, cia);
+	CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	nds32_set_nia (cpu, CCPU_GPR[GPR_LP].u);
+      }
+      return;
+    }
+
+  if (__GF (insn, 8, 7) == 0x7d)	/* movd44 */
+    {
+      int rt5e = __GF (insn, 4, 4) << 1;
+      int ra5e = __GF (insn, 0, 4) << 1;
+
+      CCPU_GPR[rt5e] = CCPU_GPR[ra5e];
+      CCPU_GPR[rt5e + 1] = CCPU_GPR[ra5e + 1];
+      return;
+    }
+
+  switch (__GF (insn, 9, 6))
+    {
+    case 0x4:			/* add45 */
+      CCPU_GPR[rt4].u += CCPU_GPR[ra5].u;
+      return;
+    case 0x5:			/* sub45 */
+      CCPU_GPR[rt4].u -= CCPU_GPR[ra5].u;
+      return;
+    case 0x6:			/* addi45 */
+      CCPU_GPR[rt4].u += imm5u;
+      return;
+    case 0x7:			/* subi45 */
+      CCPU_GPR[rt4].u -= imm5u;
+      return;
+    case 0x8:			/* srai45 */
+      CCPU_GPR[rt4].u = CCPU_GPR[rt4].s >> imm5u;
+      return;
+    case 0x9:			/* srli45 */
+      CCPU_GPR[rt4].u = CCPU_GPR[rt4].u >> imm5u;
+      return;
+    case 0xa:			/* slli333 */
+      CCPU_GPR[rt3].u = CCPU_GPR[ra3].u << imm3u;
+      return;
+    case 0xc:			/* add333 */
+      CCPU_GPR[rt3].u = CCPU_GPR[ra3].u + CCPU_GPR[rb3].u;
+      return;
+    case 0xd:			/* sub333 */
+      CCPU_GPR[rt3].u = CCPU_GPR[ra3].u - CCPU_GPR[rb3].u;
+      return;
+    case 0xe:			/* addi333 */
+      CCPU_GPR[rt3].u = CCPU_GPR[ra3].u + imm3u;
+      return;
+    case 0xf:			/* subi333 */
+      CCPU_GPR[rt3].u = CCPU_GPR[ra3].u - imm3u;
+      return;
+    case 0x10:			/* lwi333 */
+    case 0x12:			/* lhi333 */
+    case 0x13:			/* lbi333 */
+      {
+	int shtbl[] = { 2, -1, 1, 0 };
+
+	shift = shtbl[(__GF (insn, 9, 6) - 0x10)];
+	addr = CCPU_GPR[ra3].u + (imm3u << shift);
+	CCPU_GPR[rt3].u = nds32_ld_aligned (cpu, addr, 1 << shift);
+      }
+      return;
+    case 0x11:			/* lwi333.bi */
+      CCPU_GPR[rt3].u = nds32_ld_aligned (cpu, CCPU_GPR[ra3].u, 4);
+      CCPU_GPR[ra3].u += imm3u << 2;
+      return;
+    case 0x14:			/* swi333 */
+    case 0x16:			/* shi333 */
+    case 0x17:			/* sbi333 */
+      {
+	int shtbl[] = { 2, -1, 1, 0 };
+
+	shift = shtbl[(__GF (insn, 9, 6) - 0x14)];
+	nds32_st_aligned (cpu, CCPU_GPR[ra3].u + (imm3u << shift),
+			  1 << shift, CCPU_GPR[rt3].u);
+      }
+      return;
+    case 0x15:			/* swi333.bi */
+      nds32_st_aligned (cpu, CCPU_GPR[ra3].u, 4, CCPU_GPR[rt3].u);
+      CCPU_GPR[ra3].u += imm3u << 2;
+      return;
+    case 0x18:			/* addri36.sp */
+      CCPU_GPR[rt3].u = CCPU_GPR[GPR_SP].u + (N16_IMM6U (insn) << 2);
+      return;
+    case 0x19:			/* lwi45.fe */
+      {
+	/* Not tested yet */
+	int imm7n = -((32 - imm5u) << 2);
+
+	CCPU_GPR[rt4].u = nds32_ld_aligned (cpu, CCPU_GPR[8].u + imm7n, 4);
+      }
+      return;
+    case 0x1a:			/* lwi450 */
+      CCPU_GPR[rt4].u = nds32_ld_aligned (cpu, CCPU_GPR[ra5].u, 4);
+      return;
+    case 0x1b:			/* swi450 */
+      nds32_st_aligned (cpu, CCPU_GPR[ra5].u, 4, CCPU_GPR[rt4].u);
+      return;
+    case 0x30:			/* slts45 */
+      CCPU_GPR[GPR_TA].u = (CCPU_GPR[rt4].s < CCPU_GPR[ra5].s) ? 1 : 0;
+      return;
+    case 0x31:			/* slt45 */
+      CCPU_GPR[GPR_TA].u = (CCPU_GPR[rt4].u < CCPU_GPR[ra5].u) ? 1 : 0;
+      return;
+    case 0x32:			/* sltsi45 */
+      CCPU_GPR[GPR_TA].u = (CCPU_GPR[rt4].s < imm5u) ? 1 : 0;
+      return;
+    case 0x33:			/* slti45 */
+      CCPU_GPR[GPR_TA].u = (CCPU_GPR[rt4].u < imm5u) ? 1 : 0;
+      return;
+
+    case 0x34:			/* beqzs8, bnezs8 */
+      if (((insn & __BIT (8)) == 0) ^ (CCPU_GPR[GPR_TA].u != 0))
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + (N16_IMM8S (insn) << 1));
+	}
+      return;
+    case 0x35:			/* break16, ex9.it */
+      if (imm9u < 32)		/* break16 */
+	{
+	  nds32_raise_exception (cpu, EXP_DEBUG, SIM_SIGTRAP, NULL);
+	  return;
+	}
+
+      /* ex9.it */
+      sim_read (sd, (CCPU_USR[USR0_ITB].u & 0xfffffffc) + (imm9u << 2),
+		(unsigned char *) &insn, 4);
+      insn = extract_unsigned_integer ((unsigned char *) &insn, 4, BIG_ENDIAN);
+      nds32_decode16_ex9 (cpu, insn, cia);
+      return;
+    case 0x3c:			/* ifcall9 */
+      if (!CCPU_SR_TEST (PSW, PSW_IFCON))
+	CCPU_USR[USR0_IFCLP].u = cia + 2;
+
+      nds32_set_nia (cpu, cia + (N16_IMM9U (insn) << 1));
+      CCPU_SR_SET (PSW, PSW_IFCON);
+      return;
+    case 0x3d:			/* movpi45 */
+      CCPU_GPR[rt4].u = imm5u + 16;
+      return;
+    case 0x3f:			/* MISC33 */
+      switch (insn & 0x7)
+	{
+	case 2:			/* neg33 */
+	  CCPU_GPR[rt3].s = -CCPU_GPR[ra3].u;
+	  return;
+	case 3:			/* not33 */
+	  CCPU_GPR[rt3].u = ~CCPU_GPR[ra3].u;
+	  return;
+	case 4:			/* mul33 */
+	  CCPU_GPR[rt3].u = CCPU_GPR[rt3].u * CCPU_GPR[ra3].u;
+	  return;
+	case 5:			/* xor33 */
+	  CCPU_GPR[rt3].u = CCPU_GPR[rt3].u ^ CCPU_GPR[ra3].u;
+	  return;
+	case 6:			/* and33 */
+	  CCPU_GPR[rt3].u = CCPU_GPR[rt3].u & CCPU_GPR[ra3].u;
+	  return;
+	case 7:			/* or33 */
+	  CCPU_GPR[rt3].u = CCPU_GPR[rt3].u | CCPU_GPR[ra3].u;
+	  return;
+	default:
+	  goto bad_op;
+	}
+      return;
+    case 0xb:			/* ... */
+      switch (insn & 0x7)
+	{
+	case 0:			/* zeb33 */
+	  CCPU_GPR[rt3].u = CCPU_GPR[ra3].u & 0xff;
+	  break;
+	case 1:			/* zeh33 */
+	  CCPU_GPR[rt3].u = CCPU_GPR[ra3].u & 0xffff;
+	  break;
+	case 2:			/* seb33 */
+	  CCPU_GPR[rt3].s = __SEXT (CCPU_GPR[ra3].s, 8);
+	  break;
+	case 3:			/* seh33 */
+	  CCPU_GPR[rt3].s = __SEXT (CCPU_GPR[ra3].s, 16);
+	  break;
+	case 4:			/* xlsb33 */
+	  CCPU_GPR[rt3].u = CCPU_GPR[ra3].u & 0x1;
+	  break;
+	case 5:			/* x11b33 */
+	  CCPU_GPR[rt3].u = CCPU_GPR[ra3].u & 0x7FF;
+	  break;
+	case 6:			/* bmski33 */
+	  CCPU_GPR[rt3].u = CCPU_GPR[rt3].u & (1 << __GF (insn, 3, 3));
+	  break;
+	case 7:			/* fexti33 */
+	  CCPU_GPR[rt3].u = CCPU_GPR[rt3].u & ((1 << (__GF (insn, 3, 3) + 1)) - 1);
+	  break;
+	}
+      return;
+    }
+
+  switch (__GF (insn, 10, 5))
+    {
+    case 0x0:			/* mov55 or ifret16 */
+      /* It's ok to do assignment even if it's ifret16.  */
+      CCPU_GPR[rt5].u = CCPU_GPR[ra5].u;
+
+      if (rt5 == ra5 && rt5 == 31 && CCPU_SR_TEST (PSW, PSW_IFCON))
+	{
+	  /* ifret */
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, CCPU_USR[USR0_IFCLP].u);
+	}
+      return;
+    case 0x1:			/* movi55 */
+      CCPU_GPR[rt5].s = imm5s;
+      return;
+    case 0x1b:			/* addi10s (V2) */
+      CCPU_GPR[GPR_SP].u += N16_IMM10S (insn);
+      return;
+    }
+
+  switch (__GF (insn, 11, 4))
+    {
+    case 0x7:			/* lwi37.fp/swi37.fp */
+      addr = CCPU_GPR[GPR_FP].u + (N16_IMM7U (insn) << 2);
+      if (insn & (1 << 7))	/* swi37.fp */
+	nds32_st_aligned (cpu, addr, 4, CCPU_GPR[rt38].u);
+      else			/* lwi37.fp */
+	CCPU_GPR[rt38].u = nds32_ld_aligned (cpu, addr, 4);
+      return;
+    case 0x8:			/* beqz38 */
+      if (CCPU_GPR[rt38].u == 0)
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + (N16_IMM8S (insn) << 1));
+	}
+      return;
+    case 0x9:			/* bnez38 */
+      if (CCPU_GPR[rt38].u != 0)
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + (N16_IMM8S (insn) << 1));
+	}
+      return;
+    case 0xa:			/* beqs38/j8, implied r5 */
+      if (CCPU_GPR[rt38].u == CCPU_GPR[5].u)	/* rt38 == 5 means j8 */
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + (N16_IMM8S (insn) << 1));
+	}
+      return;
+    case 0xb:			/* bnes38 and others */
+      if (rt38 == 5)
+	{
+	  switch (__GF (insn, 5, 3))
+	    {
+	    case 0:		/* jr5 */
+	    case 4:		/* ret5 */
+	      CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	      nds32_set_nia (cpu, CCPU_GPR[ra5].u);
+	      return;
+	    case 1:		/* jral5 */
+	      if (sd->gprof)
+		nds32_gmon_mcount (cia, CCPU_GPR[ra5].u);
+	      CCPU_GPR[GPR_LP].u = cia + 2;
+	      if (CCPU_SR_TEST (PSW, PSW_IFCON))
+		CCPU_GPR[GPR_LP] = CCPU_USR[USR0_IFCLP];
+	      CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	      nds32_set_nia (cpu, CCPU_GPR[ra5].u);
+	      return;
+	    case 2:		/* ex9.it imm5 */
+	      sim_read (sd, (CCPU_USR[USR0_ITB].u & 0xfffffffc) + (imm5u << 2),
+			(unsigned char *) &insn, 4);
+	      insn = extract_unsigned_integer ((unsigned char *) &insn, 4,
+					       BIG_ENDIAN);
+	      nds32_decode16_ex9 (cpu, insn, cia);
+	      return;
+	    case 5:		/* add5.pc */
+	      CCPU_GPR[ra5].u += cia;
+	      break;
+	    default:
+	      goto bad_op;
+	    }
+	  return;
+	}
+      else if (CCPU_GPR[rt38].u != CCPU_GPR[5].u) /* bnes38 */
+	{
+	  CCPU_SR_CLEAR (PSW, PSW_IFCON);
+	  nds32_set_nia (cpu, cia + (N16_IMM8S (insn) << 1));
+	  return;
+	}
+      return;
+    case 0xe:			/* lwi37/swi37 */
+      addr = CCPU_GPR[GPR_SP].u + (N16_IMM7U (insn) << 2);
+      if (insn & (1 << 7))	/* swi37.sp */
+	nds32_st_aligned (cpu, addr, 4, CCPU_GPR[rt38].u);
+      else			/* lwi37.sp */
+	CCPU_GPR[rt38].u = nds32_ld_aligned (cpu, addr, 4);
+      return;
+    }
+
+bad_op:
+  nds32_bad_op (cpu, cia, insn, "16-bit");
+}
+
+/* Run the fetch/decode/execute loop on CPU 0 of SD.
+
+   NEXT_CPU_NR and NR_CPUS are not used.  When SIGGNAL is non-zero the
+   simulation is halted at once with exit status 128 + SIGGNAL.
+   Otherwise the loop below has no exit of its own; it is presumably
+   left through a non-local exit taken by one of the callees
+   (NOTE(review): confirm against sim_engine_halt).  */
+
+void
+sim_engine_run (SIM_DESC sd, int next_cpu_nr, int nr_cpus, int siggnal)
+{
+  int r;
+  sim_cia cia;
+  sim_cpu *cpu;
+  SIM_ASSERT (STATE_MAGIC (sd) == SIM_MAGIC_NUMBER);
+  cpu = STATE_CPU (sd, 0);
+  cia = CIA_GET (cpu);
+
+  if (siggnal != 0)
+    {
+      /* FIXME: Study kernel to make sure this.  */
+      /* TODO: In OPERATING_ENVIRONMENT, users may want to handle
+	       this themselves.  */
+      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, cia, sim_exited,
+		       128 + siggnal);
+      return;
+    }
+
+  while (1)
+    {
+      uint32_t insn;
+
+      /* Remember the most recently executed addresses in a ring buffer.  */
+      recent_cia[recent_cia_idx] = cia;
+      recent_cia_idx = (recent_cia_idx + 1) & RECENT_CIA_MASK;
+
+      if (sd->gprof)
+	nds32_gmon_sample (cia);
+
+      nds32_pfm_event (cpu, PFM_CYCLE);
+      nds32_pfm_event (cpu, PFM_INST);
+
+      r = sim_read (sd, cia, (unsigned char *) &insn, 4);
+
+      /* Validate the fetch before looking at INSN: after a short read
+	 the buffer holds no meaningful instruction word.  */
+      if (r != 4)
+	nds32_dump_registers (sd);
+      SIM_ASSERT (r == 4);
+
+      /* Instructions are always read as big-endian words.  */
+      insn = extract_unsigned_integer ((unsigned char *) &insn, 4,
+				       BIG_ENDIAN);
+
+      if (TRACE_LINENUM_P (cpu))
+	{
+	  trace_prefix (sd, cpu, NULL_CIA, cia, TRACE_LINENUM_P (cpu),
+			NULL, 0, " "); /* Use a space for gcc warnings.  */
+	}
+
+      /* A clear top bit selects a 32-bit instruction; otherwise the
+	 upper half-word is a 16-bit instruction.  */
+      cpu->iflags &= ~NIF_BRANCH;
+      if ((insn & 0x80000000) == 0)
+	{
+	  nds32_decode32 (cpu, insn, cia);
+	  cia += 4;
+	}
+      else
+	{
+	  nds32_decode16 (cpu, insn >> 16, cia);
+	  cia += 2;
+	}
+
+      if (cpu->iflags & NIF_BRANCH)
+	{
+	  /* Report the offending branch target itself; CIA has already
+	     been advanced to the fall-through address at this point.  */
+	  if (cpu->baddr & 1)
+	    nds32_raise_exception (cpu, EXP_GENERAL, SIM_SIGSEGV,
+				   "Alignment check exception. "
+				   "Unaligned instruction address 0x%08x\n",
+				   (unsigned int) cpu->baddr);
+	  cia = cpu->baddr;
+	}
+
+      if (TRACE_LINENUM_P (cpu))
+	{
+	  trace_result_addr1 (sd, cpu, TRACE_INSN_IDX, cia);
+	}
+
+      /* Sync registers. TODO: Sync PSW with current_target_endian.  */
+      CIA_SET (cpu, cia);
+
+      /* Process any pending events.  */
+      if (sim_events_tick (sd))
+	{
+	  CIA_SET (cpu, cia);
+	  sim_events_process (sd);
+	}
+    }
+}
+
+/* Fetch register RN of CPU into MEMORY as a LENGTH-byte integer.
+
+   Besides the 32 general purpose registers, GDB's remote-sim interface
+   uses this for some user special registers (PC, D0, D1, ITB, IFC_LP),
+   the floating-point double registers and the PSW system register.
+   The value is stored in the byte order selected by PSW.BE.
+
+   Returns LENGTH on success, or 0 when RN does not name a register
+   handled here.  */
+
+static int
+nds32_fetch_register (sim_cpu *cpu, int rn, unsigned char *memory, int length)
+{
+  ulongest_t val = 0;
+
+  /* A negative number would otherwise pass the "rn < 32" test below
+     and index reg_gpr out of bounds.  */
+  if (rn < 0)
+    return 0;
+
+  /* General purpose registers.  */
+  if (rn < 32)
+    {
+      val = cpu->reg_gpr[rn].u;
+      goto do_fetch;
+    }
+
+  /* Special user registers.  */
+  switch (rn)
+    {
+    case SIM_NDS32_PC_REGNUM:
+      val = cpu->reg_usr[USR0_PC].u;
+      goto do_fetch;
+    case SIM_NDS32_D0LO_REGNUM:
+      val = cpu->reg_usr[USR0_D0LO].u;
+      goto do_fetch;
+    case SIM_NDS32_D0HI_REGNUM:
+      val = cpu->reg_usr[USR0_D0HI].u;
+      goto do_fetch;
+    case SIM_NDS32_D1LO_REGNUM:
+      val = cpu->reg_usr[USR0_D1LO].u;
+      goto do_fetch;
+    case SIM_NDS32_D1HI_REGNUM:
+      val = cpu->reg_usr[USR0_D1HI].u;
+      goto do_fetch;
+    case SIM_NDS32_ITB_REGNUM:
+      val = cpu->reg_usr[USR0_ITB].u;
+      goto do_fetch;
+    case SIM_NDS32_IFCLP_REGNUM:
+      val = cpu->reg_usr[USR0_IFCLP].u;
+      goto do_fetch;
+    }
+
+  /* Double-precision FP registers: FDn is the pair reg_fpr[2n] (upper
+     word) and reg_fpr[2n + 1] (lower word).  */
+  if (rn >= SIM_NDS32_FD0_REGNUM && rn < SIM_NDS32_FD0_REGNUM + 32)
+    {
+      int fr = (rn - SIM_NDS32_FD0_REGNUM) << 1;
+
+      val = ((uint64_t) cpu->reg_fpr[fr].u << 32)
+	    | (uint64_t) cpu->reg_fpr[fr + 1].u;
+      goto do_fetch;
+    }
+
+  /* System registers.  */
+  switch (rn)
+    {
+    case SIM_NDS32_PSW_REGNUM:
+      val = cpu->reg_sr[SRIDX (1, 0, 0)].u;
+      goto do_fetch;
+    }
+
+  return 0;
+
+do_fetch:
+  store_unsigned_integer (memory, length,
+			  CCPU_SR_TEST (PSW, PSW_BE)
+			  ? BIG_ENDIAN : LITTLE_ENDIAN,
+			  val);
+  return length;
+}
+
+/* Store the LENGTH-byte integer at MEMORY into register RN of CPU.
+
+   MEMORY is interpreted in the byte order selected by PSW.BE.
+   Returns the size in bytes of the register written (4, or 8 for a
+   floating-point double register), or 0 when RN does not name a
+   register handled here.  */
+
+static int
+nds32_store_register (sim_cpu *cpu, int rn, unsigned char *memory, int length)
+{
+  ulongest_t val;
+
+  /* A negative number would otherwise pass the "rn < 32" test below
+     and write outside reg_gpr.  */
+  if (rn < 0)
+    return 0;
+
+  val = extract_unsigned_integer (memory, length,
+				  CCPU_SR_TEST (PSW, PSW_BE)
+				  ? BIG_ENDIAN : LITTLE_ENDIAN);
+
+  /* General purpose registers.  */
+  if (rn < 32)
+    {
+      cpu->reg_gpr[rn].u = val;
+      return 4;
+    }
+
+  /* Special user registers.  */
+  switch (rn)
+    {
+    case SIM_NDS32_PC_REGNUM:
+      cpu->reg_usr[USR0_PC].u = val;
+      return 4;
+    case SIM_NDS32_D0LO_REGNUM:
+      cpu->reg_usr[USR0_D0LO].u = val;
+      return 4;
+    case SIM_NDS32_D0HI_REGNUM:
+      cpu->reg_usr[USR0_D0HI].u = val;
+      return 4;
+    case SIM_NDS32_D1LO_REGNUM:
+      cpu->reg_usr[USR0_D1LO].u = val;
+      return 4;
+    case SIM_NDS32_D1HI_REGNUM:
+      cpu->reg_usr[USR0_D1HI].u = val;
+      return 4;
+    case SIM_NDS32_ITB_REGNUM:
+      cpu->reg_usr[USR0_ITB].u = val;
+      return 4;
+    case SIM_NDS32_IFCLP_REGNUM:
+      cpu->reg_usr[USR0_IFCLP].u = val;
+      return 4;
+    }
+
+  /* Double-precision FP registers: FDn is the pair reg_fpr[2n] (upper
+     word) and reg_fpr[2n + 1] (lower word).  */
+  if (rn >= SIM_NDS32_FD0_REGNUM && rn < SIM_NDS32_FD0_REGNUM + 32)
+    {
+      int fr = (rn - SIM_NDS32_FD0_REGNUM) << 1;
+
+      cpu->reg_fpr[fr + 1].u = val & 0xffffffff;
+      cpu->reg_fpr[fr].u = (val >> 32) & 0xffffffff;
+      return 8;
+    }
+
+  /* System registers.  */
+  switch (rn)
+    {
+    case SIM_NDS32_PSW_REGNUM:
+      cpu->reg_sr[SRIDX (1, 0, 0)].u = val;
+      return 4;
+    }
+
+  return 0;
+}
+
+/* Return the program counter of CPU, read from the USR0_PC user
+   register.  Installed as the CPU_PC_FETCH hook.  */
+
+static sim_cia
+nds32_pc_get (sim_cpu *cpu)
+{
+  return cpu->reg_usr[USR0_PC].u;
+}
+
+/* Set the program counter of CPU to CIA by writing the USR0_PC user
+   register.  Installed as the CPU_PC_STORE hook.  */
+
+static void
+nds32_pc_set (sim_cpu *cpu, sim_cia cia)
+{
+  cpu->reg_usr[USR0_PC].u = cia;
+}
+
+/* Put CPU into its initial state: install the register/PC accessors
+   used by the common simulator code, zero all register banks and then
+   fill in the configuration system registers.  SD and ABFD are not
+   used.  */
+
+static void
+nds32_initialize_cpu (SIM_DESC sd, sim_cpu *cpu, struct bfd *abfd)
+{
+  /* Accessor hooks declared in sim-cpu.h.  */
+  CPU_PC_FETCH (cpu) = nds32_pc_get;
+  CPU_PC_STORE (cpu) = nds32_pc_set;
+  CPU_REG_FETCH (cpu) = nds32_fetch_register;
+  CPU_REG_STORE (cpu) = nds32_store_register;
+
+  /* Every register bank starts out as zero.  */
+  memset (cpu->reg_fpr, 0, sizeof (cpu->reg_fpr));
+  memset (cpu->reg_sr, 0, sizeof (cpu->reg_sr));
+  memset (cpu->reg_usr, 0, sizeof (cpu->reg_usr));
+  memset (cpu->reg_gpr, 0, sizeof (cpu->reg_gpr));
+
+  /* CPU_VER: N12 + COP/FPU */
+  CCPU_SR[SRIDX (0, 0, 0)].u = (0xc << 24) | 3;
+
+  /* MSC_CFG: advertise the optional features.  User code may inspect
+     these to pick specialized code paths, e.g., to set $ITB.  */
+  CCPU_SR_SET (MSC_CFG, MSC_CFG_EIT);
+  CCPU_SR_SET (MSC_CFG, MSC_CFG_IFC);
+  CCPU_SR_SET (MSC_CFG, MSC_CFG_MAC);
+  CCPU_SR_SET (MSC_CFG, MSC_CFG_DIV);
+  CCPU_SR_SET (MSC_CFG, MSC_CFG_PFM);
+
+  /* IVB: interrupt vector base configuration.  */
+  CCPU_SR_PUT (IVB, IVB_IVBASE, 0);	/* (IM) */
+  CCPU_SR_PUT (IVB, IVB_ESZ, 1);	/* 16-byte */
+  CCPU_SR_CLEAR (IVB, IVB_EVIC);	/* (IM) */
+}
+
+/* Handle the nds32-specific command line options.
+
+   OPT is the option code and ARG its optional argument.  CPU and
+   IS_COMMAND are not used.  For OPTION_GPROF a missing argument or
+   "on" enables gprof profiling and "off" disables it; any other value
+   is reported and rejected.
+
+   Returns SIM_RC_OK on success, SIM_RC_FAIL for an unknown option or
+   an invalid argument.  */
+
+static SIM_RC
+nds32_option_handler (SIM_DESC sd, sim_cpu *cpu, int opt, char *arg,
+		      int is_command)
+{
+  switch (opt)
+    {
+    case OPTION_GPROF:
+      if (arg == NULL || strcmp (arg, "on") == 0)
+	sd->gprof = 1;
+      else if (strcmp (arg, "off") == 0)
+	/* Honour the documented "off" value, e.g. to cancel an earlier
+	   --gprof on the same command line.  */
+	sd->gprof = 0;
+      else
+	{
+	  sim_io_eprintf (sd, "Invalid argument `%s' for --gprof; "
+			      "expected on|off\n", arg);
+	  return SIM_RC_FAIL;
+	}
+      break;
+
+    default:
+      return SIM_RC_FAIL;
+    }
+  return SIM_RC_OK;
+}
+
+/* Option table registered in sim_open.  It provides a single
+   "--gprof[=on|off]" option handled by nds32_option_handler; the
+   all-NULL entry terminates the table.  */
+
+static const OPTION nds32_options[] = {
+  {{"gprof", optional_argument, NULL, OPTION_GPROF},
+    '\0', "on|off", "Enable gprof", nds32_option_handler},
+  {{NULL, no_argument, NULL, 0}, '\0', NULL, NULL, NULL}
+};
+
+/* Create and initialize a simulator instance.
+
+   KIND and CALLBACK are handed to sim_state_alloc, ARGV holds the
+   simulator arguments and ABFD the optional program image.  Returns the
+   new descriptor, or 0 after releasing the partially built state when
+   any set-up step fails.  */
+
+SIM_DESC
+sim_open (SIM_OPEN_KIND kind, host_callback * callback,
+	  struct bfd *abfd, char **argv)
+{
+  SIM_DESC sd = sim_state_alloc (kind, callback);
+  struct nds32_mm *mm = STATE_MM (sd);
+  char *prog_name;
+  int cpu_nr;
+
+  /* The cpu data is kept in a separately allocated chunk of memory.  */
+  if (sim_cpu_alloc_all (sd, 1, 0) != SIM_RC_OK)
+    goto fail;
+
+  if (sim_pre_argv_init (sd, argv[0]) != SIM_RC_OK)
+    goto fail;
+
+  sim_add_option_table (sd, NULL, nds32_options);
+
+  /* Handle target sim arguments.  */
+  if (sim_parse_args (sd, argv) != SIM_RC_OK)
+    goto fail;
+
+  /* Check for, or establish, a reference program image.  */
+  prog_name = NULL;
+  if (STATE_PROG_ARGV (sd) != NULL)
+    prog_name = *STATE_PROG_ARGV (sd);
+  if (sim_analyze_program (sd, prog_name, abfd) != SIM_RC_OK)
+    goto fail;
+
+#if 0
+  /* COLE: Not sure whether this is necessary.  */
+
+  /* Establish any remaining configuration options.  */
+  if (sim_config (sd) != SIM_RC_OK)
+    goto fail;
+#endif
+
+  if (sim_post_argv_init (sd) != SIM_RC_OK)
+    goto fail;
+
+  /* CPU specific initialization.  */
+  for (cpu_nr = 0; cpu_nr < MAX_NR_PROCESSORS; ++cpu_nr)
+    nds32_initialize_cpu (sd, STATE_CPU (sd, cpu_nr), abfd);
+
+  /* Always initialize the memory-management struct;
+     otherwise, we cannot know whether VMAs are used or not.  */
+  nds32_mm_init (mm);
+  sd->mem_attached = FALSE;
+
+  callback->syscall_map = cb_nds32_libgloss_syscall_map;
+
+  return sd;
+
+fail:
+  /* Common exit for every failed set-up step.  */
+  nds32_free_state (sd);
+  return 0;
+}
+
+/* Shut down simulator instance SD: release all VMAs of its
+   memory-management state and, when gprof profiling is enabled, run
+   the gmon cleanup for the program image.  QUITTING is not used.  */
+
+void
+sim_close (SIM_DESC sd, int quitting)
+{
+  struct nds32_mm *mm = STATE_MM (sd);
+  nds32_freeall_vma (mm);
+
+  if (sd->gprof)
+    nds32_gmon_cleanup (STATE_PROG_BFD (sd));
+
+  /* The block below is compiled out; it would print VMA and cache-hit
+     statistics when the SIM_DEBUG environment variable is non-zero.  */
+#if 0 && defined (USE_TLB)
+  /* Dump VMA usage for debugging.  */
+  char *SIM_DEBUG = getenv ("SIM_DEBUG");
+  if (!SIM_DEBUG || atoi (SIM_DEBUG) == 0)
+    return;
+
+  uint64_t t = mm->cache_ihit + mm->cache_dhit + mm->cache_miss;
+  nds32_dump_vma (mm);
+
+  printf ("i-hit rate: %f (%llu/%llu)\n",
+	  (double) mm->cache_ihit / t * 100, mm->cache_ihit, t);
+  printf ("d-hit rate: %f (%llu/%llu)\n",
+	  (double) mm->cache_dhit / t * 100, mm->cache_dhit, t);
+#endif
+}
+
+/* Memory-read callback for the disassembler: copy LENGTH bytes of
+   simulated memory at MEMADDR into MYADDR.  The simulator descriptor is
+   taken from DINFO->application_data.  Returns 0 when all LENGTH bytes
+   were read and 1 otherwise.  */
+
+static int
+sim_dis_read (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
+	      struct disassemble_info *dinfo)
+{
+  SIM_DESC sd = (SIM_DESC) dinfo->application_data;
+  int nr_read = sim_read (sd, memaddr, (unsigned char *) myaddr, length);
+
+  if (nr_read != length)
+    return 1;
+  return 0;
+}
+
+/* Prepare CPU 0 of SD to run the program PROG_BFD with arguments ARGV
+   and environment ENV.  Always returns SIM_RC_OK; when PROG_BFD is
+   NULL nothing is set up.  */
+
+SIM_RC
+sim_create_inferior (SIM_DESC sd, struct bfd *prog_bfd, char **argv,
+		     char **env)
+{
+  SIM_CPU *cpu = STATE_CPU (sd, 0);
+
+  /* Without a program image there is nothing to initialize.  */
+  if (prog_bfd == NULL)
+    return SIM_RC_OK;
+
+  if (sd->gprof)
+    nds32_gmon_start (prog_bfd);
+
+  /* Set up the file-level disassembler state so that it reads from
+     simulated memory and prints to stderr.  */
+  memset (&dis_info, 0, sizeof (dis_info));
+  /* See opcode/dis-init.c and dis-asm.h for details.  */
+  INIT_DISASSEMBLE_INFO (dis_info, stderr, fprintf);
+  dis_info.application_data = (void *) sd;
+  dis_info.read_memory_func = sim_dis_read;
+  dis_info.arch = bfd_get_arch (prog_bfd);
+  dis_info.mach = bfd_get_mach (prog_bfd);
+  disassemble_init_for_target (&dis_info);
+
+  /* Set PC to entry point address.  */
+  (*CPU_PC_STORE (cpu)) (cpu, bfd_get_start_address (prog_bfd));
+
+  /* Set default endian from the byte order of the program image.  */
+  if (bfd_big_endian (prog_bfd))
+    CCPU_SR_SET (PSW, PSW_BE);
+  else
+    CCPU_SR_CLEAR (PSW, PSW_BE);
+
+  /* Pick the runtime set-up matching the simulated environment.  */
+  if (STATE_ENVIRONMENT (sd) == USER_ENVIRONMENT)
+    nds32_init_linux (sd, prog_bfd, argv, env);
+  else
+    nds32_init_libgloss (sd, prog_bfd, argv, env);
+
+  return SIM_RC_OK;
+}
+
+/* Part of the simulator interface; intentionally a no-op here.
+   PTR is ignored.  */
+
+void
+sim_set_callbacks (host_callback * ptr)
+{
+  /* callback = ptr; */
+}
diff --git a/sim/nds32/mingw32-hdep.c b/sim/nds32/mingw32-hdep.c
new file mode 100644
index 0000000..c582507
--- /dev/null
+++ b/sim/nds32/mingw32-hdep.c
@@ -0,0 +1,56 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Define SYSCALL as a stub that reports that it is unimplemented and
+   then aborts.  The message is written to stderr so that it is not
+   lost in an unflushed stdout buffer when abort () ends the process.  */
+#define UNIMP(SYSCALL)	\
+	void SYSCALL () \
+	{ fputs ("Unimplemented syscall " #SYSCALL "\n", stderr); abort (); }
+
+/* These system calls are only used by Linux programs.
+   They should not affect ELF programs.
+   These symbols are defined just to keep the mingw32 build happy.
+   It will take time to implement them, especially mmap,
+   and unfortunately mmap is the most important syscall for
+   dynamically linked Linux programs.
+
+   On Windows, non-anonymous mappings should be done with
+     CreateFileMapping () and MapViewOfFile ().
+   Anonymous mappings should be done with VirtualAlloc () or just malloc ().
+
+   The allocation granularity seems quite large, 64 KB,
+   as returned by GetSystemInfo ().
+   See http://msdn.microsoft.com/en-us/library/windows/desktop/aa366761%28v=vs.85%29.aspx
+  */
+
+/* One aborting stub per syscall; each line expands to a definition of
+   the named function through UNIMP.  */
+UNIMP(getegid);
+UNIMP(uname);
+UNIMP(link);
+UNIMP(getgid);
+UNIMP(getuid);
+UNIMP(times);
+UNIMP(geteuid);
+UNIMP(setgid);
+UNIMP(setuid);
+UNIMP(ioctl);
+UNIMP(fcntl);
+UNIMP(munmap);
+UNIMP(mmap);
+UNIMP(getrlimit);
+UNIMP(setrlimit);
diff --git a/sim/nds32/mingw32-hdep.h b/sim/nds32/mingw32-hdep.h
new file mode 100644
index 0000000..b54a079
--- /dev/null
+++ b/sim/nds32/mingw32-hdep.h
@@ -0,0 +1,105 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef MINGW32_HDEP_H
+#define MINGW32_HDEP_H
+
+#include <stdint.h>
+
+/*
+ * sys/mman.h
+ *
+ * Replacement definitions of the mmap protection and mapping flags.
+ * NOTE(review): the values presumably mirror those of Linux
+ * <sys/mman.h> -- confirm before relying on them.
+ */
+
+/* Return value of `mmap' in case of an error.  */
+#define MAP_FAILED	((void *) -1)
+
+#define PROT_READ	0x1		/* Page can be read.  */
+#define PROT_WRITE	0x2		/* Page can be written.  */
+#define PROT_EXEC	0x4		/* Page can be executed.  */
+#define PROT_NONE	0x0		/* Page can not be accessed.  */
+#define PROT_GROWSDOWN	0x01000000	/* Extend change to start of
+					   growsdown vma (mprotect only).  */
+#define PROT_GROWSUP	0x02000000	/* Extend change to start of
+					   growsup vma (mprotect only).  */
+
+/* Sharing types (must choose one and only one of these).  */
+#define MAP_SHARED	0x01		/* Share changes.  */
+#define MAP_PRIVATE	0x02		/* Changes are private.  */
+
+/* Other flags.  */
+#define MAP_FIXED	0x10		/* Interpret addr exactly.  */
+#define MAP_ANONYMOUS	0x20		/* Don't use a file.  */
+#define MAP_ANON	MAP_ANONYMOUS	/* Alias of MAP_ANONYMOUS.  */
+
+/* These are Linux-specific.  */
+#define MAP_STACK	0x20000		/* Allocation is for a stack.  */
+
+/*
+ * Syscalls
+ *
+ * Replacement declarations of types used by the syscall code.  All
+ * numeric fields below are fixed-width 32-bit integers.
+ */
+
+typedef long long loff_t;	/* llseek () */
+
+/* A time value split into seconds and microseconds.  */
+struct timeval {
+  uint32_t tv_sec;
+  uint32_t tv_usec;
+};
+
+/* Time zone information.  */
+struct timezone {
+  uint32_t tz_minuteswest;
+  uint32_t tz_dsttime;
+};
+
+/* All times reported are in clock ticks.  */
+struct tms {
+  uint32_t tms_utime;
+  uint32_t tms_stime;
+  uint32_t tms_cutime;
+  uint32_t tms_cstime;
+};
+
+/* System identification strings; every field is a 65-byte buffer.  */
+struct utsname {
+  char sysname[65];	/* Operating system name (e.g., "Linux") */
+  char nodename[65];	/* Name within "some implementation-defined
+			  network" */
+  char release[65];	/* OS release (e.g., "2.6.28") */
+  char version[65];	/* OS version */
+  char machine[65];	/* Hardware identifier */
+#if 0 && defined (_GNU_SOURCE)
+  char domainname[65];	/* NIS or YP domain name */
+#endif
+};
+
+/*
+ * rlimit
+ */
+typedef uint32_t rlim_t;	/* Resource limit value.  */
+struct rlimit {
+  rlim_t rlim_cur;  /* Soft limit */
+  rlim_t rlim_max;  /* Hard limit (ceiling for rlim_cur) */
+};
+
+#define RLIMIT_DATA		2	/* max data size */
+#define RLIMIT_STACK		3	/* max stack size */
+
+/* Query the limits of RESOURCE into RLIM.  */
+int getrlimit(int resource, struct rlimit *rlim);
+
+
+#endif
diff --git a/sim/nds32/nds32-cop0.c b/sim/nds32/nds32-cop0.c
new file mode 100644
index 0000000..0eb0954
--- /dev/null
+++ b/sim/nds32/nds32-cop0.c
@@ -0,0 +1,550 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#include "bfd.h"
+#include "gdb/callback.h"
+#include "gdb/signals.h"
+#include "libiberty.h"
+#include "gdb/remote-sim.h"
+#include "dis-asm.h"
+#include "sim-main.h"
+#include "nds32-sim.h"
+#include "sim-utils.h"
+#include "sim-fpu.h"
+
+#include "opcode/nds32.h"
+#include "nds32-sim.h"
+
+/* Return the 64-bit value of double register FD of CPU.  The register
+   is the pair CCPU_FPR[2 * FD] (upper 32 bits) and
+   CCPU_FPR[2 * FD + 1] (lower 32 bits).  */
+
+static inline uint64_t
+nds32_fd_to_64 (sim_cpu *cpu, int fd)
+{
+  const int hi = fd << 1;
+  uint64_t u64 = CCPU_FPR[hi].u;
+
+  u64 <<= 32;
+  u64 |= (uint64_t) CCPU_FPR[hi + 1].u;
+  return u64;
+}
+
+/* Write U64 to double register FD of CPU: the upper 32 bits go to
+   CCPU_FPR[2 * FD] and the lower 32 bits to CCPU_FPR[2 * FD + 1].  */
+
+static inline void
+nds32_fd_from_64 (sim_cpu *cpu, int fd, uint64_t u64)
+{
+  const int hi = fd << 1;
+
+  CCPU_FPR[hi].u = (u64 >> 32) & 0xFFFFFFFF;
+  CCPU_FPR[hi + 1].u = u64 & 0xFFFFFFFF;
+}
+
+/* Execute a coprocessor word load: read 4 bytes into the
+   single-precision register selected by the RT field of INSN.
+
+   With bit 12 of INSN set, the load uses the base register as the
+   address and the base is advanced by the offset afterwards; otherwise
+   the address is base + offset.  The offset is the signed 12-bit
+   immediate scaled by 4.  Only coprocessor 0 is supported (asserted).
+   Returns the address of the next instruction, CIA + 4.  */
+
+sim_cia
+nds32_decode32_lwc (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  const int cop = __GF (insn, 13, 2);
+  const int fst = N32_RT5 (insn);
+  const int ra = N32_RA5 (insn);
+  const int imm12s = N32_IMM12S (insn);
+  /* Scale by multiplication: left-shifting a negative int is
+     undefined behavior.  */
+  const int offset = imm12s * 4;
+
+  SIM_ASSERT (cop == 0);
+
+  if (insn & (1 << 12))		/* Post-increment form.  */
+    {
+      CCPU_FPR[fst].u = nds32_ld_aligned (cpu, CCPU_GPR[ra].u, 4);
+      CCPU_GPR[ra].u += offset;
+    }
+  else				/* Base + offset form.  */
+    {
+      CCPU_FPR[fst].u = nds32_ld_aligned (cpu, CCPU_GPR[ra].u + offset, 4);
+    }
+
+  return cia + 4;
+}
+
+/* Execute a coprocessor word store (fssi / fssi.bi): write the
+   single-precision register selected by the RT field of INSN to
+   memory as 4 bytes.
+
+   With bit 12 of INSN set, the store uses the base register as the
+   address and the base is advanced by the offset afterwards; otherwise
+   the address is base + offset.  The offset is the signed 12-bit
+   immediate scaled by 4.  Only coprocessor 0 is supported (asserted).
+   Returns the address of the next instruction, CIA + 4.  */
+
+sim_cia
+nds32_decode32_swc (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  const int cop = __GF (insn, 13, 2);
+  const int fst = N32_RT5 (insn);
+  const int ra = N32_RA5 (insn);
+  const int imm12s = N32_IMM12S (insn);
+  /* Scale by multiplication: left-shifting a negative int is
+     undefined behavior.  */
+  const int offset = imm12s * 4;
+
+  SIM_ASSERT (cop == 0);
+
+  if (insn & (1 << 12))		/* fssi.bi */
+    {
+      nds32_st_aligned (cpu, CCPU_GPR[ra].u, 4, CCPU_FPR[fst].u);
+      CCPU_GPR[ra].u += offset;
+    }
+  else				/* fssi */
+    {
+      nds32_st_aligned (cpu, CCPU_GPR[ra].u + offset, 4, CCPU_FPR[fst].u);
+    }
+
+  return cia + 4;
+}
+
+/* Execute a coprocessor double-word load (fldi / fldi.bi): read
+   8 bytes into the double register selected by the RT field of INSN.
+
+   With bit 12 of INSN set, the load uses the base register as the
+   address and the base is advanced by the offset afterwards; otherwise
+   the address is base + offset.  The offset is the signed 12-bit
+   immediate scaled by 4.  Only coprocessor 0 is supported (asserted).
+   Returns the address of the next instruction, CIA + 4.  */
+
+sim_cia
+nds32_decode32_ldc (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  const int cop = __GF (insn, 13, 2);
+  const int fdt = N32_RT5 (insn);
+  const int ra = N32_RA5 (insn);
+  const int imm12s = N32_IMM12S (insn);
+  /* Scale by multiplication: left-shifting a negative int is
+     undefined behavior.  */
+  const int offset = imm12s * 4;
+  uint64_t u64;
+
+  SIM_ASSERT (cop == 0);
+
+  if (insn & (1 << 12))		/* fldi.bi */
+    {
+      u64 = nds32_ld_aligned (cpu, CCPU_GPR[ra].u, 8);
+      CCPU_GPR[ra].u += offset;
+    }
+  else				/* fldi */
+    {
+      u64 = nds32_ld_aligned (cpu, CCPU_GPR[ra].u + offset, 8);
+    }
+
+  nds32_fd_from_64 (cpu, fdt, u64);
+
+  return cia + 4;
+}
+
+/* Execute a coprocessor double-word store: write the double register
+   selected by the RT field of INSN to memory as 8 bytes.
+
+   With bit 12 of INSN set, the store uses the base register as the
+   address and the base is advanced by the offset afterwards; otherwise
+   the address is base + offset.  The offset is the signed 12-bit
+   immediate scaled by 4.  Only coprocessor 0 is supported (asserted).
+   Returns the address of the next instruction, CIA + 4.  */
+
+sim_cia
+nds32_decode32_sdc (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  const int cop = __GF (insn, 13, 2);
+  const int fdt = N32_RT5 (insn);
+  const int ra = N32_RA5 (insn);
+  const int imm12s = N32_IMM12S (insn);
+  /* Scale by multiplication: left-shifting a negative int is
+     undefined behavior.  */
+  const int offset = imm12s * 4;
+  uint64_t u64;
+
+  SIM_ASSERT (cop == 0);
+
+  u64 = nds32_fd_to_64 (cpu, fdt);
+
+  if (insn & (1 << 12))		/* Post-increment form.  */
+    {
+      nds32_st_aligned (cpu, CCPU_GPR[ra].u, 8, u64);
+      CCPU_GPR[ra].u += offset;
+    }
+  else				/* Base + offset form.  */
+    {
+      nds32_st_aligned (cpu, CCPU_GPR[ra].u + offset, 8, u64);
+    }
+
+  return cia + 4;
+}
+
+/* Compare the operands SFA and SFB.
+
+   Returns 0 when neither of the results below applies (SFA > SFB),
+	   1 when the operands are equal,
+	   2 when SFA is less than SFB,
+	   3 when the comparison is unordered because of a quiet NaN,
+	   4 when it is unordered because of a signaling NaN.  */
+
+static int
+nds32_decode32_fcmp (sim_fpu *sfa, sim_fpu *sfb)
+{
+  /* Map a sim_fpu_is () class to its column/row in CTAB.  */
+  static const int s2i[12] = {
+    [SIM_FPU_IS_NINF] = 0,
+    [SIM_FPU_IS_PINF] = 7,
+    [SIM_FPU_IS_NNUMBER] = 1,
+    [SIM_FPU_IS_PNUMBER] = 6,
+    [SIM_FPU_IS_NDENORM] = 2,
+    [SIM_FPU_IS_PDENORM] = 5,
+    [SIM_FPU_IS_NZERO] = 3,
+    [SIM_FPU_IS_PZERO] = 4,
+    [SIM_FPU_IS_QNAN] = 8,
+    [SIM_FPU_IS_SNAN] = 9,
+  };
+  /* Result by operand class; columns are the class of SFA, rows the
+     class of SFB, both in the order
+     -i -n -dn -0 +0 +dn +n +i qn sn.
+     Entries: 0 - GT, 1 - EQ, 2 - LT, 3 - UN (quiet),
+     4 - UN (signaling), 9 - same class, compare the values.  */
+  static const char ctab[100] = {
+    1, 0, 0, 0, 0, 0, 0, 0, 3, 4,
+    2, 9, 0, 0, 0, 0, 0, 0, 3, 4,
+    2, 2, 9, 0, 0, 0, 0, 0, 3, 4,
+    2, 2, 2, 1, 1, 0, 0, 0, 3, 4,
+    2, 2, 2, 1, 1, 0, 0, 0, 3, 4,
+    2, 2, 2, 2, 2, 9, 0, 0, 3, 4,
+    2, 2, 2, 2, 2, 2, 9, 0, 3, 4,
+    2, 2, 2, 2, 2, 2, 2, 1, 3, 4,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  };
+  const int col = s2i[sim_fpu_is (sfa)];
+  const int row = s2i[sim_fpu_is (sfb)];
+  const int verdict = ctab[row * 10 + col];
+  int diff_class;
+
+  /* Most class pairs are decided by the table alone.  */
+  if (verdict != 9)
+    return verdict;
+
+  /* Same class: fall back to a real comparison, which yields the
+     sim_fpu class of the outcome.  */
+  diff_class = sim_fpu_cmp (sfa, sfb);
+
+  if (LSBIT32 (diff_class)
+      & (LSBIT32 (SIM_FPU_IS_NZERO) | LSBIT32 (SIM_FPU_IS_PZERO)))
+    return 1;
+
+  if (LSBIT32 (diff_class)
+      & (LSBIT32 (SIM_FPU_IS_NINF) | LSBIT32 (SIM_FPU_IS_NNUMBER)
+	 | LSBIT32 (SIM_FPU_IS_NDENORM) | LSBIT32 (SIM_FPU_IS_NZERO)))
+    return 2;
+
+  return 0;
+}
+
+/* Decode and execute the coprocessor (FPU) instruction INSN located
+   at CIA.  Only coprocessor 0 is supported (asserted).
+
+   The instruction is matched against four groups in turn: FS1/FD1
+   arithmetic, FS2/FD2 comparisons, register moves between the GPRs and
+   the FPRs, and register-indexed FPU loads/stores.
+
+   Returns CIA + 4 once the instruction has been handled, which
+   includes encodings that match none of the last three groups.  An
+   unsupported FS1/FD1 sub-opcode is reported through nds32_bad_op and
+   CIA is returned.  */
+
+sim_cia
+nds32_decode32_cop (sim_cpu *cpu, const uint32_t insn, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  const int cop = __GF (insn, 4, 2);
+  const int sv = __GF (insn, 8, 2);
+  const int fst = N32_RT5 (insn);
+  const int fsa = N32_RA5 (insn);
+  const int fsb = N32_RB5 (insn);
+  const int rt = N32_RT5 (insn);
+  const int ra = N32_RA5 (insn);
+  const int rb = N32_RB5 (insn);
+  const int fdt_ = N32_RT5 (insn) << 1;	/* I use fdX_ as shifted fdX. */
+  const int fda_ = N32_RA5 (insn) << 1;
+  const int fdb_ = N32_RB5 (insn) << 1;
+  int fcmp = SIM_FPU_IS_SNAN;
+  uint64_t u64;
+  uint32_t u32;
+  uint32_t i32;
+  sim_fpu sft, sft2;
+  sim_fpu sfa;
+  sim_fpu sfb;
+
+  SIM_ASSERT (cop == 0);
+
+  /* Prepare operand for F[SD][12]. */
+  if ((insn & 0xb) == 0)
+    {
+      /* FS1,  FS2 */
+      sim_fpu_32to (&sfa, CCPU_FPR[fsa].u);
+      sim_fpu_32to (&sfb, CCPU_FPR[fsb].u);
+
+      /* MAC instructions use value in fst.  */
+      switch (__GF (insn, 6, 4))
+	{
+	case 4: case 5: case 8: case 9:
+	  sim_fpu_32to (&sft, CCPU_FPR[fst].u);
+	  break;
+	}
+    }
+  else if ((insn & 0xb) == 8)
+    {
+      /* FD1, FD2 */
+      u64 = nds32_fd_to_64 (cpu, fda_ >> 1);
+      sim_fpu_64to (&sfa, u64);
+      u64 = nds32_fd_to_64 (cpu, fdb_ >> 1);
+      sim_fpu_64to (&sfb, u64);
+
+      /* MAC instructions use value in fst.  */
+      switch (__GF (insn, 6, 4))
+	{
+	case 4: case 5: case 8: case 9:
+	  u64 = nds32_fd_to_64 (cpu, fdt_ >> 1);
+	  sim_fpu_64to (&sft, u64);
+	  break;
+	}
+    }
+
+  if ((insn & 0x7) == 0)	/* FS1 or FD1 */
+    {
+      int dp = (insn & 0x8) > 0;
+      int sft_to_dp = dp;
+
+      /* To simplify the operations,
+	 all the single-precision operations are
+	 promoted to double-precision.
+
+	 sft_to_dp determines whether the final destination
+	 is single or double.
+	 dp determines whether the source operands are
+	 single or double.  */
+
+      /* Cases that "break" leave their result in sft for the common
+	 epilogue below; cases that "goto done" have already written
+	 the destination register themselves.  */
+      switch (__GF (insn, 6, 4))
+	{
+	case 0x0:		/* fadds */
+	  sim_fpu_add (&sft, &sfa, &sfb);
+	  break;
+	case 0x1:		/* fsubs */
+	  sim_fpu_sub (&sft, &sfa, &sfb);
+	  break;
+	case 0x2:		/* fcpynsd */
+	  /* Magnitude of operand a combined with the inverted sign
+	     bit of operand b.  */
+	  if (!dp)
+	    {
+	      /* fcpynss */
+	      u32 = CCPU_FPR[fsa].u & 0x7fffffff;
+	      u32 |= (CCPU_FPR[fsb].u & 0x80000000) ^ 0x80000000;
+	      CCPU_FPR[fst].u = u32;
+	    }
+	  else
+	    {
+	      /* fcpynsd */
+	      u32 = CCPU_FPR[fda_].u & 0x7fffffff;
+	      u32 |= (CCPU_FPR[fdb_].u & 0x80000000) ^ 0x80000000;
+	      CCPU_FPR[fdt_].u = u32;
+	      CCPU_FPR[fdt_ + 1].u = CCPU_FPR[fda_ + 1].u;
+	    }
+	  goto done;
+	case 0x3:
+	  /* Magnitude of operand a combined with the sign bit of
+	     operand b.  */
+	  if (!dp)
+	    {
+	      /* fcpyss */
+	      u32 = CCPU_FPR[fsa].u & 0x7fffffff;
+	      u32 |= CCPU_FPR[fsb].u & 0x80000000;
+	      CCPU_FPR[fst].u = u32;
+	    }
+	  else
+	    {
+	      /* fcpysd */
+	      u32 = CCPU_FPR[fda_].u & 0x7fffffff;
+	      u32 |= CCPU_FPR[fdb_].u & 0x80000000;
+	      CCPU_FPR[fdt_].u = u32;
+	      CCPU_FPR[fdt_ + 1].u = CCPU_FPR[fda_ + 1].u;
+	    }
+	  goto done; /* Just return.  */
+	case 0x4:		/* fmaddd */
+	  sim_fpu_mul (&sft2, &sfa, &sfb);
+	  sim_fpu_add (&sft, &sft, &sft2);
+	  break;
+	case 0x5:		/* fmsubd */
+	  sim_fpu_mul (&sft2, &sfa, &sfb);
+	  sim_fpu_sub (&sft, &sft, &sft2);
+	  break;
+	case 0x6:		/* fcmovnX */
+	case 0x7:		/* fcmovzX */
+	  /* Bit 6 of INSN selects between the two variants; both forms
+	     test the single register fsb.  */
+	  if (!dp)
+	    {
+	      /* fcmovzs */
+	      if ((CCPU_FPR[fsb].u != 0) ^ ((insn & 1 << 6) != 0))
+		CCPU_FPR[fst] = CCPU_FPR[fsa];
+	    }
+	  else
+	    {
+	      /* fcmovzd */
+	      if ((CCPU_FPR[fsb].u != 0) ^ ((insn & 1 << 6) != 0))
+		{
+		  CCPU_FPR[fdt_] = CCPU_FPR[fda_];
+		  CCPU_FPR[fdt_ + 1] = CCPU_FPR[fda_ + 1];
+		}
+	    }
+	  goto done;
+	case 0x8:		/* fnmaddd */
+	  sim_fpu_mul (&sft2, &sfa, &sfb);
+	  sim_fpu_add (&sft, &sft, &sft2);
+	  sim_fpu_neg (&sft, &sft);
+	  break;
+	case 0x9:		/* fnmsubd */
+	  sim_fpu_mul (&sft2, &sfa, &sfb);
+	  sim_fpu_sub (&sft, &sft, &sft2);
+	  sim_fpu_neg (&sft, &sft);
+	  break;
+	case 0xc:		/* fmuls */
+	  sim_fpu_mul (&sft, &sfa, &sfb);
+	  break;
+	case 0xd:		/* fdivs */
+	  sim_fpu_div (&sft, &sfa, &sfb);
+	  break;
+#if 0
+	case 0xa:
+	case 0xb:		/* reserved */
+#endif
+	case 0xf:		/* F2OP */
+	  switch (__GF (insn, 10, 5))
+	    {
+	    case 0x0:		/* fs2d, fd2s */
+	      /* Precision conversion: the destination has the opposite
+		 precision of the source.  */
+	      sft = sfa;
+	      sft_to_dp = !dp;
+	      break;
+	    case 0x1:		/* sqrts, sqrtd */
+	      sim_fpu_sqrt (&sft, &sfa);
+	      break;
+	    case 0x5:
+	      if (!dp)
+		{
+		  /* fabss */
+		  CCPU_FPR[fst].u = CCPU_FPR[fsa].u & 0x7fffffff;
+		}
+	      else
+		{
+		  /* fabsd */
+		  CCPU_FPR[fdt_].u = CCPU_FPR[fda_].u & 0x7fffffff;
+		  CCPU_FPR[fdt_ + 1].u = CCPU_FPR[fda_ + 1].u;
+		}
+	      goto done; /* Just return.  */
+	    case 0xc:		/* fsi2s, fsi2d */
+	      sim_fpu_i32to (&sft, CCPU_FPR[fsa].u, sim_fpu_round_near);
+	      break;
+	    case 0x10:		/* fs2ui, fd2ui */
+	    case 0x14:		/* fs2ui.z, fd2ui.z */
+	      /* Bit 12 of INSN selects round-toward-zero.  */
+	      sim_fpu_to32u (&u32, &sfa, (insn & (1 << 12))
+					 ? sim_fpu_round_zero
+					 : sim_fpu_round_near);
+	      CCPU_FPR[fst].u = u32;
+	      goto done;	/* just return */
+	    case 0x18:		/* fs2si, fd2si */
+	    case 0x1c:		/* fs2si.z, fd2si.z */
+	      /* Bit 12 of INSN selects round-toward-zero.  */
+	      sim_fpu_to32i (&i32, &sfa, (insn & (1 << 12))
+					 ? sim_fpu_round_zero
+					 : sim_fpu_round_near);
+	      CCPU_FPR[fst].s = i32;
+	      goto done; /* Just return.  */
+	    default:
+	      goto bad_op;
+	    }
+	  break;
+	default:
+	  goto bad_op;
+	}
+
+      if (!sft_to_dp)
+	{
+	  /* General epilogue for saving result to fst.  */
+	  sim_fpu_to32 ((unsigned32 *) (CCPU_FPR + fst), &sft);
+	}
+      else
+	{
+	  /* General epilogue for saving result to fdt.  */
+	  sim_fpu_to64 (&u64, &sft);
+	  nds32_fd_from_64 (cpu, fdt_ >> 1, u64);
+	}
+      goto done;
+    }
+
+  /* fcmpxxd and fcmpxxs share this function. */
+  if ((insn & 0x7) == 4)
+    {
+      /* The 0/1 result is always written to the single register fst.
+	 Sub-opcodes 4-7 are not handled and leave fst untouched.  */
+      fcmp = nds32_decode32_fcmp (&sfa, &sfb);
+      switch (__GF (insn, 7, 3))
+	{
+	case 0x0:		/* fcmpeq[sd] */
+	  CCPU_FPR[fst].u = fcmp == 1;
+	  goto done;
+	case 0x1:		/* fcmplt[sd] */
+	  CCPU_FPR[fst].u = fcmp == 2;
+	  goto done;
+	case 0x2:		/* fcmple[sd] */
+	  CCPU_FPR[fst].u = fcmp == 1 || fcmp == 2;
+	  goto done;
+	case 0x3:
+	  /* Unordered: true when either kind of NaN was involved.  */
+	  CCPU_FPR[fst].u = fcmp == 3 || fcmp == 4;
+	  goto done;
+	}
+      goto done;
+    }
+
+  /* Moves between general purpose and floating-point registers.  */
+  switch (insn & 0x3ff)
+    {
+    case 0x1:			/* fmfsr */
+      CCPU_GPR[rt].u = CCPU_FPR[fsa].u;
+      goto done;
+    case 0x9:			/* fmtsr */
+      CCPU_FPR[fsa].u = CCPU_GPR[rt].u;
+      goto done;
+    case 0x41:			/* fmfdr */
+      {
+	/* The GPR pair starts at the even register at or below rt;
+	   the word order within the pair follows PSW.BE.  */
+	int rt_ = rt & ~1;
+	if (CCPU_SR_TEST (PSW, PSW_BE))
+	  {
+	    CCPU_GPR[rt_] = CCPU_FPR[fda_];
+	    CCPU_GPR[rt_ + 1] = CCPU_FPR[fda_ + 1];
+	  }
+	else
+	  {
+	    CCPU_GPR[rt_] = CCPU_FPR[fda_ + 1];
+	    CCPU_GPR[rt_ + 1] = CCPU_FPR[fda_];
+	  }
+      }
+      goto done;
+    case 0x49:			/* fmtdr */
+      {
+	/* Same register pairing and word order as for fmfdr.  */
+	int rt_ = rt & ~1;
+	if (CCPU_SR_TEST (PSW, PSW_BE))
+	  {
+	    CCPU_FPR[fda_ + 1] = CCPU_GPR[rt_ + 1];
+	    CCPU_FPR[fda_] = CCPU_GPR[rt_];
+	  }
+	else
+	  {
+	    CCPU_FPR[fda_ + 1] = CCPU_GPR[rt_];
+	    CCPU_FPR[fda_] = CCPU_GPR[rt_ + 1];
+	  }
+      }
+      goto done;
+    }
+
+  /* Register-indexed loads and stores; the index register rb is
+     shifted left by sv.  The ".bi" forms access memory at ra and
+     advance ra afterwards.  Encodings not listed here fall through to
+     done without raising nds32_bad_op.  */
+  switch (insn & 0xFF)
+    {
+    case 0x2:			/* fls */
+      u32 = nds32_ld_aligned (cpu, CCPU_GPR[ra].u + (CCPU_GPR[rb].s << sv), 4);
+      CCPU_FPR[fst].u = u32;
+      goto done;
+    case 0x3:			/* fld */
+      u64 = nds32_ld_aligned (cpu, CCPU_GPR[ra].u + (CCPU_GPR[rb].s << sv), 8);
+      nds32_fd_from_64 (cpu, fdt_ >> 1, u64);
+      goto done;
+    case 0xa:			/* fss */
+      nds32_st_aligned (cpu, CCPU_GPR[ra].u + (CCPU_GPR[rb].s << sv),
4, CCPU_FPR[fst].u);
+      goto done;
+    case 0xb:			/* fsd */
+      u64 = nds32_fd_to_64 (cpu, fdt_ >> 1);
+      nds32_st_aligned (cpu, CCPU_GPR[ra].u + (CCPU_GPR[rb].s << sv), 8, u64);
+      goto done;
+    case 0x82:			/* fls.bi */
+      u32 = nds32_ld_aligned (cpu, CCPU_GPR[ra].u, 4);
+      CCPU_GPR[ra].u += (CCPU_GPR[rb].s << sv);
+      CCPU_FPR[fst].u = u32;
+      goto done;
+    case 0x83:			/* fld.bi */
+      u64 = nds32_ld_aligned (cpu, CCPU_GPR[ra].u, 8);
+      CCPU_GPR[ra].u += (CCPU_GPR[rb].s << sv);
+      nds32_fd_from_64 (cpu, fdt_ >> 1, u64);
+      goto done;
+    case 0x8a:			/* fss.bi */
+      nds32_st_aligned (cpu, CCPU_GPR[ra].u, 4, CCPU_FPR[fst].u);
+      CCPU_GPR[ra].u += (CCPU_GPR[rb].s << sv);
+      goto done;
+    case 0x8b:			/* fsd.bi */
+      u64 = nds32_fd_to_64 (cpu, fdt_ >> 1);
+      nds32_st_aligned (cpu, CCPU_GPR[ra].u, 8, u64);
+      CCPU_GPR[ra].u += (CCPU_GPR[rb].s << sv);
+      goto done;
+    }
+
+
+done:
+  return cia + 4;
+
+bad_op:
+  nds32_bad_op (cpu, cia, insn, "COP");
+  return cia;
+}
diff --git a/sim/nds32/nds32-gmon.c b/sim/nds32/nds32-gmon.c
new file mode 100644
index 0000000..234b63a
--- /dev/null
+++ b/sim/nds32/nds32-gmon.c
@@ -0,0 +1,236 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "bfd.h"
+#include "elf-bfd.h"
+
+#include "nds32-gmon.h"
+#include "rbtree.h"
+
+/* A histogram bin is selected by shifting a text offset right by
+   HIST_GRANULARITY_SHIFT, so each *_GRANULARITY is the power of two that
+   matches its shift.  CYCLE_GRANULARITY is recorded as the profiling rate
+   in the histogram header.  */
+#define HIST_GRANULARITY_SHIFT	2
+#define HIST_GRANULARITY	(1 << HIST_GRANULARITY_SHIFT)
+#define CG_GRANULARITY_SHIFT	2
+#define CG_GRANULARITY		(1 << CG_GRANULARITY_SHIFT)
+#define CYCLE_GRANULARITY	1
+
+/* One call-graph arc.  Nodes are stored as keys of cg_tree and ordered by
+   cg_cmp on (from_pc, self_pc).  */
+struct cg_node
+{
+  /* PC of caller.  */
+  uint32_t from_pc;
+  /* PC of callee.  */
+  uint32_t self_pc;
+  /* The number of times the function was called.  */
+  uint32_t count;
+};
+
+/* Context handed to the tree-traversal callback that writes call-graph
+   data: the bfd used for byte-order conversion and the output file.  */
+
+struct cg_handlers
+{
+  bfd *abfd;
+  FILE *fp;
+};
+
+/* Histogram counters, allocated by nds32_gmon_start.  */
+static uint16_t *hist;
+/* Range of text.  */
+static uint32_t low_pc, high_pc;
+/* Call-graph arcs; keys are struct cg_node, ordered by cg_cmp.  */
+rbtree_t cg_tree;
+
+/* bfd_map_over_sections callback: widen [low_pc, high_pc) so that it
+   covers SECT when SECT has SEC_CODE set.  Other sections are ignored.
+   OBJ is unused.  */
+
+static void
+find_text_range (bfd *abfd, asection *sect, void *obj)
+{
+  bfd_vma start, end;
+
+  if (!(bfd_get_section_flags (abfd, sect) & SEC_CODE))
+    return;
+
+  start = bfd_section_vma (abfd, sect);
+  end = start + bfd_section_size (abfd, sect);
+
+  if (start < low_pc)
+    low_pc = start;
+  if (end > high_pc)
+    high_pc = end;
+}
+
+/* Write the time-histogram record to FP: a one-byte tag, a
+   struct nds32_gmon_hist_hdr, then one 16-bit counter per bin of
+   [low_pc, high_pc).  Multi-byte fields are encoded with ABFD's byte
+   order.  HIST is the counter array; when it is NULL every bin is written
+   as zero.  */
+
+static void
+write_hist (FILE *fp, bfd *abfd, uint16_t *hist)
+{
+  struct nds32_gmon_hist_hdr hdr;
+  uint32_t nbins = (high_pc - low_pc) >> HIST_GRANULARITY_SHIFT;
+  uint32_t i;
+  /* The tag occupies one byte in the file.  Keep it in a byte so that the
+     value written does not depend on host endianness.  */
+  unsigned char tag = NDS32_GMON_TAG_TIME_HIST;
+
+  fwrite (&tag, 1, 1, fp);
+
+  /* Clear the header first so that the unused tail of DIMEN (and any
+     bytes bfd_put_32 does not touch) are not written uninitialized.  */
+  memset (&hdr, 0, sizeof (hdr));
+  bfd_put_32 (abfd, low_pc, hdr.low_pc);
+  bfd_put_32 (abfd, high_pc, hdr.high_pc);
+  bfd_put_32 (abfd, nbins, hdr.hist_size);
+  bfd_put_32 (abfd, CYCLE_GRANULARITY, hdr.prof_rate);
+  strcpy (hdr.dimen, "cycle");
+  hdr.dimen_abbrev = 'c';
+  fwrite (&hdr, sizeof (hdr), 1, fp);
+
+  for (i = 0; i < nbins; i++)
+    {
+      uint16_t h;
+
+      bfd_put_16 (abfd, hist != NULL ? hist[i] : 0, &h);
+      fwrite (&h, 2, 1, fp);
+    }
+}
+
+/* Tree-traversal callback: free the key stored in NODE.  TREE and DONTCARE
+   are unused.  */
+
+static void
+cg_free_node (rbtree_t tree, rbnode_t node, void *dontcare)
+{
+  free (node->key);
+}
+
+/* Order two struct cg_node keys by from_pc, then by self_pc.
+   Returns -1, 0 or 1.  The count field does not take part.  */
+
+static int
+cg_cmp (void *lhs, void *rhs)
+{
+  const struct cg_node *a = lhs;
+  const struct cg_node *b = rhs;
+
+  if (a->from_pc != b->from_pc)
+    return a->from_pc < b->from_pc ? -1 : 1;
+
+  if (a->self_pc != b->self_pc)
+    return a->self_pc < b->self_pc ? -1 : 1;
+
+  return 0;
+}
+
+/* Reset the profiling state for the program in ABFD: recompute the text
+   range, discard any call-graph tree left from an earlier call, create an
+   empty tree and allocate a zeroed histogram sized from the text range.  */
+
+void
+nds32_gmon_start (struct bfd *abfd)
+{
+  uint32_t nbins;
+
+  /* Seed the range inverted so the first code section sets both ends.  */
+  low_pc = 0xffffffff;
+  high_pc = 0;
+  bfd_map_over_sections (abfd, find_text_range, NULL);
+
+  if (cg_tree)
+    {
+      rbtree_traverse_node (cg_tree, cg_tree->root, cg_free_node, NULL);
+      rbtree_destroy_tree (cg_tree);
+    }
+  cg_tree = rbtree_create_tree (cg_cmp, NULL);
+
+  free (hist);
+  nbins = (high_pc - low_pc) >> HIST_GRANULARITY_SHIFT;
+  hist = calloc (nbins, sizeof (uint16_t));
+}
+
+/* Tree-traversal callback: append one call-graph arc record for node N to
+   the file held in ARG (a struct cg_handlers).  The record is a one-byte
+   tag followed by from_pc, self_pc and count, each as a 32-bit value in
+   the byte order of the handler's bfd.  TREE is unused.  */
+
+static void
+write_cg_trav (rbtree_t tree, rbnode_t n, void *arg)
+{
+  struct cg_node *cgn = (struct cg_node *) n->key;
+  unsigned char buf[4];
+  struct cg_handlers *hp = (struct cg_handlers *) arg;
+
+  /* The tag is a single byte.  Store it directly: encoding it with
+     bfd_put_32 and writing only the first byte would emit 0 for a
+     big-endian target.  */
+  buf[0] = NDS32_GMON_TAG_CG_ARC;
+  fwrite (buf, 1, 1, hp->fp);
+  bfd_put_32 (hp->abfd, cgn->from_pc, buf);
+  fwrite (buf, 4, 1, hp->fp);
+  bfd_put_32 (hp->abfd, cgn->self_pc, buf);
+  fwrite (buf, 4, 1, hp->fp);
+  bfd_put_32 (hp->abfd, cgn->count, buf);
+  fwrite (buf, 4, 1, hp->fp);
+}
+
+/* Write the collected profile to "gmon.out" in the current directory:
+   the file header, the time histogram, then one record per call-graph
+   arc.  ABFD supplies the byte order for multi-byte fields.  If the file
+   cannot be created a message is printed and nothing is written.  */
+
+void
+nds32_gmon_cleanup (bfd *abfd)
+{
+  struct nds32_gmon_hdr hdr;
+  struct cg_handlers h;
+
+  h.abfd = abfd;
+  /* The file is binary; "b" matters on hosts that translate newlines.  */
+  h.fp = fopen ("gmon.out", "wb");
+  if (h.fp == NULL)
+    {
+      perror ("gmon.out");
+      return;
+    }
+
+  memset (&hdr, 0, sizeof (hdr));
+  memcpy (hdr.cookie, "gmon", 4);
+  bfd_put_32 (abfd, 1, hdr.version);
+
+  fwrite (&hdr, sizeof (hdr), 1, h.fp);
+
+  write_hist (h.fp, abfd, hist);
+
+  /* cg_tree is only created by nds32_gmon_start.  */
+  if (cg_tree)
+    rbtree_traverse_node (cg_tree, cg_tree->root, write_cg_trav, &h);
+
+  fclose (h.fp);
+}
+
+/* Simulate the mcount function: record one call from FROM_PC to SELF_PC.
+   The comment in the original source says this should be called for
+   JAL/JRAL instructions.  An existing arc has its count incremented;
+   otherwise a new arc with count 1 is inserted into cg_tree.  The call is
+   dropped when profiling has not been started or memory is exhausted.  */
+
+void
+nds32_gmon_mcount (uint32_t from_pc, uint32_t self_pc)
+{
+  struct cg_node n;
+  struct cg_node *new_cg;
+  rbnode_t p;
+
+  if (!cg_tree)
+    return;
+
+  n.from_pc = from_pc;
+  n.self_pc = self_pc;
+  n.count = 0;
+  p = rbtree_find (cg_tree, &n);
+
+  if (p)
+    {
+      ((struct cg_node *) p->key)->count++;
+      return;
+    }
+
+  new_cg = (struct cg_node *) calloc (1, sizeof (struct cg_node));
+  if (new_cg == NULL)
+    return;
+
+  new_cg->from_pc = from_pc;
+  new_cg->self_pc = self_pc;
+  new_cg->count = 1;
+  rbtree_insert (cg_tree, new_cg);
+}
+
+/* Simulate time-sampling: count one sample for PC in the histogram.
+   The comment in the original source says this is currently called for
+   every instruction.  Samples outside [low_pc, high_pc), or taken while no
+   histogram is allocated, are ignored.  */
+
+void
+nds32_gmon_sample (uint32_t pc)
+{
+  if (hist == NULL || pc < low_pc || pc >= high_pc)
+    return;
+
+  /* Index with the histogram shift, the same one used to size and to
+     write out the histogram.  */
+  hist[(pc - low_pc) >> HIST_GRANULARITY_SHIFT]++;
+}
diff --git a/sim/nds32/nds32-gmon.h b/sim/nds32/nds32-gmon.h
new file mode 100644
index 0000000..352f987
--- /dev/null
+++ b/sim/nds32/nds32-gmon.h
@@ -0,0 +1,56 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef NDS32_GMON_H
+#define NDS32_GMON_H
+
+void nds32_gmon_start (struct bfd *abfd);
+void nds32_gmon_cleanup (struct bfd *abfd);
+void nds32_gmon_mcount (uint32_t from_pc, uint32_t self_pc);
+void nds32_gmon_sample (uint32_t pc);
+
+/* sys/gmon_out.h is missing on cygwin, so some of its declarations are
+   copied here in order to avoid portability issues.
+
+   Record tags: each record written to the profile file starts with one
+   of these values.  */
+enum
+{
+  NDS32_GMON_TAG_TIME_HIST = 0,
+  NDS32_GMON_TAG_CG_ARC = 1,
+  NDS32_GMON_TAG_BB_COUNT = 2
+};
+
+/* File header written at the start of the profile file.  All fields are
+   byte arrays so the layout has no padding.  */
+struct nds32_gmon_hdr
+{
+  char cookie[4];	/* Filled with the characters "gmon".  */
+  char version[4];	/* Format version, stored as a 32-bit value.  */
+  char spare[3 * 4];	/* Left zero by the writer.  */
+};
+
+/* Header of a time-histogram record.  The address fields hold 32-bit
+   target addresses (they are filled with bfd_put_32), so they are sized
+   explicitly rather than by the host's pointer size; otherwise the file
+   layout would change with the host the simulator is built on.  */
+struct nds32_gmon_hist_hdr
+{
+  char low_pc[4];			/* Base pc address of sample buffer.  */
+  char high_pc[4];			/* Max pc address of sampled buffer.  */
+  char hist_size[4];			/* Size of sample buffer.  */
+  char prof_rate[4];			/* Profiling clock rate.  */
+  char dimen[15];			/* Phys. dim., usually "seconds".  */
+  char dimen_abbrev;			/* Usually 's' for "seconds".  */
+};
+
+#endif
diff --git a/sim/nds32/nds32-load.c b/sim/nds32/nds32-load.c
new file mode 100644
index 0000000..7ca84b0
--- /dev/null
+++ b/sim/nds32/nds32-load.c
@@ -0,0 +1,486 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+
+#include <stdlib.h>
+#if defined (__linux__) || defined (__CYGWIN__)
+#include <sys/mman.h>
+#include <sys/resource.h>
+#elif defined (__WIN32__)
+#include "mingw32-hdep.h"
+#endif
+
+#include "bfd.h"
+#include "elf-bfd.h"
+#include "sim-main.h"
+#include "sim-utils.h"
+#include "sim-assert.h"
+
+#include "nds32-sim.h"
+#include "nds32-mm.h"
+#include "nds32-syscall.h"
+
+/* bfd_map_over_sections callback: store 1 through OBJ (an int *) when
+   SECT is named ".note.ABI-tag".  Other sections leave *OBJ untouched.  */
+
+static void
+nds32_simple_osabi_sniff_sections (bfd *abfd, asection *sect, void *obj)
+{
+  if (strcmp (bfd_get_section_name (abfd, sect), ".note.ABI-tag") != 0)
+    return;
+
+  *(int *) obj = 1;
+}
+
+/* Calculate the total size for mapping an ELF: the distance from the
+   p_vaddr of the first non-empty PT_LOAD entry to the end (p_vaddr +
+   p_memsz) of the last one, among the N entries of PHDR.  Returns 0 when
+   there is no such entry.  */
+
+static int
+total_mapping_size (Elf_Internal_Phdr *phdr, int n)
+{
+  int i;
+  int first = -1;
+  int last = -1;
+
+  for (i = 0; i < n; i++)
+    {
+      if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0)
+	continue;
+
+      if (first == -1)
+	first = i;
+      last = i;
+    }
+
+  /* Without this check an ELF with no loadable segment would index
+     phdr[-1].  */
+  if (first == -1)
+    return 0;
+
+  return phdr[last].p_vaddr + phdr[last].p_memsz - phdr[first].p_vaddr;
+}
+
+
+/* Set up simulated memory for the program in ABFD.
+
+   When the environment is still undecided it is chosen here: USER if the
+   ELF has a ".note.ABI-tag" section, OPERATING otherwise.  Outside the
+   USER environment a single 64 MB region at address 0 is created.  In
+   the USER environment the nds32-mm device is attached, the stack and
+   every non-empty PT_LOAD segment are mapped, and, when the ELF names an
+   interpreter (PT_INTERP), that file is opened below the simulator
+   sysroot and its segments are mapped as well.  Results are left in
+   sd->exec_base, sd->interp_bfd and sd->interp_base.  */
+
+static void
+nds32_alloc_memory (SIM_DESC sd, struct bfd *abfd)
+{
+  int osabi = 0;
+  int i;
+  char buf[1024];
+  Elf_Internal_Phdr *phdr;
+  Elf_Internal_Phdr *interp_phdr = NULL;
+  uint32_t off;
+  uint32_t len;
+  int sysroot_len;
+  uint32_t interp_base;
+  SIM_CPU *cpu = STATE_CPU (sd, 0);
+  struct rlimit limit;
+  struct nds32_mm *mm = STATE_MM (sd);
+
+  getrlimit (RLIMIT_STACK, &limit);
+  mm->limit_sp = limit.rlim_cur;
+  getrlimit (RLIMIT_DATA, &limit);
+  mm->limit_data = limit.rlim_cur;
+
+  /* NOTE(review): an odd value is taken to mean "unlimited" (presumably
+     RLIM_INFINITY, all bits set); confirm against the host headers.  */
+  if (mm->limit_sp & 1) /* Unlimited?  */
+    mm->limit_sp = 0x800000;
+  if (mm->limit_data & 1) /* Unlimited?  */
+    mm->limit_data = 0x800000;
+
+  if (STATE_ENVIRONMENT (sd) == ALL_ENVIRONMENT)
+    {
+      bfd_map_over_sections (abfd, nds32_simple_osabi_sniff_sections, &osabi);
+      if (osabi)
+	STATE_ENVIRONMENT (sd) = USER_ENVIRONMENT;
+      else
+	STATE_ENVIRONMENT (sd) = OPERATING_ENVIRONMENT;
+    }
+
+  if (STATE_ENVIRONMENT (sd) != USER_ENVIRONMENT)
+    {
+      /* FIXME: We should only do this if user doesn't allocate one.
+		But how can we know it? */
+      sim_do_command (sd, "memory region 0,0x4000000"); /* 64 MB */
+      return;
+    }
+
+    /*
+    See sim-config.h for detailed explanation.
+	--environment user|virtual|operating
+
+    By default, the setting is 'all', meaning not selected.
+
+    In the current design, USER_ENVIRONMENT is used for Linux
+    applications, so
+	1. Load ELF by segment instead of by section.
+	2. Load the dynamic linker (INTERP) if needed.
+	3. Prepare stack for arguments, environments and AUXV.
+	4. Use nds32-mm for memory mapping.
+    If the ENVIRONMENT is not USER, it is treated as a normal ELF
+    application, so only a single 64MB memory block is allocated,
+    and the default sim_load_file () is used.  */
+
+  /* For emulating Linux VMA */
+  sim_core_attach (sd, NULL, 0, access_read_write_exec, 0, 0x00004000,
+		   0xFFFF8000, 0, &nds32_mm_devices, NULL);
+
+  nds32_mm_init (mm);
+
+  /* Allocate stack.  */
+  /* TODO: Executable stack.  Currently, EXEC affects vma cache. */
+  nds32_mmap (cpu, mm->start_sp - mm->limit_sp, mm->limit_sp,
+	      PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+	      -1, 0);
+
+  /* FIXME: Handle ET_DYN and ET_EXEC.  */
+  phdr = elf_tdata (abfd)->phdr;
+  sd->exec_base = -1;
+  for (i = 0; i < elf_elfheader (abfd)->e_phnum; i++)
+    {
+      uint32_t addr, len;
+      uint32_t prot = 0;
+
+      if (phdr[i].p_type == PT_INTERP)
+	interp_phdr = &phdr[i];
+
+      if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0)
+	continue;
+
+      /* Expand the segment to whole pages.  */
+      addr = phdr[i].p_vaddr;
+      len = addr + phdr[i].p_memsz - PAGE_ALIGN (addr);
+      len = PAGE_ROUNDUP (len);
+      addr = PAGE_ALIGN (addr);
+
+      if (phdr[i].p_flags & PF_X)
+	prot |= PROT_EXEC;
+      if (phdr[i].p_flags & PF_W)
+	prot |= PROT_WRITE;
+      if (phdr[i].p_flags & PF_R)
+	prot |= PROT_READ;
+
+      /* The first loadable segment defines the load base.  */
+      if (sd->exec_base == -1)
+	sd->exec_base = addr;
+
+      nds32_mmap (cpu, addr, len, prot,
+		  MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		  -1, 0);
+
+      /* The break starts after the highest mapped segment.  */
+      if (addr + len > mm->brk)
+	mm->brk = addr + len;
+    }
+
+  /* TODO: Pre-map brk */
+
+  if (!interp_phdr)
+    return;
+
+  /* Read path of interp.  */
+  off = interp_phdr->p_offset;
+  len = interp_phdr->p_filesz;
+  sysroot_len = strlen (simulator_sysroot);
+
+  /* Drop one trailing '/' from the sysroot before the interpreter path is
+     appended.  The length check keeps an empty sysroot from reading
+     before the start of the string.  */
+  if (sysroot_len > 0 && simulator_sysroot[sysroot_len - 1] == '/')
+    sysroot_len--;
+
+  /* The sysroot, the path read from the file and a terminating NUL must
+     all fit in BUF.  P_FILESZ comes from the ELF and is not trusted.  */
+  if (len == 0
+      || (size_t) sysroot_len >= sizeof (buf)
+      || len >= sizeof (buf) - (size_t) sysroot_len)
+    return;
+
+  memcpy (buf, simulator_sysroot, sysroot_len);
+  buf[sysroot_len] = '\0';
+
+  if (bfd_seek (abfd, off, SEEK_SET) != 0
+      || bfd_bread (buf + sysroot_len, len, abfd) != len)
+    return;
+  /* Do not rely on the segment contents being NUL-terminated.  */
+  buf[sysroot_len + len] = '\0';
+
+  sd->interp_bfd = bfd_openr (buf, 0);
+
+  if (sd->interp_bfd == NULL)
+    return;
+
+  /* The program headers read below are only valid for a recognized
+     object file.  */
+  if (!bfd_check_format (sd->interp_bfd, bfd_object))
+    {
+      bfd_close (sd->interp_bfd);
+      sd->interp_bfd = NULL;
+      return;
+    }
+
+  /* Add memory for interp.  */
+  phdr = elf_tdata (sd->interp_bfd)->phdr;
+  len = total_mapping_size (phdr, elf_elfheader (sd->interp_bfd)->e_phnum);
+  interp_base = nds32_get_unmapped_area (mm, 0, len);
+  if (interp_base == (uint32_t) -1)
+    {
+      /* No room for the interpreter.  */
+      bfd_close (sd->interp_bfd);
+      sd->interp_bfd = NULL;
+      return;
+    }
+
+  for (i = 0; i < elf_elfheader (sd->interp_bfd)->e_phnum; i++)
+    {
+      uint32_t addr, len, prot = 0;
+
+      if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0)
+	continue;
+
+      addr = interp_base + phdr[i].p_vaddr;
+      len = addr + phdr[i].p_memsz - PAGE_ALIGN (addr);
+      len = PAGE_ROUNDUP (len);
+      addr = PAGE_ALIGN (addr);
+
+      if (phdr[i].p_flags & PF_X)
+	prot |= PROT_EXEC;
+      if (phdr[i].p_flags & PF_W)
+	prot |= PROT_WRITE;
+      if (phdr[i].p_flags & PF_R)
+	prot |= PROT_READ;
+
+      nds32_mmap (cpu, addr, len, prot,
+		  MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		  -1, 0);
+    }
+
+  sd->interp_base = interp_base;
+}
+
+/* Copy every non-empty PT_LOAD segment of ABFD into simulated memory.
+   The first such segment is placed at LOAD_BASE and the others keep their
+   offset relative to it.  Bytes beyond p_filesz (e.g. .bss) are written
+   as zero.  A segment whose file contents cannot be read is skipped.  */
+
+static void
+nds32_load_segments (SIM_DESC sd, bfd *abfd, uint32_t load_base)
+{
+  Elf_Internal_Phdr *phdr;
+  int i;
+  int bias = -1;
+  int bias_set = 0;
+
+  phdr = elf_tdata (abfd)->phdr;
+
+  for (i = 0; i < elf_elfheader (abfd)->e_phnum; i++)
+    {
+      uint32_t addr, filesz, memsz;
+      char *data = NULL;
+
+      if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0)
+	continue;
+
+      addr = phdr[i].p_vaddr;
+      filesz = phdr[i].p_filesz;
+      memsz = phdr[i].p_memsz;
+
+      /* A malformed header may claim more file bytes than memory bytes;
+	 never read past the MEMSZ-sized buffer.  */
+      if (filesz > memsz)
+	filesz = memsz;
+
+      if (bias_set == 0)
+	{
+	  bias = load_base - addr;
+	  bias_set = 1;
+	}
+
+      if (STATE_OPEN_KIND (sd) == SIM_OPEN_DEBUG)
+	sim_io_printf (sd, "Load segment, size 0x%x addr 0x%x\n",
+		       memsz, addr + bias);
+
+      data = xmalloc (memsz);
+      /* Clear for .bss or something else. */
+      if (memsz != filesz)
+	memset (data + filesz, 0, memsz - filesz);
+
+      if (bfd_seek (abfd, phdr[i].p_offset, SEEK_SET) == 0
+	  && bfd_bread (data, filesz, abfd) == filesz)
+	sim_write (sd, addr + bias, (unsigned char *) data, memsz);
+
+      free (data);
+    }
+
+  return;
+}
+
+/* Load the program PROG_NAME / PROG_BFD into the simulator SD.
+
+   The program is analyzed first, all mappings from a previous program
+   are dropped, and core memory is allocated unless the user configured
+   memory already.  Outside the USER environment the common
+   sim_load_file loader is used; in the USER environment the ELF (and its
+   interpreter, if one was opened) is loaded by segment.  Returns
+   SIM_RC_OK on success and SIM_RC_FAIL otherwise.  FROM_TTY is unused.  */
+
+SIM_RC
+sim_load (SIM_DESC sd, char *prog_name, struct bfd *prog_bfd, int from_tty)
+{
+  bfd *result_bfd;
+  struct nds32_mm *mm = STATE_MM (sd);
+
+  if (prog_bfd == NULL)
+    prog_bfd = STATE_PROG_BFD (sd);
+
+  SIM_ASSERT (STATE_MAGIC (sd) == SIM_MAGIC_NUMBER);
+  if (sim_analyze_program (sd, prog_name, prog_bfd) != SIM_RC_OK)
+    return SIM_RC_FAIL;
+  SIM_ASSERT (STATE_PROG_BFD (sd) != NULL);
+
+  /* From here on use the bfd recorded in the simulator state, as the
+     sim_load_file call below already does.  PROG_BFD may still be NULL
+     when the caller passed none and nothing was recorded before the
+     analysis, and the segment loader dereferences it.  */
+  prog_bfd = STATE_PROG_BFD (sd);
+
+  /* Free vma for previous program.  */
+  nds32_freeall_vma (mm);
+
+  /* Allocate core memory if none is specified by user.  */
+  if (STATE_MEMOPT (sd) == NULL && sd->mem_attached == FALSE
+      && prog_bfd != NULL)
+    nds32_alloc_memory (sd, prog_bfd);
+
+  if (STATE_ENVIRONMENT (sd) != USER_ENVIRONMENT)
+    {
+      /* NOTE: For historical reasons, older hardware simulators
+	 incorrectly write the program sections at LMA interpreted as a
+	 virtual address.  This is still accommodated for backward
+	 compatibility reasons. */
+
+      result_bfd = sim_load_file (sd, STATE_MY_NAME (sd),
+				  STATE_CALLBACK (sd),
+				  prog_name,
+				  STATE_PROG_BFD (sd),
+				  STATE_OPEN_KIND (sd) == SIM_OPEN_DEBUG,
+				  STATE_LOAD_AT_LMA_P (sd),
+				  sim_write);
+      if (result_bfd == NULL)
+	{
+	  bfd_close (STATE_PROG_BFD (sd));
+	  STATE_PROG_BFD (sd) = NULL;
+	  return SIM_RC_FAIL;
+	}
+    }
+  else
+    {
+      /* For Linux programs, we should load ELF based on loadable
+	 segments, not sections.  Otherwise, ELF/Program headers will
+	 not be loaded which are needed by dynamic linker.  */
+      nds32_load_segments (sd, prog_bfd, sd->exec_base);
+      if (sd->interp_bfd)
+	nds32_load_segments (sd, sd->interp_bfd, sd->interp_base);
+    }
+
+  return SIM_RC_OK;
+}
+
+/* Prepare SD for a libgloss program: select the libgloss syscall map and
+   save ARGV, joined by single spaces, in sd->cmdline.  Arguments are
+   taken in order until the next one would no longer fit; the rest are
+   dropped.  ABFD and ENV are unused.  */
+
+void
+nds32_init_libgloss (SIM_DESC sd, struct bfd *abfd, char **argv, char **env)
+{
+  const int room = sizeof (sd->cmdline) - 1;
+  int used = 0;
+  char **arg;
+
+  STATE_CALLBACK (sd)->syscall_map = cb_nds32_libgloss_syscall_map;
+
+  /* Save argv for -mcrt-arg hacking.  The buffer is cleared first, so the
+     text is always NUL-terminated.  */
+  memset (sd->cmdline, 0, sizeof (sd->cmdline));
+
+  for (arg = argv; arg != NULL && *arg != NULL; arg++)
+    {
+      /* Bytes needed: the argument plus its separating space.  */
+      int need = strlen (*arg) + 1;
+
+      if (used + need >= room)
+	break;
+
+      memcpy (sd->cmdline + used, *arg, need - 1);
+      sd->cmdline[used + need - 1] = ' ';
+      used += need;
+    }
+
+  /* Turn the separator after the last argument into the terminator.  */
+  if (used > 0)
+    sd->cmdline[used - 1] = '\0';
+}
+
+/* Push one auxiliary-vector entry below SP: TYPE at SP - 8 and VAL at
+   SP - 4, both as 32-bit values in ABFD's byte order.  Returns the new
+   stack pointer, SP - 8.  */
+
+static uint32_t
+nds32_push_auxv (SIM_DESC sd, struct bfd *abfd, uint32_t sp, uint32_t type,
+		 uint32_t val)
+{
+  const uint32_t slot = sp - 8;
+  unsigned char word[4];
+
+  bfd_put_32 (abfd, type, word);
+  sim_write (sd, slot, word, sizeof (word));
+
+  bfd_put_32 (abfd, val, word);
+  sim_write (sd, slot + 4, word, sizeof (word));
+
+  return slot;
+}
+
+/* Prepare SD for a Linux program: select the Linux syscall map, build
+   the initial process stack (argument and environment strings, the
+   auxiliary vector, envp[], argv[] and argc) below STACK_TOP, and set the
+   stack pointer register.  When an interpreter was loaded, the PC is set
+   to its entry point.  ABFD is the main executable.  */
+
+void
+nds32_init_linux (SIM_DESC sd, struct bfd *abfd, char **argv, char **env)
+{
+  int argc = 0, argv_len = 0;
+  int envc = 0, env_len = 0;
+  SIM_CPU *cpu = STATE_CPU (sd, 0);
+  uint32_t sp = STACK_TOP;
+  uint32_t sp_argv, sp_envp;		/* Pointers to argv and envp array.  */
+  uint32_t flat;			/* Beginning of argv/env strings.  */
+  unsigned char buf[8];
+  int i;
+  Elf_Internal_Ehdr *exec = elf_elfheader (abfd);
+
+  STATE_CALLBACK (sd)->syscall_map = cb_nds32_linux_syscall_map;
+
+  /* Check stack layout in
+	http://articles.manugarg.com/aboutelfauxiliaryvectors.html
+     for details.  */
+
+  /* Count the entries and the bytes their strings need, including the
+     terminating NULs.  */
+  for (argc = 0; argv && argv[argc]; argc++)
+    argv_len += strlen (argv[argc]) + 1;
+
+  for (envc = 0; env && env[envc]; envc++)
+    env_len += strlen (env[envc]) + 1;
+
+  /*
+			<---- STACK_TOP
+     env strings
+     argv strings	<---- flat pointer
+     auxv[term] = AT_NULL
+     auxv[...]
+     auxv[0]
+     envp[term] = NULL
+     envp[...]
+     envp[0]
+     argv[n] = NULL
+     argv[...]
+     argv[0]
+     argc		<---- $sp  */
+  sp = flat = STACK_TOP - ROUNDUP (argv_len + env_len, 8);
+
+  /* Adjust sp so that the final $sp is 8-byte aligned.  The auxv entries
+     are 8 bytes each; argc, argv[] and envp[] take argc + envc + 3
+     words.  */
+  if ((argc + envc + 1) % 2 != 0)
+    sp -= 4;
+
+  /* Push AUXV.  */
+  sp = nds32_push_auxv (sd, abfd, sp, AT_NULL, 0);
+  sp = nds32_push_auxv (sd, abfd, sp, AT_PAGESZ, PAGE_SIZE);
+  sp = nds32_push_auxv (sd, abfd, sp, AT_PHDR, sd->exec_base + exec->e_phoff);
+  /* AT_PHENT is the size of a program header as stored in the ELF file,
+     which the ELF header records; sizeof (Elf_Internal_Phdr) is the size
+     of BFD's in-memory structure and is unrelated.  */
+  sp = nds32_push_auxv (sd, abfd, sp, AT_PHENT, exec->e_phentsize);
+  sp = nds32_push_auxv (sd, abfd, sp, AT_PHNUM, exec->e_phnum);
+  sp = nds32_push_auxv (sd, abfd, sp, AT_BASE, sd->interp_base);
+  sp = nds32_push_auxv (sd, abfd, sp, AT_ENTRY, exec->e_entry);
+  sp = nds32_push_auxv (sd, abfd, sp, AT_HWCAP, 0x9dc6f);
+
+  /* Make room for argc, argv[] and envp[] arrays.  */
+  sp -= 4 + (argc + 1 + envc + 1) * 4;
+  sp_argv = sp + 4;
+  sp_envp = sp_argv + (argc + 1) * 4;
+  CCPU_GPR[GPR_SP].u = sp;
+  SIM_ASSERT ((sp % 8) == 0);
+
+  /* Write argc.  */
+  bfd_put_32 (abfd, argc, buf);
+  sim_write (sd, sp, buf, 4);
+
+  /* Write argv[] array and argument strings.  */
+  for (i = 0; i < argc; i++)
+    {
+      int len = strlen (argv[i]) + 1;	/* 1 for trailing \0.  */
+
+      sim_write (sd, flat, (unsigned char *) argv[i], len);
+      bfd_put_32 (abfd, flat, buf);
+      sim_write (sd, sp_argv + i * 4, buf, 4);
+      flat += len;
+    }
+  bfd_put_32 (abfd, 0, buf);
+  sim_write (sd, sp_argv + argc * 4, buf, 4); /* term-zero */
+
+  /* Write envp[] array and environment strings.  */
+  for (i = 0; i < envc; i++)
+    {
+      int len = strlen (env[i]) + 1;	/* 1 for trailing \0.  */
+
+      sim_write (sd, flat, (unsigned char *) env[i], len);
+      bfd_put_32 (abfd, flat, buf);
+      sim_write (sd, sp_envp + i * 4, buf, 4);
+      flat += len;
+    }
+  bfd_put_32 (abfd, 0, buf);
+  sim_write (sd, sp_envp + envc * 4, buf, 4); /* term-zero */
+
+  /* A dynamically linked program starts in its interpreter.  */
+  if (sd->interp_bfd)
+    {
+      CPU_PC_STORE (cpu) (cpu, sd->interp_base
+			  + bfd_get_start_address (sd->interp_bfd));
+    }
+
+  return;
+}
diff --git a/sim/nds32/nds32-mm.c b/sim/nds32/nds32-mm.c
new file mode 100644
index 0000000..2be572f
--- /dev/null
+++ b/sim/nds32/nds32-mm.c
@@ -0,0 +1,447 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+
+#include <errno.h>
+#if defined (__linux__) || defined (__CYGWIN__)
+#include <sys/mman.h>
+#elif defined (__WIN32__)
+#include "mingw32-hdep.h"
+#endif
+
+#include "bfd.h"
+#include "elf-bfd.h"
+
+#include "nds32-sim.h"
+#include "nds32-mm.h"
+
+/* Linux memory is emulated using a `device', so that we can handle more
+   sophisticated mapping operations than sim-core.  */
+struct _device { char dummy; } nds32_mm_devices;
+
+/* Error hook of the `device' interface.  ME, MESSAGE and any further
+   arguments are ignored; the simulator is aborted.  */
+void
+device_error (device *me ATTRIBUTE_UNUSED,
+		const char *message ATTRIBUTE_UNUSED,
+		...)
+{
+  abort ();
+}
+
+/* Read memory in Linux VMA.
+
+   Copy NR_BYTES bytes starting at simulated address ADDR into SOURCE.
+   The whole range must lie inside a single mapped area.  Returns
+   NR_BYTES on success and 0 when the range is not (entirely) mapped.
+   With USE_TLB the last area used for executable and for other pages is
+   remembered and tried first.  */
+
+int
+device_io_read_buffer (device *me ATTRIBUTE_UNUSED,
+			void *source,
+			int space ATTRIBUTE_UNUSED,
+			address_word addr, unsigned nr_bytes,
+			SIM_DESC sd, SIM_CPU *cpu,
+			sim_cia cia ATTRIBUTE_UNUSED)
+{
+  struct nds32_mm *mm = STATE_MM (sd);
+  struct nds32_vm_area *vma;
+  cpu = STATE_CPU (sd, 0);
+  cia = CIA_GET (cpu);
+
+#if defined (USE_TLB)
+  if (mm->icache && addr >= mm->icache->vm_start
+      && (addr + nr_bytes) <= mm->icache->vm_end)
+    {
+      /* mm->cache_ihit++; */
+      vma = mm->icache;
+      goto FOUND;
+    }
+  else if (mm->dcache && addr >= mm->dcache->vm_start
+	   && (addr + nr_bytes) <= mm->dcache->vm_end)
+    {
+      /* mm->cache_dhit++; */
+      vma = mm->dcache;
+      goto FOUND;
+    }
+  /* mm->cache_miss++; */
+#endif
+
+  vma = nds32_find_vma (mm, addr);
+
+  /* nds32_find_vma returns the first area ending above ADDR, or NULL; the
+     area may still start above ADDR or end before the range does.  */
+  if (vma == NULL || addr < vma->vm_start
+      || (addr + nr_bytes - 1) >= vma->vm_end)
+    return 0;
+
+#if defined (USE_TLB)
+  /* Remember the area only once it is known to be non-NULL and to cover
+     the access.  */
+  if (vma->vm_prot & PROT_EXEC)
+    mm->icache = vma;
+  else
+    mm->dcache = vma;
+
+FOUND:
+#endif
+  memcpy (source, vma->vm_buf + (addr - vma->vm_start), nr_bytes);
+
+  return nr_bytes;
+}
+
+/* Write memory in Linux VMA.
+
+   Copy NR_BYTES bytes from SOURCE to simulated address ADDR.  Returns
+   NR_BYTES on success and 0 when the range is not inside a single mapped
+   area.  With USE_TLB the area remembered for data accesses is tried
+   first, and the result of a fresh lookup replaces it.  */
+
+int
+device_io_write_buffer (device *me ATTRIBUTE_UNUSED,
+			const void *source,
+			int space ATTRIBUTE_UNUSED,
+			address_word addr, unsigned nr_bytes,
+			SIM_DESC sd, SIM_CPU *cpu, sim_cia cia)
+{
+  struct nds32_mm *mm = STATE_MM (sd);
+  struct nds32_vm_area *vma = NULL;
+  int looked_up = 0;
+  cpu = STATE_CPU (sd, 0);
+  cia = CIA_GET (cpu);
+
+#if defined (USE_TLB)
+  if (mm->dcache && addr >= mm->dcache->vm_start
+      && (addr + nr_bytes) <= mm->dcache->vm_end)
+    {
+      /* mm->cache_dhit++; */
+      vma = mm->dcache;
+    }
+  else
+    {
+      /* mm->cache_miss++; */
+      vma = nds32_find_vma (mm, addr);
+      mm->dcache = vma;
+      looked_up = 1;
+    }
+#else
+  vma = nds32_find_vma (mm, addr);
+  looked_up = 1;
+#endif
+
+  /* Only a freshly looked-up area needs validating; a cache hit has
+     already been range-checked above.  */
+  if (looked_up
+      && (vma == NULL || addr < vma->vm_start
+	  || (addr + nr_bytes - 1) >= vma->vm_end))
+    return 0;
+
+  memcpy (vma->vm_buf + (addr - vma->vm_start), source, nr_bytes);
+
+  return nr_bytes;
+}
+
+/* Allocate a VMA struct.  The new area is empty (start = end = 0, no
+   buffer, no protection bits) and linked only to itself.  */
+
+static struct nds32_vm_area *
+nds32_alloc_vma (void)
+{
+  struct nds32_vm_area *vma = xmalloc (sizeof (struct nds32_vm_area));
+  vma->vm_start = 0;
+  vma->vm_end = 0;
+  vma->vm_prot = 0;
+  vma->vm_buf = NULL;
+  vma->vm_prev = vma->vm_next = vma;
+  return vma;
+}
+
+/* Free a VMA struct.  Only the descriptor is released: VMA is neither
+   unlinked from its list nor is its buffer unmapped here.  */
+
+static void
+nds32_free_vma (struct nds32_vm_area *vma)
+{
+  /* The caller should un-map vm_buf itself.  */
+  free (vma);
+}
+
+/* Find the first VMA which satisfies addr < vma->vm_end, walking the list
+   from its head.  The area found may start above ADDR.  Returns NULL
+   when no area ends above ADDR.  */
+
+struct nds32_vm_area *
+nds32_find_vma (struct nds32_mm *mm, uint32_t addr)
+{
+  struct nds32_vm_area *const head = MM_HEAD (mm);
+  struct nds32_vm_area *cur;
+
+  for (cur = head->vm_next; cur != head; cur = cur->vm_next)
+    if (addr < cur->vm_end)
+      return cur;
+
+  return NULL;
+}
+
+/* Find a proper place and insert the VMA: directly before the first area
+   that ends above VMA's start, or at the tail of the list when there is
+   no such area.  With USE_TLB the lookup caches are dropped.  */
+
+static void
+nds32_link_vma (struct nds32_mm *mm, struct nds32_vm_area *vma)
+{
+  struct nds32_vm_area *anchor;	/* Area that will follow VMA.  */
+  struct nds32_vm_area *pred;	/* Area that will precede VMA.  */
+  struct nds32_vm_area *succ;
+
+  anchor = nds32_find_vma (mm, vma->vm_start);
+  /* If there is no matching vma, insert after the LAST vma, i.e. before
+     the list head.  */
+  if (anchor == NULL)
+    anchor = MM_HEAD (mm);
+
+  pred = anchor->vm_prev;
+  succ = pred->vm_next;
+
+  vma->vm_next = succ;
+  vma->vm_prev = pred;
+  succ->vm_prev = vma;
+  pred->vm_next = vma;
+
+#if defined (USE_TLB)
+  mm->icache = mm->dcache = NULL;
+#endif
+}
+
+/* Remove the range [ADDR, ADDR + LEN) from the mappings of MM and unmap
+   the host memory behind it.  Areas that are fully covered are removed,
+   partially covered ones are trimmed, and an area covering the range
+   with room on both sides is split in two.  A zero LEN does nothing.
+   With USE_TLB the lookup caches are dropped.  */
+
+static void
+nds32_unlink_vma (struct nds32_mm *mm, uint32_t addr, uint32_t len)
+{
+  struct nds32_vm_area *vma;
+  struct nds32_vm_area *next;
+  uint32_t end = addr + len;
+
+  /* An empty range inside an area would otherwise look like a split with
+     nothing removed.  */
+  if (len == 0)
+    return;
+
+  vma = nds32_find_vma (mm, addr);
+  if (!vma)
+    return;
+
+#if defined (USE_TLB)
+  mm->icache = mm->dcache = NULL;
+#endif
+  /*
+    Possible intersection cases:
+
+      |---vma i---|   |---vma i+1---|
+
+    |-----|
+	|----|
+    |---------------|
+	|-----------|
+    |-------------------------|
+	|---------------------|
+   */
+
+  for ( ; vma != MM_HEAD (mm) && end > vma->vm_start; vma = next)
+    {
+      uint32_t os, oe;	/* start-end in this vma  */
+      uint32_t ol;	/* len */
+
+      /* VMA may be freed below, so fetch its successor first.  */
+      next = vma->vm_next;
+
+      /*
+	 buf
+	  |--------vma---------|
+		|----ol----|
+		os	   oe
+       */
+      os = (addr <= vma->vm_start) ? vma->vm_start : addr;
+      oe = (end >= vma->vm_end) ? vma->vm_end : end;
+      ol = oe - os;
+
+      munmap (vma->vm_buf + (os - vma->vm_start), ol);
+
+      if (os > vma->vm_start && oe < vma->vm_end)
+	{
+	  /* Split: VMA keeps the part below the hole, a new area takes
+	     the part above it.  */
+	  struct nds32_vm_area *vma_tmp = nds32_alloc_vma ();
+	  vma_tmp->vm_start = oe;
+	  vma_tmp->vm_end = vma->vm_end;
+	  vma_tmp->vm_buf = vma->vm_buf + (oe - vma->vm_start);
+	  vma_tmp->vm_prot = vma->vm_prot;
+	  vma->vm_end = os;
+	  nds32_link_vma (mm, vma_tmp);
+	  /* The range ended inside this area; nothing follows.  */
+	  break;
+	}
+      else if (os == vma->vm_start && oe == vma->vm_end)
+	{
+	  /* Unlink completely.  */
+	  vma->vm_prev->vm_next = vma->vm_next;
+	  vma->vm_next->vm_prev = vma->vm_prev;
+	  nds32_free_vma (vma);
+	}
+      else if (oe < vma->vm_end)
+	{
+	  /* The front was removed.  vm_buf must keep pointing at the
+	     host memory for vm_start, so advance it by the same
+	     amount.  */
+	  vma->vm_buf += oe - vma->vm_start;
+	  vma->vm_start = oe;
+	}
+      else if (os > vma->vm_start)
+	vma->vm_end = os;
+    }
+}
+
+/* Unmap and free every area of MM, then reinitialize MM with
+   nds32_mm_init.  */
+
+void
+nds32_freeall_vma (struct nds32_mm *mm)
+{
+  struct nds32_vm_area *cur = MM_HEAD (mm)->vm_next;
+
+  while (cur != MM_HEAD (mm))
+    {
+      /* CUR is freed below; remember where to continue.  */
+      struct nds32_vm_area *following = cur->vm_next;
+
+      munmap (cur->vm_buf, cur->vm_end - cur->vm_start);
+      nds32_free_vma (cur);
+      cur = following;
+    }
+
+  nds32_mm_init (mm);
+}
+
+/* Dump VMA list for debugging: print one "start-end @ buffer" line per
+   area to standard output, in list order.  */
+
+void
+nds32_dump_vma (struct nds32_mm *mm)
+{
+  struct nds32_vm_area *cur = MM_HEAD (mm)->vm_next;
+
+  while (cur != MM_HEAD (mm))
+    {
+      printf ("%08x-%08x @ %p\n", cur->vm_start, cur->vm_end, cur->vm_buf);
+      cur = cur->vm_next;
+    }
+}
+
+/* Find a suitable address for addr/len.  The search starts at ADDR, or
+   at mm->free_cache when ADDR is 0, and returns the first candidate that
+   leaves LEN bytes before the next area, moving the candidate to the end
+   of each area it collides with.  When no area ends above the starting
+   address, that address is returned as is.  Returns (uint32_t) -1 when
+   the walk reaches the end of the list without finding a gap.  */
+
+uint32_t
+nds32_get_unmapped_area (struct nds32_mm *mm, uint32_t addr, uint32_t len)
+{
+  struct nds32_vm_area *vma;
+
+  if (!addr)
+    addr = mm->free_cache;
+
+  vma = nds32_find_vma (mm, addr);
+  if (vma == NULL)
+    return addr;
+
+  for ( ; vma != MM_HEAD (mm); vma = vma->vm_next)
+    {
+      if (addr + len <= vma->vm_start)
+	return addr;
+      addr = vma->vm_end;
+    }
+
+  return -1;
+}
+
+/* Reset MM to an empty address space: the list head describes no memory
+   and links to itself, the stack top and the search base for new
+   mappings get their defaults, and with USE_TLB the lookup caches are
+   cleared.  Existing areas are not freed here.  */
+
+void
+nds32_mm_init (struct nds32_mm *mm)
+{
+  struct nds32_vm_area *head = MM_HEAD (mm);
+
+  head->vm_buf = NULL;
+  head->vm_start = 0;
+  head->vm_end = 0;
+  head->vm_next = head;
+  head->vm_prev = head;
+
+  mm->free_cache = TASK_UNMAPPED_BASE;
+  mm->start_sp = STACK_TOP;
+#if defined (USE_TLB)
+  mm->dcache = NULL;
+  mm->icache = NULL;
+  /* mm->cache_miss = 0;
+  mm->cache_ihit = mm->cache_dhit = 0; */
+#endif
+}
+
+/* munmap () for Linux VMA.  The range [ADDR, ADDR + LEN) is widened to
+   whole pages and removed from the mappings of CPU's simulator state.
+   Always returns 0.  */
+
+int
+nds32_munmap (sim_cpu *cpu, uint32_t addr, size_t len)
+{
+  struct nds32_mm *mm = STATE_MM (CPU_STATE (cpu));
+  uint32_t first_page = PAGE_ALIGN (addr);
+  uint32_t span = PAGE_ROUNDUP (addr + len) - PAGE_ALIGN (addr);
+
+  nds32_unlink_vma (mm, first_page, span);
+
+  return 0; /* FIXME?  */
+}
+
+/* mmap for Linux VMA.
+
+   Map LEN bytes of host memory (anonymous, or backed by the host file
+   behind simulator descriptor FD at OFFSET) and enter it into the
+   simulated address space of CPU.  With MAP_FIXED any existing mapping
+   in [ADDR, ADDR + LEN) is removed first; the final address is whatever
+   nds32_get_unmapped_area picks starting from ADDR.
+
+   Returns the simulated address cast to a pointer.  Returns MAP_FAILED
+   when the host mapping fails or no simulated address is available, and
+   (void *) EBADF for a bad FD.  */
+
+void *
+nds32_mmap (sim_cpu *cpu, uint32_t addr, size_t len,
+	      int prot, int flags, int fd, off_t offset)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  struct nds32_mm *mm = STATE_MM (sd);
+  host_callback *cb = STATE_CALLBACK (sd);
+  void *phy = NULL;
+  struct nds32_vm_area *vma;
+
+  /* For debugging */
+  prot |= PROT_READ | PROT_WRITE;
+
+  /* The host picks the buffer address, so MAP_FIXED is never passed on.  */
+  if (flags & MAP_ANONYMOUS)
+    phy = mmap (NULL, len, prot, flags & ~MAP_FIXED, fd, offset);
+  else if (fd < 0 || fd >= MAX_CALLBACK_FDS || cb->fd_buddy[fd] < 0)
+    return (void *) EBADF;
+  else
+    {
+      /* Translate the simulator descriptor to the host one.  */
+      fd = cb->fdmap[fd];
+      phy = mmap (NULL, len, prot, flags & ~MAP_FIXED, fd, offset);
+    }
+
+  if (phy == MAP_FAILED)
+    return phy;
+
+  /* FIXME: FIXME FIXME:
+
+     I implemented this way because I want to emulate Linux mmap,
+     "overlapped part of the existing mapping(s) will be discarded."
+
+     But I found it became a VERY severe performance bottleneck,
+     since sim_core_find_mapping searches sequentially.
+     The same program with dynamically linked could be 9 times slower
+     than statically linked one.
+
+     I should study how Linux manage process address space (e.g., vm_struct),
+     and implement it here instead of using GDB sim-core.  */
+
+  if (flags & MAP_FIXED)
+    {
+      /* Detach before attach */
+      nds32_munmap (cpu, addr, len);
+    }
+
+  addr = nds32_get_unmapped_area (mm, addr, len);
+  if (addr == (uint32_t) -1)
+    {
+      /* No room in the simulated address space: give the host memory
+	 back instead of recording an area at a bogus address.  */
+      munmap (phy, len);
+      return MAP_FAILED;
+    }
+
+  vma = nds32_alloc_vma ();
+  vma->vm_buf = phy;
+  vma->vm_start = addr;
+  vma->vm_prot = prot;
+  vma->vm_end = addr + PAGE_ROUNDUP (len);
+  nds32_link_vma (mm, vma);
+
+  return (void *) addr;
+}
+
+/* Emulate the brk system call for CPU.
+
+   ADDR is the requested program break.  Returns the resulting break:
+   0 when no break has been set up, the current break when ADDR is 0 or
+   when new pages cannot be mapped, and ADDR otherwise.  Pages are mapped
+   or unmapped so that the mapping always extends to the break rounded up
+   to a page.  */
+
+uint32_t
+nds32_sys_brk (sim_cpu *cpu, uint32_t addr)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  struct nds32_mm *mm = STATE_MM (sd);
+
+  /* FIXME: Check sys_brk () in kernel/mm/mmap.c for details.  */
+
+  if (mm->brk == 0)
+    return 0;
+
+  if (addr == 0)
+    return mm->brk;
+
+  /* The mapped heap ends at PAGE_ROUNDUP (brk); if that does not move,
+     no page has to be mapped or unmapped.  Comparing the rounded-down
+     addresses instead would skip mapping the first heap page whenever
+     the current break is page-aligned.  */
+  if (PAGE_ROUNDUP (addr) == PAGE_ROUNDUP (mm->brk))
+    return mm->brk = addr;
+
+  if (addr < mm->brk)
+    {
+      /* delete pages */
+      nds32_munmap (cpu, PAGE_ROUNDUP (addr), mm->brk - PAGE_ROUNDUP (addr));
+      return mm->brk = addr;
+    }
+  else
+    {
+      /* create pages */
+      void *got;
+
+      got = nds32_mmap (cpu, PAGE_ROUNDUP (mm->brk),
+			addr - PAGE_ROUNDUP (mm->brk),
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS,
+			-1, 0);
+      /* Leave the break where it was when the pages are not there.  */
+      if (got == MAP_FAILED)
+	return mm->brk;
+      return mm->brk = addr;
+    }
+}
diff --git a/sim/nds32/nds32-mm.h b/sim/nds32/nds32-mm.h
new file mode 100644
index 0000000..145d1fa
--- /dev/null
+++ b/sim/nds32/nds32-mm.h
@@ -0,0 +1,86 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef NDS32_MM_H
+#define NDS32_MM_H
+
+#include <stdint.h>
+#include "sim-main.h"
+#include "sim-base.h"
+
+/* Round X down to a multiple of A.  A must be a power of two.  */
+#define ALIGN(x, a)		((x) & ~((a) - 1))
+/* Round X up to a multiple of A.  A must be a power of two.  */
+#define ROUNDUP(x, a)		(ALIGN ((x) + ((a) - 1), a))
+
+/* Page granularity used by the simulated memory manager.  Note that
+   PAGE_ALIGN rounds DOWN; use PAGE_ROUNDUP to round up.  */
+#define PAGE_SIZE		0x1000
+#define PAGE_ALIGN(x)		ALIGN (x, PAGE_SIZE)
+#define PAGE_ROUNDUP(x)		ROUNDUP (x, PAGE_SIZE)
+
+#define TASK_SIZE		0xbf000000
+#define STACK_TOP		TASK_SIZE
+#define RLIMIT_STACK_SIZE	(8 * 1024 * 1024)
+#define TASK_UNMAPPED_BASE	PAGE_ALIGN (TASK_SIZE / 3)
+/* Address of the head node of the vm area list embedded in MM.  */
+#define MM_HEAD(mm)     (&(mm)->mmap)
+
+/* Defined outside this header.  */
+extern struct _device nds32_mm_devices;
+
+/* One mapping in the simulated address space, covering the addresses
+   [vm_start, vm_end).  nds32_mmap creates an area from the result of the
+   host mmap call and links it into the owning struct nds32_mm.  */
+struct nds32_vm_area
+{
+  uint32_t vm_start;			/* First address of this interval */
+  uint32_t vm_end;			/* First address after this interval */
+  /* NOTE(review): presumably the neighbouring areas in the list headed
+     by nds32_mm.mmap -- confirm against nds32_link_vma.  */
+  struct nds32_vm_area *vm_next;
+  struct nds32_vm_area *vm_prev;
+  uint32_t vm_prot;			/* PROT_* value passed to nds32_mmap */
+  char *vm_buf;				/* Host memory returned by mmap */
+};
+
+/* Memory-management state of the simulated process: the list of mapped
+   areas plus the program break and stack bookkeeping.  */
+struct nds32_mm
+{
+  struct nds32_vm_area mmap;		/* Head node for vm_area */
+
+#if defined USE_TLB
+  /* A simple vma lookup cache for non-/EXEC pages. */
+  struct nds32_vm_area *dcache;
+  struct nds32_vm_area *icache;
+  /* Accounting hit rates.  */
+  /* uint64_t cache_miss;
+  uint64_t cache_ihit;
+  uint64_t cache_dhit; */
+#endif
+
+  uint32_t start_brk;			/* Start address of brk */
+  /* Current program break; read and updated by nds32_sys_brk, which
+     treats 0 as "no break set up".  */
+  uint32_t brk;				/* Final address of brk */
+  uint32_t limit_data;			/* Limit of data */
+  uint32_t start_sp;			/* Start address of stack */
+  uint32_t limit_sp;			/* Limit of stack */
+  uint32_t free_cache;			/* Last address for mmap. */
+};
+
+/* Interface of the simulated memory manager.
+   NOTE(review): apart from nds32_sys_brk and the visible tail of
+   nds32_mmap, the notes below are inferred from names and call sites;
+   confirm them against the definitions in nds32-mm.c.  */
+
+void nds32_mm_init (struct nds32_mm *mm);
+/* Presumably returns the area containing ADDR -- confirm.  */
+struct nds32_vm_area *nds32_find_vma (struct nds32_mm *mm, uint32_t addr);
+void nds32_dump_vma (struct nds32_mm *mm);
+/* Set the program break to ADDR and return the resulting break;
+   ADDR == 0 only queries the current break.  */
+uint32_t nds32_sys_brk (sim_cpu *cpu, uint32_t addr);
+int nds32_munmap (sim_cpu *cpu, uint32_t addr, size_t len);
+/* Map LEN bytes with the host mmap and attach them to the simulated
+   address space.  Returns the simulated address cast to a pointer, or
+   MAP_FAILED when the host mmap fails.  */
+void *nds32_mmap (sim_cpu *cpu, uint32_t addr, size_t len,
+	      int prot, int flags, int fd, off_t offset);
+/* Returns the simulated address nds32_mmap uses for a new area.  */
+uint32_t nds32_get_unmapped_area (struct nds32_mm *mm, uint32_t addr,
+				  uint32_t len);
+void nds32_freeall_vma (struct nds32_mm *mm);
+
+#endif
diff --git a/sim/nds32/nds32-pfm.c b/sim/nds32/nds32-pfm.c
new file mode 100644
index 0000000..44fcadf
--- /dev/null
+++ b/sim/nds32/nds32-pfm.c
@@ -0,0 +1,74 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "nds32-pfm.h"
+#include "nds32-sim.h"
+
+#include <stdint.h>
+
+/* Performance monitor support.  */
+
+/* Decode every field of the PFM_CTL system register of CPU.  The decoded
+   values are not used yet, so the function has no effect beyond the
+   register reads.  */
+void
+nds32_pfm_ctl (sim_cpu *cpu)
+{
+  int en = CCPU_SR_GET (PFM_CTL, PFM_CTL_EN);
+  int ie = CCPU_SR_GET (PFM_CTL, PFM_CTL_IE);
+  int ovf = CCPU_SR_GET (PFM_CTL, PFM_CTL_OVF);
+  int ks = CCPU_SR_GET (PFM_CTL, PFM_CTL_KS);
+  int ku = CCPU_SR_GET (PFM_CTL, PFM_CTL_KU);
+  int sel0 = CCPU_SR_GET (PFM_CTL, PFM_CTL_SEL0);
+  int sel1 = CCPU_SR_GET (PFM_CTL, PFM_CTL_SEL1);
+  int sel2 = CCPU_SR_GET (PFM_CTL, PFM_CTL_SEL2);
+}
+
+/* Account for one occurrence of PFM_EVENT on CPU.
+
+   Only PFM_CYCLE and PFM_INST are counted.  Each of the three counters
+   whose PFM_CTL select field equals PFM_EVENT and whose enable bit is
+   set is incremented; a counter that wraps to zero gets its overflow
+   bit set.  The overflow field is written back in every case.  */
+void
+nds32_pfm_event (sim_cpu *cpu, int pfm_event)
+{
+  int selected[3];
+  int enabled;
+  int overflow;
+  int n;
+
+  enabled = CCPU_SR_GET (PFM_CTL, PFM_CTL_EN);
+  overflow = CCPU_SR_GET (PFM_CTL, PFM_CTL_OVF);
+
+  selected[0] = CCPU_SR_GET (PFM_CTL, PFM_CTL_SEL0);
+  selected[1] = CCPU_SR_GET (PFM_CTL, PFM_CTL_SEL1);
+  selected[2] = CCPU_SR_GET (PFM_CTL, PFM_CTL_SEL2);
+
+  if (pfm_event == PFM_CYCLE || pfm_event == PFM_INST)
+    {
+      for (n = 0; n < 3; n++)
+	{
+	  /* Skip counters watching another event or switched off.  */
+	  if (selected[n] != pfm_event || !(enabled & (1 << n)))
+	    continue;
+
+	  CCPU_SR[SRIDX_PFMC0 + n].u++;
+	  if (CCPU_SR[SRIDX_PFMC0 + n].u == 0)
+	    overflow |= (1 << n);
+	}
+    }
+
+  CCPU_SR_PUT (PFM_CTL, PFM_CTL_OVF, overflow);
+}
diff --git a/sim/nds32/nds32-pfm.h b/sim/nds32/nds32-pfm.h
new file mode 100644
index 0000000..42c5420
--- /dev/null
+++ b/sim/nds32/nds32-pfm.h
@@ -0,0 +1,51 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef NDS32_PFM_CTL_H
+#define NDS32_PFM_CTL_H
+
+#include "sim-main.h"
+#include <stdint.h>
+
+/* Read the fields of the PFM_CTL system register of CPU.  */
+void nds32_pfm_ctl (sim_cpu *cpu);
+/* Record one occurrence of PFM_EVENT (an enum PERFM_EVENT_ENUM value) on
+   CPU, advancing the enabled counters that select that event.  */
+void nds32_pfm_event (sim_cpu *cpu, int pfm_event);
+
+/* Performance monitor event numbers, as passed to nds32_pfm_event and
+   compared there with the PFM_CTL select fields.  The numbering has
+   three ranges, starting at 0, 64 + 2 and 128 + 2.
+   NOTE(review): presumably these follow the hardware event encoding of
+   the three counters -- confirm against the architecture manual.  */
+enum PERFM_EVENT_ENUM
+{
+  PFM_CYCLE = 0,
+  PFM_INST,
+
+  PFM_COND_BRANCH = 64 + 2,
+  PFM_TAKEN_COND,
+  PFM_PREFETCH,
+  PFM_RET,
+  PFM_JR,
+  PFM_JAL,
+  PFM_NOP,
+  PFM_SCW,
+  PFM_IDSB,
+  PFM_CCTL,
+  PFM_TAKEN_INT,
+  PFM_LOADS,
+
+  PFM_COND_BRANCH_MISPREDICT = 128 + 2,
+};
+
+#endif
diff --git a/sim/nds32/nds32-sim.h b/sim/nds32/nds32-sim.h
new file mode 100644
index 0000000..bd21b11
--- /dev/null
+++ b/sim/nds32/nds32-sim.h
@@ -0,0 +1,180 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef _NDS32_SIM_H_
+#define _NDS32_SIM_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <stdint.h>
+
+#include "sim-main.h"
+
+/* Widest unsigned and signed integer types used by the simulator;
+   ulongest_t carries the value of __nds32_ld and __nds32_st.  */
+typedef unsigned long long ulongest_t;
+typedef signed long long longest_t;
+
+/* Symbolic register numbers.
+   NOTE(review): the GPR_* values presumably index the general-purpose
+   register file and the USR0_* values the registers of user special
+   register group 0 -- confirm against the users of these names.  The two
+   sets overlap numerically (e.g. GPR_FP and USR0_ITB are both 28).  */
+enum nds32_cpu_regnum
+{
+  GPR_TA = 15,
+  GPR_FP = 28,
+  GPR_GP = 29,
+  GPR_LP = 30,
+  GPR_SP = 31,
+
+  USR0_D0LO = 0,
+  USR0_D0HI = 1,
+  USR0_D1LO = 2,
+  USR0_D1HI = 3,
+  USR0_ITB = 28,
+  USR0_IFCLP = 29,
+  USR0_PC = 31,
+};
+
+/* Build a system register index from its three-part number: M is placed
+   at bit 7, m at bit 3 and e in the low bits.  */
+#define SRIDX(M,m,e)  (((M) << 7) | ((m) << 3) | (e))
+/* Build a user special register index: g is placed at bit 5, u in the
+   low bits.  */
+#define UXIDX(g,u)    (((g) << 5) | (u))
+
+/* Exception numbers passed to nds32_raise_exception.  EXP_HW0 and
+   EXP_VEP0 deliberately share the value 9 (see the range notes on those
+   entries).  EXP_BADOP has no explicit value and therefore follows
+   EXP_SW0 as 16.  */
+enum nds32_exceptions
+{
+  EXP_RESET = 0,
+  EXP_TLB_FILL = 1,
+  EXP_NO_PTE = 2,
+  EXP_TLB_MISC = 3,
+  EXP_TLB_VLPT_MISS = 4,
+  EXP_MACHINE_ERROR = 5,
+  EXP_DEBUG = 6,
+  EXP_GENERAL = 7,
+  EXP_SYSCALL = 8,
+  EXP_HW0 = 9,	/* HW0-5: 9-14 */
+  EXP_VEP0 = 9,	/* VEP0-64: 9-72 */
+  EXP_SW0 = 15,
+
+  EXP_BADOP,
+};
+
+/* Raise exception E on CPU.  MSG and the following arguments form a
+   printf-style message.  */
+uint32_t nds32_raise_exception (sim_cpu *cpu, enum nds32_exceptions e,
+				int sig, char *msg, ...);
+
+/* Memory access primitives.  Do not use these directly.  */
+ulongest_t __nds32_ld (sim_cpu *cpu, SIM_ADDR addr, int size, int aligned_p);
+void __nds32_st (sim_cpu *cpu, SIM_ADDR addr, int size, ulongest_t val,
+		 int aligned_p);
+/* Use these wrappers, which fix the ALIGNED_P argument.  */
+#define nds32_ld_aligned(CPU, ADDR, SIZE) \
+  __nds32_ld (CPU, ADDR, SIZE, 1)
+#define nds32_st_aligned(CPU, ADDR, SIZE, VAL) \
+  __nds32_st (CPU, ADDR, SIZE, VAL, 1)
+#define nds32_ld_unaligned(CPU, ADDR, SIZE) \
+  __nds32_ld (CPU, ADDR, SIZE, 0)
+#define nds32_st_unaligned(CPU, ADDR, SIZE, VAL) \
+  __nds32_st (CPU, ADDR, SIZE, VAL, 0)
+
+/* Program set-up for the two supported run-time environments.  */
+void nds32_init_libgloss (SIM_DESC sd, struct bfd *abfd, char **argv,
+			  char **env);
+void nds32_init_linux (SIM_DESC sd, struct bfd *abfd, char **argv, char **env);
+
+/* Coprocessor instruction decoders.  */
+sim_cia nds32_decode32_lwc (sim_cpu *cpu, const uint32_t insn, sim_cia cia);
+sim_cia nds32_decode32_swc (sim_cpu *cpu, const uint32_t insn, sim_cia cia);
+sim_cia nds32_decode32_ldc (sim_cpu *cpu, const uint32_t insn, sim_cia cia);
+sim_cia nds32_decode32_sdc (sim_cpu *cpu, const uint32_t insn, sim_cia cia);
+sim_cia nds32_decode32_cop (sim_cpu *cpu, const uint32_t insn, sim_cia cia);
+
+/* Report the unsupported instruction or request INSN found at CIA;
+   TAG names what kind of operation it was (e.g. "syscall").  */
+void nds32_bad_op (sim_cpu *cpu, uint32_t cia, uint32_t insn, char *tag);
+
+/* Debug printf.  The "#if 1" keeps it enabled: it forwards to
+   sim_io_printf.  Change the condition to 0 to compile the calls away.
+   The enabled form uses GNU named variadic macro arguments.  */
+#if 1
+#define SIM_IO_DPRINTF(sd, fmt, args...)   sim_io_printf (sd, fmt, ## args)
+#else
+#define SIM_IO_DPRINTF(...)	do { } while (0)
+#endif
+
+/* System register indices and field layouts.
+
+   SRIDX_<REG> is the index of a register in cpu->reg_sr.  <REG>_<FIELD>
+   is the bit position of a field in that register and, where present,
+   <REG>_<FIELD>_N is the field's width in bits.  CCPU_SR_GET and
+   CCPU_SR_PUT paste "_N" onto the field name, so they can only be used
+   with fields that have an _N entry; single-bit fields without one are
+   meant for CCPU_SR_TEST, CCPU_SR_SET and CCPU_SR_CLEAR.  */
+enum
+{
+  SRIDX_PSW	= SRIDX (1, 0, 0),
+  SRIDX_IPSW	= SRIDX (1, 0, 1),
+  SRIDX_P_IPSW	= SRIDX (1, 0, 2),
+  PSW_GIE	= 0,
+  PSW_BE	= 5,
+  PSW_IFCON	= 15,
+
+  SRIDX_IVB	= SRIDX (1, 1, 1),
+  IVB_EVIC	= 13,
+  IVB_ESZ	= 14,
+  IVB_ESZ_N	= 2,
+  IVB_IVBASE	= 16,
+  IVB_IVBASE_N	= 16,
+
+  SRIDX_EVA	= SRIDX (1, 2, 1),
+  SRIDX_P_EVA	= SRIDX (1, 2, 2),
+  SRIDX_ITYPE	= SRIDX (1, 3, 1),
+  SRIDX_P_ITYPE	= SRIDX (1, 3, 2),
+  ITYPE_ETYPE	= 0,
+  ITYPE_ETYPE_N	= 4,
+  ITYPE_INST	= 4,
+  ITYPE_SWID	= 16,
+  ITYPE_SWID_N	= 15,
+
+  SRIDX_MERR	= SRIDX (1, 4, 1),
+  SRIDX_IPC	= SRIDX (1, 5, 1),
+  SRIDX_P_IPC	= SRIDX (1, 5, 2),
+  SRIDX_OIPC	= SRIDX (1, 5, 3),
+  SRIDX_P_P0	= SRIDX (1, 6, 2),
+  SRIDX_P_P1	= SRIDX (1, 7, 2),
+  SRIDX_INT_MASK= SRIDX (1, 8, 0),
+  SRIDX_INT_PEND= SRIDX (1, 9, 0),
+
+  SRIDX_MSC_CFG	= SRIDX (0, 4, 0),
+  MSC_CFG_PFM	= 2,
+  MSC_CFG_DIV	= 5,
+  MSC_CFG_MAC	= 6,
+  MSC_CFG_IFC	= 19,
+  MSC_CFG_EIT	= 24,
+
+  /* The three performance counters have consecutive indices, which
+     nds32_pfm_event relies on (SRIDX_PFMC0 + i).  */
+  SRIDX_PFMC0	= SRIDX (4, 0, 0),
+  SRIDX_PFMC1	= SRIDX (4, 0, 1),
+  SRIDX_PFMC2	= SRIDX (4, 0, 2),
+  SRIDX_PFM_CTL	= SRIDX (4, 1, 0),
+  PFM_CTL_EN	= 0,
+  PFM_CTL_EN_N	= 3,
+  PFM_CTL_IE	= 3,
+  PFM_CTL_IE_N	= 3,
+  PFM_CTL_OVF	= 6,
+  PFM_CTL_OVF_N	= 3,
+  PFM_CTL_KS	= 9,
+  PFM_CTL_KS_N	= 3,
+  PFM_CTL_KU	= 12,
+  PFM_CTL_KU_N	= 3,
+  PFM_CTL_SEL0	= 15,
+  PFM_CTL_SEL0_N= 1,
+  PFM_CTL_SEL1	= 16,
+  PFM_CTL_SEL1_N= 6,
+  PFM_CTL_SEL2	= 22,
+  PFM_CTL_SEL2_N= 6,
+};
+
+/* Store the low BS bits of VAL into the BS-bit wide field of *SRC that
+   starts at bit SHIFT, leaving every other bit of *SRC unchanged.
+   SHIFT must be in the range 0..31.  */
+ATTRIBUTE_UNUSED static void
+__put_field (uint32_t *src, int shift, int bs, uint32_t val)
+{
+  /* Build the mask with unsigned arithmetic: shifting a signed 1 into
+     or past the sign bit is undefined, and so is a shift by the full
+     width, which is handled separately.  */
+  uint32_t mask = bs >= 32 ? UINT32_MAX : (((uint32_t) 1 << bs) - 1);
+
+  val &= mask;
+  *src = (*src & ~(mask << shift)) | (val << shift);
+}
+
+/* Accessors for the system registers of the variable named "cpu" in the
+   calling scope.  SREG is a register name without its SRIDX_ prefix.
+   TEST, SET and CLEAR act on the single bit BIT.  GET and PUT act on the
+   field at bit BIT whose width is the enumerator BIT##_N, so BIT must
+   be the name of a field that has a matching _N entry.  */
+#define CCPU_SR_TEST(SREG,BIT) \
+  (cpu->reg_sr[SRIDX_##SREG].u & (1u << (BIT)))
+#define CCPU_SR_SET(SREG,BIT) \
+  do { cpu->reg_sr[SRIDX_##SREG].u |= (1u << (BIT)); } while (0)
+#define CCPU_SR_CLEAR(SREG,BIT) \
+  do { cpu->reg_sr[SRIDX_##SREG].u &= ~(1u << (BIT)); } while (0)
+#define CCPU_SR_GET(SREG,BIT) \
+  ((cpu->reg_sr[SRIDX_##SREG].u >> (BIT)) & ((1u << BIT##_N) - 1))
+#define CCPU_SR_PUT(SREG,BIT,V) \
+  do \
+    { \
+      __put_field (&cpu->reg_sr[SRIDX_##SREG].u, BIT, BIT##_N, V); \
+    } \
+  while (0)
+
+#endif
diff --git a/sim/nds32/nds32-syscall.c b/sim/nds32/nds32-syscall.c
new file mode 100644
index 0000000..117f411
--- /dev/null
+++ b/sim/nds32/nds32-syscall.c
@@ -0,0 +1,599 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+
+#include <errno.h>
+
+#include "gdb/callback.h"
+#include "targ-vals.h"
+
+#if defined (__linux__) || defined (__CYGWIN__)
+#include <sys/time.h>
+#include <sys/times.h>
+#include <sys/utsname.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#elif defined (__WIN32__)
+#include "mingw32-hdep.h"
+#endif
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "nds32-sim.h"
+#include "nds32-mm.h"
+#include "nds32-syscall.h"
+
+#if 0
+  /* More standard syscalls.  */
+  {CB_SYS_lstat,	19},
+  {CB_SYS_truncate,	21},
+  {CB_SYS_ftruncate,	22},
+  {CB_SYS_pipe,		23},
+#endif
+
+/* System call numbers of the libgloss environment.  Each entry pairs a
+   CB_SYS_* identifier with the number the target program uses for that
+   call; the table ends with {-1, -1}.  nds32_syscall compares
+   cb->syscall_map against this table to tell the libgloss environment
+   from the Linux one.  */
+CB_TARGET_DEFS_MAP cb_nds32_libgloss_syscall_map[] =
+{
+   /* These are used by the ANSI C support of libc.  */
+  {CB_SYS_exit,		1},
+  {CB_SYS_open,		2},
+  {CB_SYS_close,	3},
+  {CB_SYS_read,		4},
+  {CB_SYS_write,	5},
+  {CB_SYS_lseek,	6},
+  {CB_SYS_unlink,	7},
+  {CB_SYS_getpid,	8},
+  {CB_SYS_kill,		9},
+  {CB_SYS_fstat,	10},
+
+  /* ARGV support.  */
+  {CB_SYS_argvlen,	12},
+  {CB_SYS_argv,		13},
+
+  /* These are extras added for one reason or another.  */
+  {CB_SYS_chdir,	14},
+  {CB_SYS_stat,		15},
+  {CB_SYS_chmod,	16},
+  {CB_SYS_utime,	17},
+  {CB_SYS_time,		18},
+
+  {CB_SYS_gettimeofday,	19},
+  {CB_SYS_times,	20},
+  {CB_SYS_link,		21},
+  /* SYS_argc		= 172, */
+  /* SYS_argnlen	= 173, */
+  /* SYS_argn		= 174, */
+  /* RedBoot. */
+  {CB_SYS_rename,	3001},
+  {CB_SYS_NDS32_isatty,	3002},
+  /* SYS_system		= 3003, */
+
+  /* NDS32 specific */
+  {CB_SYS_NDS32_errno,	6001},
+  {CB_SYS_NDS32_getcmdline, 6002},
+
+  {-1, -1}
+};
+
+/* System call numbers of the Linux environment.  Each entry pairs a
+   CB_SYS_* identifier with LINUX_SYS_BASE plus the Linux syscall
+   number; the table ends with {-1, -1}.
+
+   NOTE(review): CB_SYS_mmap is listed here but nds32_syscall has no case
+   for it, so it takes the default cb_syscall path.  Conversely
+   nds32_syscall has cases for CB_SYS_setrlimit and CB_SYS_getrlimit,
+   which have no entry here and so cannot be reached through this
+   table.  */
+CB_TARGET_DEFS_MAP cb_nds32_linux_syscall_map[] =
+{
+  {CB_SYS_exit,		LINUX_SYS_BASE + 1},
+  {CB_SYS_read,		LINUX_SYS_BASE + 3},
+  {CB_SYS_write,	LINUX_SYS_BASE + 4},
+  {CB_SYS_open,		LINUX_SYS_BASE + 5},
+  {CB_SYS_close,	LINUX_SYS_BASE + 6},
+  {CB_SYS_link,		LINUX_SYS_BASE + 9},
+  {CB_SYS_unlink,	LINUX_SYS_BASE + 10},
+  {CB_SYS_chdir,	LINUX_SYS_BASE + 12},
+  {CB_SYS_time,		LINUX_SYS_BASE + 13},
+  {CB_SYS_chmod,	LINUX_SYS_BASE + 15},
+  {CB_SYS_lseek,	LINUX_SYS_BASE + 19},
+  {CB_SYS_getpid,	LINUX_SYS_BASE + 20},
+  {CB_SYS_utime,	LINUX_SYS_BASE + 30},
+  {CB_SYS_access,	LINUX_SYS_BASE + 33},
+  {CB_SYS_rename,	LINUX_SYS_BASE + 38},
+  {CB_SYS_times,	LINUX_SYS_BASE + 43},
+  {CB_SYS_brk,		LINUX_SYS_BASE + 45},
+  {CB_SYS_ioctl,	LINUX_SYS_BASE + 54},
+  {CB_SYS_gettimeofday,	LINUX_SYS_BASE + 78},
+  /* {CB_SYS_settimeofday,	LINUX_SYS_BASE + 79}, */
+  {CB_SYS_mmap,		LINUX_SYS_BASE + 90},
+  {CB_SYS_munmap,	LINUX_SYS_BASE + 91},
+  {CB_SYS_stat,		LINUX_SYS_BASE + 106},
+  {CB_SYS_lstat,	LINUX_SYS_BASE + 107},
+  {CB_SYS_fstat,	LINUX_SYS_BASE + 108},
+  {CB_SYS_uname,	LINUX_SYS_BASE + 122},
+  {CB_SYS_mprotect,	LINUX_SYS_BASE + 125},
+  {CB_SYS_llseek,	LINUX_SYS_BASE + 140},
+  {CB_SYS_readv,	LINUX_SYS_BASE + 145},
+  {CB_SYS_writev,	LINUX_SYS_BASE + 146},
+  {CB_SYS_getpagesize,	LINUX_SYS_BASE + 166},
+  {CB_SYS_ugetrlimit,	LINUX_SYS_BASE + 191},
+  {CB_SYS_mmap2,	LINUX_SYS_BASE + 192},
+  {CB_SYS_stat64,	LINUX_SYS_BASE + 195},
+  {CB_SYS_lstat64,	LINUX_SYS_BASE + 196},
+  {CB_SYS_fstat64,	LINUX_SYS_BASE + 197},
+  {CB_SYS_getuid32,	LINUX_SYS_BASE + 199},
+  {CB_SYS_getgid32,	LINUX_SYS_BASE + 200},
+  {CB_SYS_geteuid32,	LINUX_SYS_BASE + 201},
+  {CB_SYS_getegid32,	LINUX_SYS_BASE + 202},
+  {CB_SYS_setuid32,	LINUX_SYS_BASE + 213},
+  {CB_SYS_setgid32,	LINUX_SYS_BASE + 214},
+  {CB_SYS_exit_group,	LINUX_SYS_BASE + 248},
+  {CB_SYS_fcntl64,	LINUX_SYS_BASE + 221},
+
+  {-1, -1}
+};
+
+/* Check
+	linux: arch/nds32/include/asm/stat.h
+	newlib: libc/include/sys/stat.h
+   for details.  */
+/* Target layouts of the stat structure, installed in cb->stat_map by
+   nds32_syscall before it forwards a stat-family call.  Each map is a
+   ':'-separated list of "name,size" entries.
+   NOTE(review): the size is presumably in bytes and "space" presumably
+   marks padding -- confirm against the stat_map users in sim/common.  */
+
+/* Used for stat, lstat and fstat in the user (Linux) environment.  */
+static const char cb_linux_stat_map_32[] =
+"st_dev,2:space,2:st_ino,4:st_mode,2:st_nlink,2:st_uid,2:st_gid,2:st_rdev,2:space,2:"
+"st_size,4:st_blksize,4:st_blocks,4:st_atime,4:st_atimensec,4:"
+"st_mtime,4:st_mtimensec,4:st_ctime,4:st_ctimensec,4:space,4:space,4";
+
+/* Used for stat64, lstat64 and fstat64.  */
+static const char cb_linux_stat_map_64[] =
+"st_dev,8:space,4:__st_ino,4:st_mode,4:st_nlink,4:st_uid,4:st_gid,4:st_rdev,8:"
+"space,8:st_size,8:st_blksize,4:space,4:st_blocks,8:st_atime,4:st_atimensec,4:"
+"st_mtime,4:st_mtimensec,4:st_ctime,4:st_ctimensec,4:st_ino,8";
+
+/* Used for stat, lstat and fstat outside the user environment.  */
+static const char cb_libgloss_stat_map_32[] =
+"st_dev,2:st_ino,2:st_mode,4:st_nlink,2:st_uid,2:st_gid,2:st_rdev,2:"
+"st_size,4:st_atime,4:space,4:st_mtime,4:space,4:st_ctime,4:space,4:"
+"st_blksize,4:st_blocks,4:space,8";
+
+/* Utility of cb_syscall to fetch a path name.
+
+   Read the NUL-terminated string at target address ADDR and, when it
+   starts with '/' and simulator_sysroot is not empty, prepend
+   simulator_sysroot to it.  On success the malloc'd buffer is stored in
+   *BUFP and the caller must free it.  The result is that of
+   cb_get_string: 0 on success, otherwise an error code.  If an error
+   occurs, no buffer is left malloc'd and *BUFP is set to NULL, so a
+   caller that frees it unconditionally stays safe.
+
+   COLE: This code is copied from common/syscall.c,
+   because it's a static function.  */
+static int
+get_path (host_callback *cb, CB_SYSCALL *sc, uint32_t addr, char **bufp)
+{
+#define MAX_PATH_LEN	1024
+  size_t sysroot_len = strlen (simulator_sysroot);
+  char *buf;
+  int result;
+
+  *bufp = NULL;
+
+  /* No room would be left for the path itself.  */
+  if (sysroot_len >= MAX_PATH_LEN)
+    return ENAMETOOLONG;
+
+  buf = xmalloc (MAX_PATH_LEN);
+
+  /* Reserve SYSROOT_LEN bytes so that the prefix always fits.  */
+  result = cb_get_string (cb, sc, buf, (int) (MAX_PATH_LEN - sysroot_len),
+			  addr);
+  if (result != 0)
+    {
+      free (buf);
+      return result;
+    }
+
+  /* Prepend absolute paths with simulator_sysroot.  Relative paths
+     are supposed to be relative to a chdir within that path, but at
+     this point unknown where.  */
+  if (simulator_sysroot[0] != '\0' && *buf == '/')
+    {
+      /* Shift the whole path, including its terminating NUL, to make
+	 room for the prefix.  Moving only SYSROOT_LEN bytes would
+	 truncate any path longer than the sysroot.  */
+      memmove (buf + sysroot_len, buf, strlen (buf) + 1);
+      memcpy (buf, simulator_sysroot, sysroot_len);
+    }
+
+  *bufp = buf;
+  return 0;
+}
+
+/* Memory access hooks handed to cb_syscall through CB_SYSCALL.  */
+
+/* Copy BYTES bytes from target address TADDR into BUF.  The simulator
+   descriptor and the cpu are recovered from SC->p1 and SC->p2.  Returns
+   whatever sim_core_read_buffer returns.  */
+static int
+syscall_read_mem (host_callback *cb, struct cb_syscall *sc,
+		  unsigned long taddr, char *buf, int bytes)
+{
+  return sim_core_read_buffer ((SIM_DESC) sc->p1, (SIM_CPU *) sc->p2,
+			       read_map, buf, taddr, bytes);
+}
+
+/* Copy BYTES bytes from BUF to target address TADDR.  The simulator
+   descriptor and the cpu are recovered from SC->p1 and SC->p2.  Returns
+   whatever sim_core_write_buffer returns.  */
+static int
+syscall_write_mem (host_callback *cb, struct cb_syscall *sc,
+		  unsigned long taddr, const char *buf, int bytes)
+{
+  return sim_core_write_buffer ((SIM_DESC) sc->p1, (SIM_CPU *) sc->p2,
+				write_map, buf, taddr, bytes);
+}
+
+/* Emulate the system call SWID raised by CPU at address CIA.
+
+   SWID is translated through the callback's syscall map.  Calls without
+   a dedicated case below are forwarded to cb_syscall, and a call that
+   cb_syscall reports as TARGET_ENOSYS is passed to nds32_bad_op.
+
+   Arguments are read from general registers 0 to 5.  The outcome is
+   stored in general register 0: the result on success; on failure -1
+   when the libgloss syscall map is active and the negated error code
+   otherwise.  */
+void
+nds32_syscall (sim_cpu *cpu, int swid, sim_cia cia)
+{
+  SIM_DESC sd = CPU_STATE (cpu);
+  host_callback *cb = STATE_CALLBACK (sd);
+  CB_SYSCALL sc;
+  int cbid;
+
+  CB_SYSCALL_INIT (&sc);
+
+  sc.func = swid;
+  sc.arg1 = CCPU_GPR[0].s;
+  sc.arg2 = CCPU_GPR[1].s;
+  sc.arg3 = CCPU_GPR[2].s;
+  sc.arg4 = CCPU_GPR[3].s;
+
+  sc.p1 = (PTR) sd;
+  sc.p2 = (PTR) cpu;
+  sc.result = -1;
+  sc.errcode = 0;
+  sc.read_mem = syscall_read_mem;
+  sc.write_mem = syscall_write_mem;
+
+  /* FIXME and TODO: Handling big endian.  */
+
+  /* switch (swid) */
+  switch (cbid = cb_target_to_host_syscall (cb, sc.func))
+    {
+    default:
+      cb_syscall (cb, &sc);
+      if (sc.result == -1 && sc.errcode == TARGET_ENOSYS)
+	{
+	  nds32_bad_op (cpu, cia, swid, "syscall");
+	  return;
+	}
+      break;
+
+    /*
+     * System calls used by libgloss and Linux.
+     */
+
+    case CB_SYS_exit_group:
+    case CB_SYS_exit:
+      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, cia,
+		       sim_exited, CCPU_GPR[0].s);
+      break;
+
+    case CB_SYS_llseek:
+      {
+	unsigned int fd = CCPU_GPR[0].u;
+	unsigned long offhi = CCPU_GPR[1].u;
+	unsigned long offlo = CCPU_GPR[2].u;
+	unsigned int whence = CCPU_GPR[4].u;
+	loff_t roff;
+
+	sc.arg1 = fd;
+	sc.arg2 = offlo;
+	sc.arg3 = whence;
+
+	/* Only offsets that fit in 32 bits are supported.  */
+	SIM_ASSERT (offhi == 0);
+
+	sc.func = TARGET_LINUX_SYS_lseek;
+	cb_syscall (cb, &sc);
+	roff = sc.result;
+
+	/* Copy the result only if user really passes other than NULL.  */
+	if (sc.result != -1 && CCPU_GPR[3].u)
+	  sim_write (sd, CCPU_GPR[3].u, (const unsigned char *) &roff,
+		     sizeof (loff_t));
+      }
+      /* Without this break the result was overwritten by the getpid
+	 case below.
+	 NOTE(review): the new offset is also returned in register 0;
+	 the Linux _llseek call returns 0 on success -- confirm which
+	 one the C library expects.  */
+      break;
+
+    case CB_SYS_getpid:
+      sc.result = getpid ();
+      break;
+
+    case CB_SYS_stat:
+    case CB_SYS_lstat:
+    case CB_SYS_fstat:
+      if (STATE_ENVIRONMENT (sd) == USER_ENVIRONMENT)
+	cb->stat_map = cb_linux_stat_map_32;
+      else
+	cb->stat_map = cb_libgloss_stat_map_32;
+      cb_syscall (cb, &sc);
+      break;
+
+    case CB_SYS_stat64:
+      cb->stat_map = cb_linux_stat_map_64;
+      sc.func = TARGET_LINUX_SYS_stat;
+      cb_syscall (cb, &sc);
+      break;
+    case CB_SYS_lstat64:
+      cb->stat_map = cb_linux_stat_map_64;
+      sc.func = TARGET_LINUX_SYS_lstat;
+      cb_syscall (cb, &sc);
+      break;
+    case CB_SYS_fstat64:
+      cb->stat_map = cb_linux_stat_map_64;
+      sc.func = TARGET_LINUX_SYS_fstat;
+      cb_syscall (cb, &sc);
+      break;
+
+    case CB_SYS_gettimeofday:
+      {
+	struct timeval tv;
+	struct timezone tz;
+	struct {
+	  uint32_t tv_sec;
+	  uint32_t tv_usec;
+	} target_tv;
+	struct {
+	  uint32_t tz_minuteswest;
+	  uint32_t tz_dsttime;
+	} target_tz;
+
+	sc.result = gettimeofday (&tv, &tz);
+
+	/* Convert to the 32-bit layout used by the target.  */
+	target_tv.tv_sec = tv.tv_sec;
+	target_tv.tv_usec = tv.tv_usec;
+	target_tz.tz_minuteswest = tz.tz_minuteswest;
+	target_tz.tz_dsttime = tz.tz_dsttime;
+
+	if (CCPU_GPR[0].u)
+	  sim_write (sd, CCPU_GPR[0].u, (const unsigned char *) &target_tv,
+		     sizeof (target_tv));
+	if (CCPU_GPR[1].u)
+	  sim_write (sd, CCPU_GPR[1].u, (const unsigned char *) &target_tz,
+		     sizeof (target_tz));
+      }
+      break;
+
+    /* glibc will try to use this, but we should reject it,
+       so getrlimit will be used instead.  */
+    case CB_SYS_ugetrlimit:
+      sc.result = -1;
+      sc.errcode = TARGET_ENOSYS;
+      break;
+
+    /*
+     * System calls used by Linux only.
+     */
+
+    case CB_SYS_brk:
+      sc.result = nds32_sys_brk (cpu, CCPU_GPR[0].u);
+      break;
+
+    /* NOTE(review): ioctl and fcntl hand the target's descriptor and
+       third argument straight to the host without any translation.  */
+    case CB_SYS_ioctl:
+      sc.result = ioctl (CCPU_GPR[0].s, CCPU_GPR[1].s, CCPU_GPR[2].s);
+      break;
+
+    case CB_SYS_fcntl64:
+      sc.result = fcntl (CCPU_GPR[0].s, CCPU_GPR[1].s, CCPU_GPR[2].s);
+      break;
+
+    case CB_SYS_times:
+      {
+	struct tms tms;
+	struct {
+	  uint32_t tms_utime;
+	  uint32_t tms_stime;
+	  uint32_t tms_cutime;
+	  uint32_t tms_cstime;
+	} target_tms;
+
+	sc.result = times (&tms);
+	target_tms.tms_utime = tms.tms_utime;
+	target_tms.tms_stime = tms.tms_stime;
+	target_tms.tms_cutime = tms.tms_cutime;
+	target_tms.tms_cstime = tms.tms_cstime;
+
+	SIM_ASSERT (sizeof (target_tms) == 16);
+
+	if (CCPU_GPR[0].u)
+	  sim_write (sd, CCPU_GPR[0].u, (const unsigned char *) &target_tms,
+		     sizeof (target_tms));
+      }
+      break;
+
+    case CB_SYS_access:
+      {
+	char *path;
+	int rc = get_path (cb, &sc, CCPU_GPR[0].u, &path);
+
+	/* PATH is only valid when get_path succeeded.  */
+	if (rc != 0)
+	  {
+	    sc.result = -1;
+	    sc.errcode = rc;
+	    break;
+	  }
+
+	sc.result = access (path, CCPU_GPR[1].u);
+	free (path);
+      }
+      break;
+
+    case CB_SYS_link:
+      {
+	char *oldpath;
+	char *newpath;
+	int rc;
+
+	rc = get_path (cb, &sc, CCPU_GPR[0].u, &oldpath);
+	if (rc != 0)
+	  {
+	    sc.result = -1;
+	    sc.errcode = rc;
+	    break;
+	  }
+
+	rc = get_path (cb, &sc, CCPU_GPR[1].u, &newpath);
+	if (rc != 0)
+	  {
+	    free (oldpath);
+	    sc.result = -1;
+	    sc.errcode = rc;
+	    break;
+	  }
+
+	sc.result = link (oldpath, newpath);
+
+	free (oldpath);
+	free (newpath);
+      }
+      break;
+
+    case CB_SYS_uname:
+      {
+	struct utsname buf;
+
+	if ((sc.result = uname (&buf)) == 0 && CCPU_GPR[0].u)
+	  sim_write (sd, CCPU_GPR[0].u, (const unsigned char *) &buf,
+		     sizeof (buf));
+      }
+      break;
+
+    case CB_SYS_getpagesize:
+      sc.result = PAGE_SIZE;
+      break;
+
+    case CB_SYS_getuid32:
+      sc.result = getuid ();
+      break;
+
+    case CB_SYS_getgid32:
+      sc.result = getgid ();
+      break;
+
+    case CB_SYS_geteuid32:
+      sc.result = geteuid ();
+      break;
+
+    case CB_SYS_getegid32:
+      sc.result = getegid ();
+      break;
+
+    case CB_SYS_setuid32:
+      sc.result = setuid (CCPU_GPR[0].u);
+      break;
+
+    case CB_SYS_setgid32:
+      sc.result = setgid (CCPU_GPR[0].u);
+      break;
+
+    /* case CB_SYS_readv: */
+    case CB_SYS_writev:
+      {
+	/* ssize_t writev(int fd, const struct iovec *iov, int iovcnt); */
+	uint32_t iov_base = 0, iov_len = 0;
+	int fd = CCPU_GPR[0].u;
+	int piov = CCPU_GPR[1].u;
+	int iovcnt = CCPU_GPR[2].u;
+	int i;
+	int ret = 0;
+
+	/* FD indexes cb->fd_buddy, so MAX_CALLBACK_FDS itself is already
+	   out of range.  */
+	if (fd < 0 || fd >= MAX_CALLBACK_FDS || cb->fd_buddy[fd] < 0)
+	  {
+	    sc.result = -1;
+	    sc.errcode = TARGET_EBADF;
+	    break;
+	  }
+
+	/* FD stays a target descriptor: the write handler reached
+	   through cb_syscall translates it to the host descriptor
+	   itself, so translating it here as well would map it twice.  */
+
+	/* I'm not sure whether use write () to implement writev ()
+	   is better or not.  */
+	for (i = 0; i < iovcnt; i++)
+	  {
+	    /* Read the iov struct from target.  */
+	    sim_read (sd, piov + i * 8 /* sizeof (struct iovec) */,
+		      (unsigned char *) &iov_base, 4);
+	    sim_read (sd, piov + i * 8 + 4,
+		      (unsigned char *) &iov_len, 4);
+
+	    sc.func = TARGET_LINUX_SYS_write;
+	    sc.arg1 = fd;
+	    sc.arg2 = iov_base;
+	    sc.arg3 = iov_len;
+	    cb_syscall (cb, &sc);
+
+	    if (sc.result < 0)	/* on error */
+	      goto out;
+
+	    ret += sc.result;
+	    if (sc.result != iov_len) /* fail to write whole buffer */
+	      break;
+	  }
+	sc.result = ret;
+      }
+      break;
+
+    case CB_SYS_mmap2:
+      {
+	uint32_t addr = CCPU_GPR[0].u;
+	/* The length is unsigned; reading it as signed would sign-extend
+	   large requests on hosts with a 64-bit size_t.  */
+	size_t len = CCPU_GPR[1].u;
+	int prot = CCPU_GPR[2].s;
+	int flags = CCPU_GPR[3].s;
+	int fd = CCPU_GPR[4].s;
+	off_t pgoffset = CCPU_GPR[5].u;
+
+       /* void *mmap2 (void *addr, size_t length, int prot,
+		       int flags, int fd, off_t pgoffset);  */
+	sc.result = (long) nds32_mmap (cpu, addr, len, prot, flags, fd,
+				       pgoffset * PAGE_SIZE);
+      }
+      break;
+    case CB_SYS_munmap:
+      {
+	uint32_t addr = CCPU_GPR[0].u;
+	size_t len = CCPU_GPR[1].u;
+
+	sc.result = nds32_munmap (cpu, addr, len);
+      }
+      break;
+
+    /* NOTE(review): the two rlimit cases copy the host's struct rlimit
+       to and from target memory unchanged -- confirm that the layouts
+       match.  */
+    case CB_SYS_setrlimit:
+      {
+	struct rlimit rlim;
+
+	/* int setrlimit(int resource, const struct rlimit *rlim); */
+	sim_read (sd, CCPU_GPR[1].u, (unsigned char *) &rlim, sizeof (rlim));
+	sc.result = setrlimit (CCPU_GPR[0].s, &rlim);
+      }
+      break;
+
+    case CB_SYS_getrlimit:
+      {
+	struct rlimit rlim;
+
+	/* int getrlimit(int resource, struct rlimit *rlim); */
+	sc.result = getrlimit (CCPU_GPR[0].s, &rlim);
+	if (sc.result >= 0)
+	  sim_write (sd, CCPU_GPR[1].u, (const unsigned char *) &rlim,
+		     sizeof (rlim));
+      }
+      break;
+
+    case CB_SYS_mprotect:
+      sc.result = 0; /* Just do nothing now. */
+      break;
+
+    case CB_SYS_NDS32_isatty:
+      sc.result = sim_io_isatty (sd, CCPU_GPR[0].s);
+      if (sc.result == -1)
+	sc.result = 0; /* -1 is returned if EBADF, but caller wants 0. */
+      break;
+
+    case CB_SYS_NDS32_getcmdline:
+      sc.result = CCPU_GPR[0].u;
+      sim_write (sd, CCPU_GPR[0].u, (unsigned char*)sd->cmdline,
+		 strlen (sd->cmdline) + 1);
+      break;
+
+    /* This is used by libgloss only.  */
+    case CB_SYS_NDS32_errno:
+      sc.result = sim_io_get_errno (sd);
+      break;
+    }
+
+out:
+  if (sc.result < 0)
+    {
+      /* cb_syscall should set this value.
+	 Otherwise, the syscall is not handled by it.  */
+      if (sc.errcode == 0)
+	sc.errcode = errno;
+
+      /* Our libgloss implementation uses SYS_NDS32_errno for `errno'.
+	 Syscalls per se only return -1 when fail.  */
+      if (cb->syscall_map == cb_nds32_libgloss_syscall_map)
+	CCPU_GPR[0].s = -1;
+      else /* cb_nds32_linux_syscall_map */
+	CCPU_GPR[0].s = -sc.errcode;
+    }
+  else
+    CCPU_GPR[0].s = sc.result;
+  return;
+}
diff --git a/sim/nds32/nds32-syscall.h b/sim/nds32/nds32-syscall.h
new file mode 100644
index 0000000..b6f25c5
--- /dev/null
+++ b/sim/nds32/nds32-syscall.h
@@ -0,0 +1,87 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef _NDS32_SYSCALL_H_
+#define _NDS32_SYSCALL_H_
+#include "gdb/callback.h"
+
+/* Check
+	   gdb: include/gdb/callback.h
+	kernel: arch/nds32/include/asm/unistd.h
+	newlib: libgloss/nds32/syscall.h
+   for details.  */
+
+/* Identifiers for the system calls that nds32_syscall handles or maps
+   in addition to the CB_SYS_* values it takes from gdb/callback.h.
+   Each is CB_SYS_BASE plus an offset; for the Linux calls the offset is
+   the same number that the Linux syscall map adds to LINUX_SYS_BASE
+   (e.g. 45 for brk).
+   NOTE(review): CB_SYS_BASE presumably keeps these clear of the values
+   defined in gdb/callback.h -- confirm.  */
+#define CB_SYS_BASE		0x1000
+#define CB_SYS_link		(CB_SYS_BASE + 9)
+#define CB_SYS_access		(CB_SYS_BASE + 33)
+#define CB_SYS_times		(CB_SYS_BASE + 43)
+#define CB_SYS_brk		(CB_SYS_BASE + 45)
+#define CB_SYS_ioctl		(CB_SYS_BASE + 54)
+#define CB_SYS_setrlimit	(CB_SYS_BASE + 75)
+#define CB_SYS_getrlimit	(CB_SYS_BASE + 76)
+#define CB_SYS_gettimeofday	(CB_SYS_BASE + 78)
+#define CB_SYS_settimeofday	(CB_SYS_BASE + 79)
+#define CB_SYS_mmap		(CB_SYS_BASE + 90)
+#define CB_SYS_munmap		(CB_SYS_BASE + 91)
+#define CB_SYS_uname		(CB_SYS_BASE + 122)
+#define CB_SYS_mprotect		(CB_SYS_BASE + 125)
+#define CB_SYS_llseek		(CB_SYS_BASE + 140)
+#define CB_SYS_readv		(CB_SYS_BASE + 145)
+#define CB_SYS_writev		(CB_SYS_BASE + 146)
+#define CB_SYS_getpagesize	(CB_SYS_BASE + 166)
+#define CB_SYS_sigaction	(CB_SYS_BASE + 174)
+#define CB_SYS_ugetrlimit	(CB_SYS_BASE + 191)
+#define CB_SYS_mmap2		(CB_SYS_BASE + 192)
+#define CB_SYS_stat64		(CB_SYS_BASE + 195)
+#define CB_SYS_lstat64		(CB_SYS_BASE + 196)
+#define CB_SYS_fstat64		(CB_SYS_BASE + 197)
+#define CB_SYS_getuid32		(CB_SYS_BASE + 199)
+#define CB_SYS_getgid32		(CB_SYS_BASE + 200)
+#define CB_SYS_geteuid32	(CB_SYS_BASE + 201)
+#define CB_SYS_getegid32	(CB_SYS_BASE + 202)
+#define CB_SYS_setuid32		(CB_SYS_BASE + 213)
+#define CB_SYS_setgid32		(CB_SYS_BASE + 214)
+#define CB_SYS_fcntl64		(CB_SYS_BASE + 221)
+#define CB_SYS_exit_group	(CB_SYS_BASE + 248)
+
+/* Calls that exist only in the NDS32 libgloss environment.  */
+#define CB_SYS_NDS32_isatty	(CB_SYS_BASE + 0x202)
+#define CB_SYS_NDS32_errno	(CB_SYS_BASE + 0x203)
+#define CB_SYS_NDS32_getcmdline	(CB_SYS_BASE + 0x204)
+
+#if 0
+  /* More standard syscalls.  */
+  {CB_SYS_lstat,	19},
+  {CB_SYS_truncate,	21},
+  {CB_SYS_ftruncate,	22},
+  {CB_SYS_pipe,		23},
+#endif
+
+/* Target-side numbers of the Linux system calls.  LINUX_SYS_BASE is
+   added to the Linux syscall number throughout the Linux syscall map.
+   The TARGET_LINUX_SYS_* values are the target numbers nds32_syscall
+   stores in sc.func when it re-issues a call through cb_syscall.  */
+#define LINUX_SYS_BASE		0x5000
+#define TARGET_LINUX_SYS_write	(LINUX_SYS_BASE + 4)
+#define TARGET_LINUX_SYS_lseek	(LINUX_SYS_BASE + 19)
+#define TARGET_LINUX_SYS_stat	(LINUX_SYS_BASE + 106)
+#define TARGET_LINUX_SYS_lstat	(LINUX_SYS_BASE + 107)
+#define TARGET_LINUX_SYS_fstat	(LINUX_SYS_BASE + 108)
+
+/* Emulate system call SWID raised by CPU at address CIA.  */
+void nds32_syscall (sim_cpu *cpu, int swid, sim_cia cia);
+
+/* Syscall number tables for the libgloss and Linux environments.  */
+extern CB_TARGET_DEFS_MAP cb_nds32_libgloss_syscall_map[];
+extern CB_TARGET_DEFS_MAP cb_nds32_linux_syscall_map[];
+#endif
diff --git a/sim/nds32/rbtree.c b/sim/nds32/rbtree.c
new file mode 100644
index 0000000..cc68559
--- /dev/null
+++ b/sim/nds32/rbtree.c
@@ -0,0 +1,547 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "rbtree.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include <unistd.h>
+
+struct rbnode rbnode_null = RB_NULL_INIT;
+static rbnode_t rbnode_allocate_node (void *key);
+static void rbtree_insert_fixup (rbtree_t tree, rbnode_t node);
+static void rbtree_delete_fixup (rbtree_t tree, rbnode_t node);
+static void rbtree_free_node (rbnode_t node);
+
+/* Return nonzero if NODE is the RB_NULL sentinel (an empty subtree).  */
+
+int
+rbtree_isnull (rbnode_t node)
+{
+  return (node == RB_NULL);
+}
+
+/* Unless EXP is true, print the FMT message to stderr and _exit (1).  */
+
+void
+rbtree_assert (int exp, const char *fmt, ...)
+{
+  va_list ap;
+
+  if (exp)
+    return;
+
+  va_start (ap, fmt);
+  vfprintf (stderr, fmt, ap);
+  va_end (ap);
+  _exit (1);
+}
+
+/* Create an empty tree ordered by FCMP; exits if allocation fails.  */
+
+rbtree_t
+rbtree_create_tree (rbcmp_ftype fcmp, rbtrav_ftype ftrav)
+{
+  rbtree_t tree;
+
+  tree = (rbtree_t) calloc (1, sizeof (*tree));
+  RBTREE_ASSERT (tree != NULL);
+  tree->root = RB_NULL;
+  tree->rbcmp = fcmp;
+  tree->rbtrav = ftrav;
+  return tree;
+}
+
+static void
+rbtree_destroy_tree_internal (rbtree_t tree, rbnode_t node)
+{
+  if (node != RB_NULL)	/* Post-order: subtrees first, then NODE.  */
+    {
+      rbtree_destroy_tree_internal (tree, node->left);
+      rbtree_destroy_tree_internal (tree, node->right);
+      rbtree_free_node (node);
+    }
+}
+
+/* Free all nodes of TREE, then TREE itself.  Keys are not freed.  */
+
+void
+rbtree_destroy_tree (rbtree_t tree)
+{
+  rbtree_destroy_tree_internal (tree, tree->root);
+
+  memset (tree, 0, sizeof (*tree));
+  free (tree);
+}
+
+/* Allocate a red leaf node holding KEY; exits if allocation fails.  */
+
+static rbnode_t
+rbnode_allocate_node (void *key)
+{
+  rbnode_t node = (rbnode_t) calloc (1, sizeof (*node));
+
+  RBTREE_ASSERT (node != NULL);
+  node->key = key;
+  node->left = node->right = RB_NULL;
+  node->color = RB_RED;
+  return node;
+}
+
+/* Zero NODE and release it; the key it points to is not freed.  */
+
+static void
+rbtree_free_node (rbnode_t node)
+{
+  memset (node, 0, sizeof (*node));
+  free (node);
+}
+
+/* Follow parent links from NODE up to the node that has no parent.  */
+
+rbnode_t
+rbtree_root (rbnode_t node)
+{
+  for (; node->parent != NULL; node = node->parent)
+    ;
+
+  return node;
+}
+
+void
+rbtree_rotate_left (rbtree_t tree, rbnode_t node)
+{
+  rbnode_t p, x, y;	/* p: NODE's parent, x: NODE, y: its right child.  */
+
+  RBTREE_ASSERT (node->right != RB_NULL);
+
+  x = node;
+  y = node->right;
+  p = node->parent;
+
+  y->parent = x->parent;
+
+  x->right = y->left;	/* y's left subtree moves under x.  */
+  y->left->parent = x;	/* Written even when y->left is RB_NULL.  */
+  y->left = x;
+  x->parent = y;
+
+  if (p == NULL)	/* x had no parent: y becomes the root.  */
+    tree->root = y;
+  else if (p->left == x)
+    p->left = y;
+  else
+    p->right = y;
+}
+
+void
+rbtree_rotate_right (rbtree_t tree, rbnode_t node)
+{
+  rbnode_t p, x, y;	/* p: NODE's parent, x: NODE, y: its left child.  */
+
+  RBTREE_ASSERT (node->left != RB_NULL);
+
+  x = node;
+  y = node->left;
+  p = node->parent;
+
+  y->parent = x->parent;
+
+  x->left = y->right;	/* y's right subtree moves under x.  */
+  y->right->parent = x;	/* Written even when y->right is RB_NULL.  */
+  y->right = x;
+  x->parent = y;
+
+  if (p == NULL)	/* x had no parent: y becomes the root.  */
+    tree->root = y;
+  else if (p->right == x)
+    p->right = y;
+  else
+    p->left = y;
+}
+
+static void
+rbtree_insert_fixup (rbtree_t tree, rbnode_t node)
+{
+  rbnode_t z, y, p, r;
+
+  /* Repair red-red violations on the path from NODE to the root:
+		r
+	      /   \
+    (parent) p    y (uncle)
+	    / \
+	       z (node)
+     The loop runs while z has a red parent and a grandparent.  */
+  z = node;
+  while ((p = z->parent) && (r = p->parent) && p->color == RB_RED)
+    {
+      if (p == r->left)
+	{
+	  y = r->right;
+
+	  /* Case 1: red uncle - recolor and continue from r.  */
+	  if (y->color == RB_RED)
+	    {
+	      p->color = RB_BLACK;
+	      y->color = RB_BLACK;
+	      r->color = RB_RED;
+	      z = r;
+	    }
+	  else
+	    {
+	      /* Case 2: z is an inner child - rotate it outward.  */
+	      if (z == p->right)
+		{
+		  rbtree_rotate_left (tree, p);
+		  z = p;
+		  p = z->parent;
+		}
+	      /* Case 3: rotate r and swap the p/r colors.  */
+	      rbtree_rotate_right (tree, r);
+	      p->color = RB_BLACK;
+	      r->color = RB_RED;
+	    }
+	}
+      else
+	{
+	  y = r->left;
+
+	  /* Case 1: red uncle - recolor and continue from r.  */
+	  if (y->color == RB_RED)
+	    {
+	      p->color = RB_BLACK;
+	      y->color = RB_BLACK;
+	      r->color = RB_RED;
+	      z = r;
+	    }
+	  else
+	    {
+	      /* Case 2: z is an inner child - rotate it outward.  */
+	      if (z == p->left)
+		{
+		  rbtree_rotate_right (tree, p);
+		  z = p;
+		  p = z->parent;
+		}
+	      /* Case 3: rotate r and swap the p/r colors.  */
+	      rbtree_rotate_left (tree, r);
+	      p->color = RB_BLACK;
+	      r->color = RB_RED;
+	    }
+	}
+    }
+  tree->root->color = RB_BLACK;
+}
+
+/* Insert a new node holding KEY into TREE.  A key that compares equal
+   to an existing one is placed in that node's right subtree.  */
+
+void
+rbtree_insert (rbtree_t tree, void *key)
+{
+  int cmp = 0;
+  rbnode_t prev = NULL;
+  rbnode_t new_node;
+  rbnode_t node = tree->root;
+
+  new_node = rbnode_allocate_node (key);
+
+  /* Descend to the leaf position, remembering the last real node.  */
+  while (node != RB_NULL)
+    {
+      cmp = tree->rbcmp (key, node->key);
+
+      prev = node;
+      if (cmp < 0) /* less than */
+	node = node->left;
+      else
+	node = node->right;
+    }
+
+  new_node->parent = prev;
+  if (prev == NULL)
+    tree->root = new_node;
+  else if (cmp < 0)
+    prev->left = new_node;
+  else
+    prev->right = new_node;
+
+  /* Always run the fixup: besides repairing red-red violations it
+     paints the root black, which the first node inserted into an
+     empty tree needs too.  */
+  rbtree_insert_fixup (tree, new_node);
+}
+
+/* Return the node whose key compares equal to KEY, or NULL if none.  */
+
+rbnode_t
+rbtree_find (rbtree_t tree, void *key)
+{
+  int cmp;
+  rbnode_t node = tree->root;
+
+  while (node != RB_NULL)
+    {
+      cmp = tree->rbcmp (key, node->key);
+
+      if (cmp == 0)
+	return node;
+      else if (cmp < 0)
+	node = node->left;
+      else
+	node = node->right;
+    }
+
+  return NULL;
+}
+
+static void
+rbtree_delete_fixup (rbtree_t tree, rbnode_t node)
+{
+  /* x (NODE) replaced a spliced-out black node; w is x's sibling:
+		 p
+	       /   \
+	      x     w
+	     / \   / \
+		  C   E
+     Loop until x is red or the root, then paint x black.  */
+
+  rbnode_t x, w;
+
+  x = node;
+
+
+  while (x != tree->root && x->color == RB_BLACK)
+    {
+      if (x->parent->left == x)
+	{
+	  w = x->parent->right;
+	  if (w->color == RB_RED)
+	    {
+	      /* Case 1: red sibling - rotate so the sibling is black.  */
+	      x->parent->color = RB_RED;
+	      w->color = RB_BLACK;
+	      rbtree_rotate_left (tree, x->parent);
+	    }
+	  else if (w->left->color == RB_BLACK
+		   && w->right->color == RB_BLACK)
+	    {
+	      w->color = RB_RED;	/* Case 2: recolor, move up.  */
+	      x = x->parent;
+	    }
+	  else
+	    {
+	      if (w->right->color == RB_BLACK)	/* Case 3.  */
+		{
+		  w->left->color = RB_BLACK;
+		  w->color = RB_RED;
+		  rbtree_rotate_right (tree, w);
+		  w = w->parent;
+		}
+	      w->color = w->parent->color;	/* Case 4.  */
+	      w->parent->color = RB_BLACK;
+	      w->right->color = RB_BLACK;
+	      rbtree_rotate_left (tree, w->parent);
+	      x = tree->root;
+	    }
+	}
+      else
+	{
+	  w = x->parent->left;
+	  if (w->color == RB_RED)
+	    {
+	      /* Case 1: red sibling - rotate so the sibling is black.  */
+	      x->parent->color = RB_RED;
+	      w->color = RB_BLACK;
+	      rbtree_rotate_right (tree, x->parent);
+	    }
+	  else if (w->right->color == RB_BLACK
+		   && w->left->color == RB_BLACK)
+	    {
+	      w->color = RB_RED;	/* Case 2: recolor, move up.  */
+	      x = x->parent;
+	    }
+	  else
+	    {
+	      if (w->left->color == RB_BLACK)	/* Case 3.  */
+		{
+		  w->right->color = RB_BLACK;
+		  w->color = RB_RED;
+		  rbtree_rotate_left (tree, w);
+		  w = w->parent;
+		}
+	      w->color = w->parent->color;	/* Case 4.  */
+	      w->parent->color = RB_BLACK;
+	      w->left->color = RB_BLACK;
+	      rbtree_rotate_right (tree, w->parent);
+	      x = tree->root;
+	    }
+	}
+    }
+
+  x->color = RB_BLACK;
+}
+
+void
+rbtree_delete_node (rbtree_t tree, rbnode_t node)
+{
+  /* Case 1. z has no child - just remove it.  */
+  /* Case 2. z has only one child - splice out z.  */
+  /* Case 3. z has two children - splice out its successor y and replace z.  */
+
+  rbnode_t z = node;
+  rbnode_t p = NULL;
+  rbnode_t x = NULL;
+  rbnode_t y = NULL;
+
+  /* z - The node to be deleted.
+     y - The node actually unlinked and freed (z or its successor).
+     x - The child of y that takes y's place; may be RB_NULL.  */
+
+  if (z->left != RB_NULL && z->right != RB_NULL)
+    {
+      RBTREE_ASSERT (rbtree_minimum (z->right) == rbtree_successor (z));
+      y = rbtree_minimum (z->right);
+    }
+  else
+    y = z;
+
+  if (y->left != RB_NULL)
+    x = y->left;
+  else
+    x = y->right;
+
+  /* Unconditional: rbtree_delete_fixup reads x->parent even for RB_NULL.  */
+    x->parent = y->parent;
+
+  if ((p = y->parent) != NULL)
+    {
+      if (p->left == y)
+	p->left = x;
+      else
+	p->right = x;
+
+      if (x != RB_NULL)
+	x->parent = p;
+    }
+  else
+    {
+      tree->root = x;
+      x->parent = NULL;
+    }
+
+  if (z != y)
+    z->key = y->key;	/* z stays in the tree and takes over y's key.  */
+
+  if (y->color == RB_BLACK)
+    rbtree_delete_fixup (tree, x);
+
+  rbtree_free_node (y);
+}
+
+void
+rbtree_delete (rbtree_t tree, void *key)
+{
+  rbnode_t victim;
+
+  /* Nothing to do when no node holds KEY.  */
+  victim = rbtree_find (tree, key);
+  if (victim != NULL)
+    rbtree_delete_node (tree, victim);
+}
+
+/* Black height of NODE's subtree, counting RB_NULL; asserts balance.  */
+
+int
+rbtree_bh (rbnode_t node)
+{
+  int lbh, rbh;
+
+  if (node == RB_NULL)
+    return 1;
+
+  lbh = rbtree_bh (node->left);
+  rbh = rbtree_bh (node->right);
+
+  RBTREE_ASSERT (lbh == rbh);
+
+  return lbh + (node->color == RB_BLACK ? 1 : 0);
+}
+
+/* Walk NODE's subtree in pre-order, calling RBTRAV (TREE, node, ARG).  */
+
+void
+rbtree_traverse_node (rbtree_t tree, rbnode_t node, rbtrav_ftype rbtrav,
+		      void *arg)
+{
+  if (node == RB_NULL)
+    return;
+
+  rbtrav (tree, node, arg);
+  rbtree_traverse_node (tree, node->left, rbtrav, arg);
+  rbtree_traverse_node (tree, node->right, rbtrav, arg);
+}
+
+rbnode_t rbtree_minimum (rbnode_t node)
+{
+  for (; node->left != RB_NULL; node = node->left)
+    ;	/* Keep descending left; stop at the leftmost node.  */
+  return node;
+}
+
+rbnode_t rbtree_maximum (rbnode_t node)
+{
+  for (; node->right != RB_NULL; node = node->right)
+    ;	/* Keep descending right; stop at the rightmost node.  */
+  return node;
+}
+
+/* Traverse TREE from its root using the tree's own rbtrav callback.  */
+
+void
+rbtree_traverse (rbtree_t tree, void *arg)
+{
+  rbtree_traverse_node (tree, tree->root, tree->rbtrav, arg);
+}
+
+rbnode_t
+rbtree_successor (rbnode_t node)
+{
+  if (node->right != RB_NULL)
+    return rbtree_minimum (node->right);
+
+  while (node->parent != NULL && node->parent->right == node)
+    node = node->parent;	/* Climb while NODE is a right child.  */
+  return node->parent;		/* NULL if the climb reached the top.  */
+}
+
+rbnode_t
+rbtree_predecessor (rbnode_t node)
+{
+  if (node->left != RB_NULL)
+    return rbtree_maximum (node->left);
+
+  while (node->parent != NULL && node->parent->left == node)
+    node = node->parent;	/* Climb while NODE is a left child.  */
+  return node->parent;		/* NULL if the climb reached the top.  */
+}
diff --git a/sim/nds32/rbtree.h b/sim/nds32/rbtree.h
new file mode 100644
index 0000000..7903348
--- /dev/null
+++ b/sim/nds32/rbtree.h
@@ -0,0 +1,96 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef RBTREE_H
+#define RBTREE_H
+
+#include <stdio.h>
+
+enum RB_COLOR
+{
+  RB_BLACK = 0xabcd,
+  RB_RED = 0xdcba,
+};
+
+enum RB_ERROR
+{
+  RBE_PASS,
+  RBE_RULE1	= 0x81000000,
+  RBE_RULE2	= 0x82000000,
+  RBE_RULE3	= 0x84000000,
+  RBE_RULE4	= 0x88000000,
+  RBE_RULE5	= 0x90000000,
+  RBE_CONSIST	= 0xa0000000,
+  RBE_MASK	= 0xff000000,
+};
+
+typedef struct rbnode *rbnode_t;
+typedef struct rbtree *rbtree_t;
+
+typedef int (*rbcmp_ftype) (void *lhs, void *rhs);
+typedef void (*rbtrav_ftype) (rbtree_t tree, rbnode_t node, void *arg);
+
+struct rbnode
+{
+  struct rbnode *parent;
+  struct rbnode *left;
+  struct rbnode *right;
+  enum RB_COLOR color;
+  void *key;
+};
+
+#define RB_NULL_INIT {NULL, NULL, NULL, RB_BLACK}
+
+struct rbtree
+{
+  struct rbnode *root;
+  rbcmp_ftype rbcmp;
+  rbtrav_ftype rbtrav;
+};
+
+rbtree_t rbtree_create_tree (rbcmp_ftype fcmp, rbtrav_ftype ftrav);
+rbnode_t rbtree_root (rbnode_t node);
+int rbtree_bh (rbnode_t node);
+void rbtree_destroy_tree (rbtree_t tree);
+void rbtree_insert (rbtree_t tree, void *key);
+void rbtree_delete_node (rbtree_t tree, rbnode_t node);
+void rbtree_delete (rbtree_t tree, void *key);
+void rbtree_traverse (rbtree_t tree, void *arg);
+void rbtree_traverse_node (rbtree_t tree, rbnode_t node, rbtrav_ftype ftrav,
+			   void *arg);
+rbnode_t rbtree_find (rbtree_t tree, void *key);
+void rbtree_rotate_left (rbtree_t tree, rbnode_t node);
+void rbtree_rotate_right (rbtree_t tree, rbnode_t node);
+void rbtree_assert (int exp, const char *fmt, ...);
+int rbtree_isnull (rbnode_t node);
+int rbtree_verify (rbtree_t tree, FILE *fp);
+rbnode_t rbtree_minimum (rbnode_t node);
+rbnode_t rbtree_maximum (rbnode_t node);
+rbnode_t rbtree_successor (rbnode_t node);
+rbnode_t rbtree_predecessor (rbnode_t node);
+
+#define RBTREE_ASSERT(exp)	rbtree_assert (exp, #exp "\n")
+
+extern struct rbnode rbnode_null;
+#define RB_NULL	(&rbnode_null)
+
+/* extern struct rbnode rbnode_null; */
+
+#endif
diff --git a/sim/nds32/sim-main.h b/sim/nds32/sim-main.h
new file mode 100644
index 0000000..83337df
--- /dev/null
+++ b/sim/nds32/sim-main.h
@@ -0,0 +1,112 @@
+/* Simulator for NDS32 processors.
+
+   Copyright (C) 2011-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of simulators.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef _NDS32_MAIN_SIM_H_
+#define _NDS32_MAIN_SIM_H_
+
+#include <stdint.h>
+
+#include "sim-basics.h"
+#include "sim-signal.h"
+
+typedef struct _sim_cpu SIM_CPU; /* sim-reg.c needs this. */
+typedef unsigned32 sim_cia;
+
+#define CIA_GET(cpu)     CPU_PC_GET (cpu)
+#define CIA_SET(cpu,val) CPU_PC_SET ((cpu), (val))
+
+#include "sim-base.h"
+
+#include "nds32-mm.h"
+
+typedef union {
+  uint32_t u;
+  int32_t s;
+} reg_t;
+
+enum nds32_internal_flags
+{
+  /* Set NIF_EX9 to indicate the instruction is executed in ITB.
+     JAL and J work differently in ITB.  */
+  NIF_EX9 = 1,
+  /* Set NIF_BRANCH to indicate a branch is taken.  */
+  NIF_BRANCH = 2,
+};
+
+struct _sim_cpu {
+  /* 32 general purpose registers. */
+  reg_t reg_gpr[32];
+#define CCPU_GPR	(cpu->reg_gpr)
+
+  /* User registers. 32 group x 32 USR */
+  reg_t reg_usr[32 * 32];
+#define CCPU_USR	(cpu->reg_usr)
+
+  /* System registers.  Major x Minor x Ext */
+  reg_t reg_sr[8 * 16 * 8];
+#define CCPU_SR		(cpu->reg_sr)
+
+  /* Floating-point registers. 32 single union 32 double. FIXME */
+  reg_t reg_fpr[64];
+#define CCPU_FPR	(cpu->reg_fpr)
+
+  enum nds32_internal_flags iflags;
+  /* If NIF_BRANCH, this is the destination address.  */
+  SIM_ADDR	baddr;
+
+  sim_cpu_base base;
+};
+
+struct sim_state {
+  sim_cpu *cpu[MAX_NR_PROCESSORS];
+#if (WITH_SMP)
+#define STATE_CPU(sd,n) ((sd)->cpu[n])
+#else
+#define STATE_CPU(sd,n) ((sd)->cpu[0])
+#endif
+#define STATE_BOARD_DATA(sd) (&(sd)->board)
+
+  char cmdline[256];		/* cmdline buffer for -mcrt-arg hacking. */
+
+  struct bfd *interp_bfd;	/* For Linux dynamic linker.  */
+  uint32_t interp_base;		/* Base address of where interp is loaded. */
+  uint32_t exec_base;		/* Base address of where executable is loaded. */
+  /* If the same memory-region is already attached (registered) to sim-core,
+     the program just crashes. Unfortunately, we have no way to know whether
+     the region is attached or not, so I use `mem_attached' to bookkeep it.
+
+     MEMOPT provides `memory-delete all' command to delete all the mappings,
+     but if sim_core_attach is used in order to attach device_io, then
+     there is no way to detach all. */
+  int mem_attached;
+
+  struct nds32_mm mm;
+#define STATE_MM(sd) (&(sd)->mm)
+
+  int gprof;
+
+  sim_state_base base;
+};
+
+#include "sim-engine.h"
+#include "sim-options.h"
+#include "run-sim.h"
+
+#endif
diff --git a/sim/nds32/tconfig.in b/sim/nds32/tconfig.in
new file mode 100644
index 0000000..9fd14f4
--- /dev/null
+++ b/sim/nds32/tconfig.in
@@ -0,0 +1,12 @@
+#ifndef NDS32_TCONFIG_H
+#define NDS32_TCONFIG_H
+
+/* nds32 target configuration file.  */
+
+/* See sim-hload.c.  */
+#define SIM_HANDLES_LMA 1
+
+/* For Linux VMA support.  */
+#define WITH_DEVICES 1
+
+#endif



More information about the Gdb-patches mailing list