From 0930680cb96213cbe195d8f5a11516d80f80e88d Mon Sep 17 00:00:00 2001 From: KazushiKawamura Date: Sun, 26 Aug 2018 00:01:12 +0900 Subject: [PATCH] Address boardstr issue --- hls_2018/router_03_boardstr/Makefile | 20 + hls_2018/router_03_boardstr/Makefile.cygwin | 14 + hls_2018/router_03_boardstr/ap_int.h | 521 ++ .../router_03_boardstr/etc/ap_fixed_sim.h | 2451 +++++++ hls_2018/router_03_boardstr/etc/ap_int_sim.h | 1629 +++++ hls_2018/router_03_boardstr/etc/ap_private.h | 5858 +++++++++++++++++ hls_2018/router_03_boardstr/main.cpp | 99 + hls_2018/router_03_boardstr/router.cpp | 518 ++ hls_2018/router_03_boardstr/router.hpp | 56 + 9 files changed, 11166 insertions(+) create mode 100755 hls_2018/router_03_boardstr/Makefile create mode 100755 hls_2018/router_03_boardstr/Makefile.cygwin create mode 100755 hls_2018/router_03_boardstr/ap_int.h create mode 100755 hls_2018/router_03_boardstr/etc/ap_fixed_sim.h create mode 100755 hls_2018/router_03_boardstr/etc/ap_int_sim.h create mode 100755 hls_2018/router_03_boardstr/etc/ap_private.h create mode 100755 hls_2018/router_03_boardstr/main.cpp create mode 100755 hls_2018/router_03_boardstr/router.cpp create mode 100755 hls_2018/router_03_boardstr/router.hpp diff --git a/hls_2018/router_03_boardstr/Makefile b/hls_2018/router_03_boardstr/Makefile new file mode 100755 index 0000000..8f79e7a --- /dev/null +++ b/hls_2018/router_03_boardstr/Makefile @@ -0,0 +1,20 @@ +TARGET = sim +OBJS = $(CPPS:.cpp=.o) +CPPS = $(wildcard *.cpp) +CXX = g++ +CXXFLAGS = -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label -DSOFTWARE -DCALCTIME + +all: $(TARGET) + +$(TARGET): $(OBJS) + $(CXX) -O3 -o $@ $(OBJS) + +run: + python3 ../NLGenerator.py -x 20 -y 20 -z 6 -l 100;\ + python3 ./gen_boardstr.py Q-20x20x5_100_10.txt |\ + ./$(TARGET) - + + +clean: + rm *.o + rm $(TARGET) diff --git a/hls_2018/router_03_boardstr/Makefile.cygwin b/hls_2018/router_03_boardstr/Makefile.cygwin new file mode 100755 index 0000000..866fdcd --- /dev/null +++ 
b/hls_2018/router_03_boardstr/Makefile.cygwin @@ -0,0 +1,14 @@ +TARGET = sim +OBJS = $(CPPS:.cpp=.o) +CPPS = $(wildcard *.cpp) +CXX = g++ +CXXFLAGS = -O3 -Wall -Wno-unknown-pragmas -Wno-unused-label -DSOFTWARE -DCALCTIME + +all: $(TARGET) + +$(TARGET): $(OBJS) + $(CXX) -O3 -Wl,--stack,33554432 -o $@ $(OBJS) + +clean: + rm *.o + rm $(TARGET) diff --git a/hls_2018/router_03_boardstr/ap_int.h b/hls_2018/router_03_boardstr/ap_int.h new file mode 100755 index 0000000..b8d9fdc --- /dev/null +++ b/hls_2018/router_03_boardstr/ap_int.h @@ -0,0 +1,521 @@ +/* + * Copyright 2012 Xilinx, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __AESL_AP_SIM_H__ +#define __AESL_AP_SIM_H__ + +#ifndef __cplusplus +#error C++ is required to include this header file +#else + +#include "etc/ap_int_sim.h" +#include "etc/ap_fixed_sim.h" + +//Forward declaration +template class ap_fixed; +template class ap_ufixed; +template class ap_int; +template class ap_uint; + +//AP_INT +//-------------------------------------------------------- +template +class ap_int : public ap_private<_AP_W, true> { +#ifdef _MSC_VER +#pragma warning(disable: 4521 4522) +#endif /* #ifdef _MSC_VER */ +public: + typedef ap_private<_AP_W, true> Base; + + //Constructor + INLINE ap_int(): Base() {} + template + INLINE ap_int(const volatile ap_int<_AP_W2> &op):Base((const ap_private<_AP_W2,true> &)(op)) {} + + template + INLINE ap_int(const ap_int<_AP_W2> &op):Base((const ap_private<_AP_W2,true> &)(op)) {} + + template + INLINE ap_int(const ap_uint<_AP_W2> &op):Base((const ap_private<_AP_W2,false> &)(op)) {} + + template + INLINE ap_int(const volatile ap_uint<_AP_W2> &op):Base((const ap_private<_AP_W2,false> &)(op)) {} + + template + INLINE ap_int(const ap_range_ref<_AP_W2, _AP_S2>& ref):Base(ref) {} + + template + INLINE ap_int(const ap_bit_ref<_AP_W2, _AP_S2>& ref):Base(ref) {} + + template + INLINE ap_int(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& ref):Base(ref) {} + + template + INLINE ap_int(const ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + :Base(op.to_ap_private()) {} + + template + INLINE ap_int(const ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + :Base(op.to_ap_private()) {} + + template + INLINE ap_int(const volatile ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + :Base(op.to_ap_private()) {} + + template + INLINE ap_int(const volatile ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + :Base(op.to_ap_private()) {} + + template + INLINE ap_int(const ap_private<_AP_W2, _AP_S2>& op):Base(op) {} + + template + INLINE ap_int(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, 
+ _AP_N2>& op):Base(op) {} + + template + INLINE ap_int(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, + _AP_N2>& op):Base(op) {} + + template + INLINE ap_int(const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& op):Base(op.to_ap_private()) {} + +#define CTOR(TYPE) \ + INLINE ap_int(TYPE v):Base(v) {} + CTOR(bool) + CTOR(signed char) + CTOR(unsigned char) + CTOR(short) + CTOR(unsigned short) + CTOR(int) + CTOR(unsigned int) + CTOR(long) + CTOR(unsigned long) + CTOR(unsigned long long) + CTOR(long long) + CTOR(float) + CTOR(double) + CTOR(const char*) + CTOR(const std::string&) +#undef CTOR + INLINE ap_int(const char* str, signed char rd):Base(str, rd) {} + //Assignment + //Another form of "write" + INLINE void operator = (const ap_int<_AP_W>& op2) volatile { + const_cast(this)->operator = (op2); + } + + INLINE void operator = (const volatile ap_int<_AP_W>& op2) volatile { + const_cast(this)->operator = (op2); + } + + INLINE ap_int<_AP_W>& operator = (const volatile ap_int<_AP_W>& op2) { + Base::operator = (const_cast& >(op2)); + return *this; + } + + INLINE ap_int<_AP_W>& operator = (const ap_int<_AP_W>& op2) { + Base::operator = ((const ap_private<_AP_W, true>&)op2); + return *this; + } +}; + +//AP_UINT +//--------------------------------------------------------------- +template +class ap_uint: public ap_private<_AP_W, false> { +#ifdef _MSC_VER +#pragma warning( disable : 4521 4522 ) +#endif +public: + typedef ap_private<_AP_W, false> Base; + //Constructor + INLINE ap_uint(): Base() {} + INLINE ap_uint(const ap_uint<_AP_W>& op) :Base(dynamic_cast&>(op)) {} + INLINE ap_uint(const volatile ap_uint<_AP_W>& op):Base(dynamic_cast&>(op)){} + template + INLINE ap_uint(const volatile ap_uint<_AP_W2> &op):Base((const ap_private<_AP_W2, false>&)(op)) {} + + template + INLINE ap_uint(const ap_uint<_AP_W2> &op) : Base((const ap_private<_AP_W2, false>&)(op)){} + + template + INLINE ap_uint(const ap_int<_AP_W2> &op) : Base((const ap_private<_AP_W2, 
true>&)(op)) {} + + template + INLINE ap_uint(const volatile ap_int<_AP_W2> &op) : Base((const ap_private<_AP_W2, false>&)(op)) {} + + template + INLINE ap_uint(const ap_range_ref<_AP_W2, _AP_S2>& ref):Base(ref) {} + + template + INLINE ap_uint(const ap_bit_ref<_AP_W2, _AP_S2>& ref):Base(ref) {} + + template + INLINE ap_uint(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& ref):Base(ref) {} + + template + INLINE ap_uint(const ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + :Base(op.to_ap_private()) {} + + template + INLINE ap_uint(const ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + :Base(op.to_ap_private()) {} + + template + INLINE ap_uint(const volatile ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + :Base(op.to_ap_private()) {} + + template + INLINE ap_uint(const volatile ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, _AP_N2>& op) + :Base(op) {} + + template + INLINE ap_uint(const ap_private<_AP_W2, _AP_S2>& op):Base(op) {} + + template + INLINE ap_uint(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, + _AP_N2>& op):Base(op) {} + + template + INLINE ap_uint(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, + _AP_N2>& op):Base(op) {} + + template + INLINE ap_uint(const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& op):Base(op.to_ap_private()) {} + +#define CTOR(TYPE) \ + INLINE ap_uint(TYPE v):Base(v) {} + CTOR(bool) + CTOR(signed char) + CTOR(unsigned char) + CTOR(short) + CTOR(unsigned short) + CTOR(int) + CTOR(unsigned int) + CTOR(long) + CTOR(unsigned long) + CTOR(unsigned long long) + CTOR(long long) + CTOR(float) + CTOR(double) + CTOR(const char*) + CTOR(const std::string&) +#undef CTOR + INLINE ap_uint(const char* str, signed char rd):Base(str, rd) {} + //Assignment + //Another form of "write" + INLINE void operator = (const ap_uint<_AP_W>& op2) volatile { + Base::operator = (op2); + } + + INLINE void operator = (const volatile ap_uint<_AP_W>& op2) volatile { + Base::operator = (op2); + } + + 
INLINE ap_uint<_AP_W>& operator = (const volatile ap_uint<_AP_W>& op2) { + Base::operator = (op2); + return *this; + } + + INLINE ap_uint<_AP_W>& operator = (const ap_uint<_AP_W>& op2) { + Base::operator = ((const ap_private<_AP_W, false>&)(op2)); + return *this; + } +}; + +#define ap_bigint ap_int +#define ap_biguint ap_uint + +//AP_FIXED +//--------------------------------------------------------------------- +template +class ap_fixed: public ap_fixed_base<_AP_W, _AP_I, true, _AP_Q, _AP_O, _AP_N> { +#ifdef _MSC_VER +#pragma warning( disable : 4521 4522 ) +#endif +public: + typedef ap_fixed_base<_AP_W, _AP_I, true, _AP_Q, _AP_O, _AP_N> Base; + //Constructor + INLINE ap_fixed():Base() {} + + template + INLINE ap_fixed(const ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, + _AP_N2>& op): Base(op) {} + + template + INLINE ap_fixed(const ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, + _AP_N2>& op): Base(ap_fixed_base<_AP_W2, _AP_I2, + false, _AP_Q2, _AP_O2, _AP_N2>(op)) {} + + template + INLINE ap_fixed(const ap_int<_AP_W2>& op): + Base(ap_private<_AP_W2, true>(op)) {} + + template + INLINE ap_fixed(const ap_uint<_AP_W2>& op):Base(ap_private<_AP_W2, false>(op)) {} + + template + INLINE ap_fixed(const volatile ap_fixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, + _AP_N2>& op): Base(ap_fixed_base<_AP_W2, _AP_I2, + true, _AP_Q2, _AP_O2, _AP_N2>(op)) {} + + template + INLINE ap_fixed(const volatile ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, _AP_O2, + _AP_N2>& op): Base(ap_fixed_base<_AP_W2, _AP_I2, + false, _AP_Q2, _AP_O2, _AP_N2>(op)) {} + + template + INLINE ap_fixed(const volatile ap_int<_AP_W2>& op): + Base(ap_private<_AP_W2, true>(op)) {} + + template + INLINE ap_fixed(const volatile ap_uint<_AP_W2>& op):Base(op) {} + + template + INLINE ap_fixed(const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& op):Base(op) {} + + template + INLINE ap_fixed(const ap_bit_ref<_AP_W2, _AP_S2>& op): + Base(op) {} + + template + INLINE ap_fixed(const ap_range_ref<_AP_W2, _AP_S2>& op): + Base(op) 
{} + + template + INLINE ap_fixed(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& op): + Base(op) {} + + template + INLINE ap_fixed(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& op): Base(op) {} + + template + INLINE ap_fixed(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& op): Base(op) {} + + template + INLINE ap_fixed(const ap_private<_AP_W2, _AP_S2>& op):Base(op) {} + + #define CTOR(TYPE) \ + INLINE ap_fixed(TYPE v):Base(v) {} + CTOR(bool) + CTOR(signed char) + CTOR(unsigned char) + CTOR(short) + CTOR(unsigned short) + CTOR(int) + CTOR(unsigned int) + CTOR(long) + CTOR(unsigned long) + CTOR(unsigned long long) + CTOR(long long) + CTOR(float) + CTOR(double) + CTOR(const char*) + CTOR(const std::string&) +#undef CTOR + INLINE ap_fixed(const char* str, signed char rd):Base(str, rd) {} + + //Assignment + INLINE ap_fixed& operator = (const ap_fixed<_AP_W, _AP_I, + _AP_Q, _AP_O, _AP_N>& op) { + Base::operator = (op); + return *this; + } + + INLINE ap_fixed& operator = (const volatile ap_fixed<_AP_W, _AP_I, + _AP_Q, _AP_O, _AP_N>& op) { + Base::operator = (op); + return *this; + } +}; + +//AP_ UFIXED +//--- ---------------------------------------------------------------- +template +class ap_ufixed : public ap_fixed_base<_AP_W, _AP_I, false, _AP_Q, _AP_O, _AP_N> { +#ifdef _MSC_VER +#pragma warning(disable: 4521 4522) +#endif /* #ifdef _MSC_VER */ +public: + typedef ap_fixed_base<_AP_W, _AP_I, false, _AP_Q, _AP_O, _AP_N> Base; + + //Constructor + INLINE ap_ufixed():Base() {} + + template + INLINE ap_ufixed(const ap_fixed<_AP_W2, _AP_I2, _AP_Q2, + _AP_O2, _AP_N2>& op) : Base(ap_fixed_base<_AP_W2, + _AP_I2, true, _AP_Q2, _AP_O2, _AP_N2>(op)) {} + + template + INLINE ap_ufixed(const ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, + _AP_O2, _AP_N2>& op): Base(ap_fixed_base<_AP_W2, _AP_I2, + false, _AP_Q2, _AP_O2, _AP_N2>(op)) {} + + template + INLINE ap_ufixed(const ap_int<_AP_W2>& op): + Base((const ap_private<_AP_W2, true>&)(op)) {} + + 
template + INLINE ap_ufixed(const ap_uint<_AP_W2>& op): + Base((const ap_private<_AP_W2, false>&)(op)) {} + + template + INLINE ap_ufixed(const volatile ap_fixed<_AP_W2, _AP_I2, _AP_Q2, + _AP_O2, _AP_N2>& op) : Base(ap_fixed_base<_AP_W2, + _AP_I2, true, _AP_Q2, _AP_O2, _AP_N2>(op)) {} + + template + INLINE ap_ufixed(const volatile ap_ufixed<_AP_W2, _AP_I2, _AP_Q2, + _AP_O2, _AP_N2>& op): Base(ap_fixed_base<_AP_W2, _AP_I2, + false, _AP_Q2, _AP_O2, _AP_N2>(op)) {} + + template + INLINE ap_ufixed(const volatile ap_int<_AP_W2>& op): + Base(ap_private<_AP_W2, true>(op)) {} + + template + INLINE ap_ufixed(const volatile ap_uint<_AP_W2>& op): + Base(ap_private<_AP_W2, false>(op)) {} + + template + INLINE ap_ufixed(const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2>& op):Base(op) {} + + template + INLINE ap_ufixed(const ap_bit_ref<_AP_W2, _AP_S2>& op): + Base(op) {} + + template + INLINE ap_ufixed(const ap_range_ref<_AP_W2, _AP_S2>& op): + Base(op) {} + + template + INLINE ap_ufixed(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& op): + Base(op) {} + + template + INLINE ap_ufixed(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& op): Base(op) {} + + template + INLINE ap_ufixed(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& op): Base(op) {} + + template + INLINE ap_ufixed(const ap_private<_AP_W2, _AP_S2>& op):Base(op) {} + + #define CTOR(TYPE) \ + INLINE ap_ufixed(TYPE v):Base(v) {} + CTOR(bool) + CTOR(signed char) + CTOR(unsigned char) + CTOR(short) + CTOR(unsigned short) + CTOR(int) + CTOR(unsigned int) + CTOR(long) + CTOR(unsigned long) + CTOR(unsigned long long) + CTOR(long long) + CTOR(float) + CTOR(double) + CTOR(const char*) + CTOR(const std::string&) +#undef CTOR + INLINE ap_ufixed(const char* str, signed char rd):Base(str, rd) {} + + //Assignment + INLINE ap_ufixed& operator = (const ap_ufixed<_AP_W, _AP_I, + _AP_Q, _AP_O, _AP_N>& op) { + Base::operator = (op); + return *this; + } + + INLINE ap_ufixed& 
operator = (const volatile ap_ufixed<_AP_W, _AP_I, + _AP_Q, _AP_O, _AP_N>& op) { + Base::V = const_cast(op); + return *this; + } +}; + +#if defined(SYSTEMC_H) || defined(SYSTEMC_INCLUDED) +template +INLINE void sc_trace(sc_core::sc_trace_file *tf, const ap_int<_AP_W> &op, + const std::string &name) { + if (tf) + tf->trace(sc_dt::sc_lv<_AP_W>(op.to_string(2).c_str()), name); +} + +template +INLINE void sc_trace(sc_core::sc_trace_file *tf, const ap_uint<_AP_W> &op, + const std::string &name) { + if (tf) + tf->trace(sc_dt::sc_lv<_AP_W>(op.to_string(2).c_str()), name); +} + +template +INLINE void sc_trace(sc_core::sc_trace_file *tf, const ap_fixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N >&op, const std::string &name) { + tf->trace(sc_dt::sc_lv<_AP_W>(op.to_string(2).c_str()), name); +} + +template +INLINE void sc_trace(sc_core::sc_trace_file *tf, const ap_ufixed<_AP_W, _AP_I, _AP_Q, _AP_O, _AP_N >&op, const std::string &name) { + tf->trace(sc_dt::sc_lv<_AP_W>(op.to_string(2).c_str()), name); +} +#endif /* #if defined(SYSTEMC_H) || defined(SYSTEMC_INCLUDED) */ +#endif /* #ifndef __cplusplus */ +#endif /* #ifndef __AESL_AP_SIM_H__ */ \ No newline at end of file diff --git a/hls_2018/router_03_boardstr/etc/ap_fixed_sim.h b/hls_2018/router_03_boardstr/etc/ap_fixed_sim.h new file mode 100755 index 0000000..5be571d --- /dev/null +++ b/hls_2018/router_03_boardstr/etc/ap_fixed_sim.h @@ -0,0 +1,2451 @@ +/* + * Copyright 2012 Xilinx, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __AESL_GCC_AP_FIXED_H__ +#define __AESL_GCC_AP_FIXED_H__ + +#ifndef __cplusplus + #error C++ is required to include this header file +#endif /* #ifndef __cplusplus */ + + +#include +#ifndef __AESL_APDT_IN_SCFLOW__ + #include "etc/ap_int_sim.h" +#else + #include "../etc/ap_private.h" +#endif /* #ifndef __AESL_APDT_IN_SCFLOW__ */ + +#define FLOAT_MAN 23 +#define FLOAT_EXP 8 +#define DOUBLE_MAN 52 +#define DOUBLE_EXP 11 +// #define DOUBLE_MAN_MASK (~0ULL >> (64-DOUBLE_MAN-2)) +#define DOUBLE_MAN_MASK 0x3fffffffffffffULL +#define BIAS(e) ((1ULL<<(e-1))-1) +#define FLOAT_BIAS BIAS(FLOAT_EXP) +#define DOUBLE_BIAS BIAS(DOUBLE_EXP) + +/// Forward declaration. +template struct ap_fixed_base; + +///Proxy class, which allows bit selection to be used as both rvalue(for reading) and +//lvalue(for writing) +template +struct af_bit_ref { +#ifdef _MSC_VER + #pragma warning(disable: 4521 4522) +#endif /* #ifdef _MSC_VER */ + ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& d_bv; + int d_index; +public: + INLINE af_bit_ref(const af_bit_ref<_AP_W, _AP_I, _AP_S, + _AP_Q, _AP_O, _AP_N>& ref): + d_bv(ref.d_bv), d_index(ref.d_index) {} + + INLINE af_bit_ref(ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>* bv, int index=0): + d_bv(*bv),d_index(index) {} + + INLINE operator bool() const { + return d_bv.V[d_index]; + } + + INLINE af_bit_ref& operator=(unsigned long long val) { + if (val) + d_bv.V.set(d_index); + else + d_bv.V.clear(d_index); + return *this; + } + + template + INLINE af_bit_ref& operator =(const ap_private<_AP_W2,_AP_S2>& val) { + return operator=(val!=0); + } + + INLINE af_bit_ref& operator =(const af_bit_ref<_AP_W, _AP_I, _AP_S, + _AP_Q, _AP_O, _AP_N>& val) { + return operator=((unsigned long long)(bool)val); + } + + template + INLINE af_bit_ref operator=(const af_bit_ref<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& val) { + return 
operator=((unsigned long long)(bool)val); + } + + template + INLINE af_bit_ref& operator = ( const ap_bit_ref<_AP_W2, _AP_S2> &val) { + return operator =((unsigned long long) (bool) val); + } + + template + INLINE af_bit_ref& operator = ( const ap_range_ref<_AP_W2,_AP_S2>& val) { + return operator =((const ap_private<_AP_W2, false>) val); + } + + template + INLINE af_bit_ref& operator= (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=((const ap_private<_AP_W2, false>)(val)); + } + + template + INLINE af_bit_ref& operator= (const ap_concat_ref<_AP_W2, _AP_T3, _AP_W3, _AP_T3>& val) { + return operator=((const ap_private<_AP_W2 + _AP_W3, false>)(val)); + } + + template + INLINE ap_concat_ref<1, af_bit_ref, _AP_W2, ap_private<_AP_W2, _AP_S2> > + operator, (ap_private<_AP_W2, _AP_S2>& op) { + return ap_concat_ref<1, af_bit_ref, _AP_W2, + ap_private<_AP_W2, _AP_S2> >(*this, op); + } + + template + INLINE ap_concat_ref<1, af_bit_ref, 1, ap_bit_ref<_AP_W2, _AP_S2> > + operator, (const ap_bit_ref<_AP_W2, _AP_S2> &op) { + return ap_concat_ref<1, af_bit_ref, 1, + ap_bit_ref<_AP_W2, _AP_S2> >(*this, + const_cast& >(op)); + } + + template + INLINE ap_concat_ref<1, af_bit_ref, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator, (const ap_range_ref<_AP_W2, _AP_S2> &op) { + return ap_concat_ref<1, af_bit_ref, _AP_W2, + ap_range_ref<_AP_W2, _AP_S2> >(*this, + const_cast& >(op)); + } + + template + INLINE ap_concat_ref<1, af_bit_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator, (const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &op) { + return ap_concat_ref<1, af_bit_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(*this, + const_cast& >(op)); + } + + template + INLINE ap_concat_ref<1, af_bit_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &op) { + return 
ap_concat_ref<1, af_bit_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, + _AP_N2> >(*this, const_cast& >(op)); + } + + template + INLINE ap_concat_ref<1, af_bit_ref, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &op) { + return ap_concat_ref<1, af_bit_ref, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, + const_cast& >(op)); + } + + template + INLINE bool operator == (const af_bit_ref<_AP_W2, _AP_I2, + _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + return get() == op.get(); + } + + template + INLINE bool operator != (const af_bit_ref<_AP_W2, _AP_I2, + _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + return get() != op.get(); + } + + INLINE bool operator ~ () const { + bool bit = (d_bv.V)[d_index]; + return bit ? false : true; + } + + INLINE int length() const { + return 1; + } + + INLINE bool get() { + return d_bv.V[d_index]; + } + + INLINE bool get() const { + return d_bv.V[d_index]; + } + + INLINE std::string to_string() const { + return d_bv.V[d_index] ? "1" : "0"; + } +}; + +///Range(slice) reference +//------------------------------------------------------------ +template +struct af_range_ref { +#ifdef _MSC_VER + #pragma warning(disable: 4521 4522) +#endif /* #ifdef _MSC_VER */ + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &d_bv; + int l_index; + int h_index; + +public: + INLINE af_range_ref(const af_range_ref<_AP_W, _AP_I, _AP_S, + _AP_Q, _AP_O, _AP_N>& ref): + d_bv(ref.d_bv), l_index(ref.l_index), h_index(ref.h_index) {} + + INLINE af_range_ref(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>* bv, int h, int l): + d_bv(*bv),l_index(l),h_index(h) { + //if (h < l) + // fprintf(stderr, + //"Warning! 
The bits selected will be returned in reverse order\n"); + } + + INLINE operator ap_private<_AP_W, false> () const { + if (h_index >= l_index) { + ap_private<_AP_W, false> val(d_bv.V); + ap_private<_AP_W,false> mask(-1); + mask>>=_AP_W-(h_index-l_index+1); + val>>=l_index; + return val&=mask; + } else { + ap_private<_AP_W, false> val = 0; + for (int i=0, j=l_index;j>=0&&j>=h_index;j--,i++) + if ((d_bv.V)[j]) val.set(i); + return val; + } + } + + INLINE operator unsigned long long() const { + return get().to_uint64(); + } + + template + INLINE af_range_ref& operator =(const ap_private<_AP_W2,_AP_S2>& val) { + ap_private<_AP_W, false> vval= ap_private<_AP_W, false>(val); + if (l_index > h_index) { + for (int i=0, j=l_index;j>=0&&j>=h_index;j--,i++) + vval[i]? d_bv.V.set(j):d_bv.V.clear(j); + } else { + ap_private<_AP_W,false> mask(-1); + if (l_index>0) { + mask<<=l_index; + vval<<=l_index; + } + if (h_index<_AP_W-1) { + ap_private<_AP_W,false> mask2(-1); + mask2>>=_AP_W-h_index-1; + mask&=mask2; + vval&=mask2; + } + mask.flip(); + d_bv.V &= mask; + d_bv.V |= vval; + } + return *this; + } + + INLINE af_range_ref& operator = (unsigned long long val) { + const ap_private<_AP_W, false> tmpVal(val); + return operator = (tmpVal); + } + + template + INLINE af_range_ref& operator = + (const ap_concat_ref <_AP_W3, _AP_T3, _AP_W4, _AP_T4>& val) { + const ap_private<_AP_W, false> tmpVal(val); + return operator = (tmpVal); + } + + template + INLINE af_range_ref& operator =(const ap_bit_ref<_AP_W3, _AP_S3>& val) { + const ap_private<_AP_W, false> tmpVal(val); + return operator = (tmpVal); + } + + template + INLINE af_range_ref& operator =(const ap_range_ref<_AP_W3,_AP_S3>& val) { + const ap_private<_AP_W, false> tmpVal(val); + return operator =(tmpVal); + } + + template + INLINE af_range_ref& operator= (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + const ap_private<_AP_W2, false> tmp= val.get(); + return operator = (tmp); + } + + INLINE 
af_range_ref& operator= (const af_range_ref<_AP_W, _AP_I, _AP_S, + _AP_Q, _AP_O, _AP_N>& val) { + const ap_private<_AP_W, false> tmp= val.get(); + return operator = (tmp); + } + + template + INLINE af_range_ref& operator= (const ap_fixed_base<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=(val.to_ap_private()); + } + + template + INLINE bool operator == (const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs==rhs; + } + + template + INLINE bool operator != (const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs!=rhs; + } + + template + INLINE bool operator > (const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs>rhs; + } + + template + INLINE bool operator >= (const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs>=rhs; + } + + template + INLINE bool operator < (const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs + INLINE bool operator <= (const ap_range_ref<_AP_W2, _AP_S2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs<=rhs; + } + + template + INLINE bool operator == (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs==rhs; + } + + template + INLINE bool operator != (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs!=rhs; + } + + template + INLINE bool operator > (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + 
ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs>rhs; + } + + template + INLINE bool operator >= (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs>=rhs; + } + + template + INLINE bool operator < (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs + INLINE bool operator <= (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op2) { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs<=rhs; + } + + template + INLINE void set(const ap_private<_AP_W2,false>& val) { + ap_private<_AP_W,_AP_S> vval=val; + if (l_index>h_index) { + for (int i=0, j=l_index;j>=0&&j>=h_index;j--,i++) + vval[i]? d_bv.V.set(j):d_bv.V.clear(j); + } else { + ap_private<_AP_W,_AP_S> mask(-1); + if (l_index>0) { + ap_private<_AP_W,false> mask1(-1); + mask1>>=_AP_W-l_index; + mask1.flip(); + mask=mask1; + //vval&=mask1; + vval<<=l_index; + } + if (h_index<_AP_W-1) { + ap_private<_AP_W,false> mask2(-1); + mask2<<=h_index+1; + mask2.flip(); + mask&=mask2; + vval&=mask2; + } + mask.flip(); + d_bv&=mask; + d_bv|=vval; + } + + } + + INLINE ap_private<_AP_W,false> get() const { + if (h_index val(0); + for (int i=0, j=l_index;j>=0&&j>=h_index;j--,i++) + if ((d_bv.V)[j]) val.set(i); + return val; + } else { + ap_private<_AP_W, false> val = ap_private<_AP_W,false>(d_bv.V); + val>>= l_index; + if (h_index<_AP_W-1) + { + ap_private<_AP_W,false> mask(-1); + mask>>=_AP_W-(h_index-l_index+1); + val&=mask; + } + return val; + } + } + + template + INLINE ap_concat_ref<_AP_W, af_range_ref, _AP_W2, ap_private<_AP_W2, _AP_S2> > + operator, (ap_private<_AP_W2, _AP_S2>& op) { + return ap_concat_ref<_AP_W, af_range_ref, _AP_W2, + ap_private<_AP_W2, _AP_S2> >(*this, op); + } + + template + INLINE 
ap_concat_ref<_AP_W, af_range_ref, 1, ap_bit_ref<_AP_W2, _AP_S2> > + operator, (const ap_bit_ref<_AP_W2, _AP_S2> &op) { + return ap_concat_ref<_AP_W, af_range_ref, 1, + ap_bit_ref<_AP_W2, _AP_S2> >(*this, + const_cast& >(op)); + } + + template + INLINE ap_concat_ref<_AP_W, af_range_ref, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator, (const ap_range_ref<_AP_W2, _AP_S2> &op) { + return ap_concat_ref<_AP_W, af_range_ref, _AP_W2, + ap_range_ref<_AP_W2, _AP_S2> >(*this, + const_cast& >(op)); + } + + template + INLINE ap_concat_ref<_AP_W, af_range_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator, (const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &op) { + return ap_concat_ref<_AP_W, af_range_ref, _AP_W2 + _AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(*this, + const_cast& >(op)); + } + + template + INLINE ap_concat_ref<_AP_W, af_range_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &op) { + return ap_concat_ref<_AP_W, af_range_ref, _AP_W2, + af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, + const_cast& > (op)); + } + + template + INLINE ap_concat_ref<_AP_W, af_range_ref, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &op) { + return ap_concat_ref<_AP_W, af_range_ref, 1, + af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, + const_cast& >(op)); + } + + INLINE int length() const { + return h_index>=l_index?h_index-l_index+1:l_index-h_index+1; + } + + INLINE int to_int() const { + ap_private<_AP_W,false> val=get(); + return val.to_int(); + } + + INLINE unsigned int to_uint() const { + ap_private<_AP_W,false> val=get(); + return val.to_uint(); + } + + INLINE long to_long() const { + ap_private<_AP_W,false> val=get(); + return val.to_long(); + } + + INLINE unsigned long to_ulong() 
const { + ap_private<_AP_W,false> val=get(); + return val.to_ulong(); + } + + INLINE ap_slong to_int64() const { + ap_private<_AP_W,false> val=get(); + return val.to_int64(); + } + + INLINE ap_ulong to_uint64() const { + ap_private<_AP_W,false> val=get(); + return val.to_uint64(); + } + + INLINE std::string to_string(uint8_t radix) const { + return get().to_string(radix); + } + +}; + +//----------------------------------------------------------------------------- +///ap_fixed_base: AutoPilot fixed point +//----------------------------------------------------------------------------- +template +struct ap_fixed_base { +#ifdef _MSC_VER + #pragma warning(disable: 4521 4522) +#endif /* #ifdef _MSC_VER */ +public: + template friend struct +ap_fixed_base; + template friend struct +af_bit_ref; + + INLINE void overflow_adjust(bool underflow, bool overflow, + bool lD, bool sign) { + if (!overflow && !underflow) return; + switch (_AP_O) { + case AP_WRAP: + if (_AP_N == 0) + return; + if (_AP_S) { + //signed SC_WRAP + //n_bits == 1; + if (_AP_N > 1) { + ap_private<_AP_W, _AP_S> mask(-1); + if (_AP_N >= _AP_W) mask = 0; + else mask.lshr(_AP_N); + if (sign) + V &= mask; + else + V |= ~mask; + } + sign ? V.set(_AP_W - 1) : V.clear(_AP_W - 1); + } else { + //unsigned SC_WRAP + ap_private<_AP_W, _AP_S> mask(-1); + if (_AP_N >= _AP_W) mask = 0; + else mask.lshr(_AP_N); + mask.flip(); + V |= mask; + } + break; + case AP_SAT_ZERO: + V.clear(); + break; + case AP_WRAP_SM: + { + bool Ro = ap_private_ops::get<_AP_W, _AP_S, _AP_W -1>(V); // V[_AP_W -1]; + if (_AP_N == 0) { + if (lD != Ro) { + V.flip(); + lD ? 
ap_private_ops::set<_AP_W, _AP_S, _AP_W - 1>(V) : + ap_private_ops::clear<_AP_W, _AP_S, _AP_W - 1>(V); + } + } else { + if (_AP_N == 1 && sign != Ro) { + V.flip(); + } else if (_AP_N > 1) { + bool lNo = ap_private_ops::get<_AP_W, _AP_S, _AP_W - _AP_N> (V); // V[_AP_W - _AP_N]; + if (lNo == sign) + V.flip(); + ap_private<_AP_W, false> mask(-1); + if (_AP_N >= _AP_W) mask = 0; + else mask.lshr(_AP_N); + if (sign) + V &= mask; + else + V |= mask.flip(); + sign ? ap_private_ops::set<_AP_W, _AP_S, _AP_W - 1>(V) : ap_private_ops::clear<_AP_W, _AP_S, _AP_W - 1>(V); + } + } + } + break; + default: + if (_AP_S) { + if (overflow) { + V.set(); ap_private_ops::clear<_AP_W, _AP_S, _AP_W-1>(V); + } else if (underflow) { + V.clear(); + ap_private_ops::set<_AP_W, _AP_S, _AP_W-1>(V); + if (_AP_O == AP_SAT_SYM) + ap_private_ops::set<_AP_W, _AP_S, 0>(V); + } + } else { + if (overflow) + V.set(); + else if (underflow) + V.clear(); + } + } + } + + INLINE bool quantization_adjust(bool qb, bool r, bool s) { + bool carry=ap_private_ops::get<_AP_W, _AP_S, _AP_W-1>(V); + switch (_AP_Q) { + case AP_TRN: + return false; + case AP_RND_ZERO: + qb &= s || r; + break; + case AP_RND_MIN_INF: + qb &= r; + break; + case AP_RND_INF: + qb &= !s || r; + break; + case AP_RND_CONV: + qb &= ap_private_ops::get<_AP_W, _AP_S, 0>(V) || r; + break; + case AP_TRN_ZERO: + qb = s && ( qb || r ); + break; + default:; + + } + if (qb) ++V; + //only when old V[_AP_W-1]==1 && new V[_AP_W-1]==0 + return carry && !(ap_private_ops::get<_AP_W, _AP_S, _AP_W-1>(V)); //(!V[_AP_W-1]); + } + + template + struct RType { + enum { + _AP_F=_AP_W-_AP_I, + F2=_AP_W2-_AP_I2, + mult_w = _AP_W+_AP_W2, + mult_i = _AP_I+_AP_I2, + mult_s = _AP_S||_AP_S2, + plus_w = AP_MAX(_AP_I+(_AP_S2&&!_AP_S),_AP_I2+(_AP_S&&!_AP_S2))+1+AP_MAX(_AP_F,F2), + plus_i = AP_MAX(_AP_I+(_AP_S2&&!_AP_S),_AP_I2+(_AP_S&&!_AP_S2))+1, + plus_s = _AP_S||_AP_S2, + minus_w = AP_MAX(_AP_I+(_AP_S2&&!_AP_S),_AP_I2+(_AP_S&&!_AP_S2))+1+AP_MAX(_AP_F,F2), + minus_i = 
AP_MAX(_AP_I+(_AP_S2&&!_AP_S),_AP_I2+(_AP_S&&!_AP_S2))+1, + minus_s = true, +#ifndef __SC_COMPATIBLE__ + div_w = _AP_W + AP_MAX(_AP_W2 - _AP_I2, 0) + _AP_S2, +#else + div_w = _AP_W + AP_MAX(_AP_W2 - _AP_I2, 0) + _AP_S2 + AP_MAX(_AP_I2, 0), +#endif /* #ifndef __SC_COMPATIBLE__ */ + div_i = _AP_I + (_AP_W2-_AP_I2) + _AP_S2, + div_s = _AP_S||_AP_S2, + logic_w = AP_MAX(_AP_I+(_AP_S2&&!_AP_S),_AP_I2+(_AP_S&&!_AP_S2))+AP_MAX(_AP_F,F2), + logic_i = AP_MAX(_AP_I+(_AP_S2&&!_AP_S),_AP_I2+(_AP_S&&!_AP_S2)), + logic_s = _AP_S||_AP_S2 + }; + + typedef ap_fixed_base mult; + typedef ap_fixed_base plus; + typedef ap_fixed_base minus; + typedef ap_fixed_base logic; + typedef ap_fixed_base div; + typedef ap_fixed_base<_AP_W, _AP_I, _AP_S> arg1; + }; + INLINE void report() { +#if 0 + if (_AP_W > 1024 && _AP_W <= 4096) { + fprintf(stderr, "[W] W=%d is out of bound (1<=W<=1024):" + " for synthesis, please define macro AP_INT_TYPE_EXT(N) to" + " extend the valid range.\n", _AP_W); + } else +#endif /* #if 0 */ + if (_AP_W > MAX_MODE(AP_INT_MAX_W) * 1024) { + fprintf(stderr, "[E] ap_%sfixed<%d, ...>: Bitwidth exceeds the " + "default max value %d. Please use macro " + "AP_INT_MAX_W to set a larger max value.\n", + _AP_S?"":"u", _AP_W, + MAX_MODE(AP_INT_MAX_W) * 1024); + exit(1); + } + } + + /// Constructors. 
+ // ------------------------------------------------------------------------- +#if 0 + #ifdef __SC_COMPATIBLE__ + INLINE ap_fixed_base():V(uint32_t(_AP_W), uint64_t(0)) {} + #else + INLINE ap_fixed_base():V(uint32_t(_AP_W)) {} + #endif /* #ifdef __SC_COMPATIBLE__ */ +#else + INLINE ap_fixed_base():V(0) {} +#endif /* #if 0 */ + // INLINE ap_fixed_base():V() {} + // INLINE explicit ap_fixed_base(const ap_private<_AP_W+_AP_I, _AP_S>& _V):V(_V) {} + INLINE ap_fixed_base(const ap_fixed_base& op):V(op.V) {} + template + INLINE ap_fixed_base(const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op):V(0) { + enum {N2=_AP_W2,_AP_F=_AP_W-_AP_I,F2=_AP_W2-_AP_I2,QUAN_INC=F2>_AP_F && !(_AP_Q==AP_TRN || + (_AP_Q==AP_TRN_ZERO && !_AP_S2))}; + if (!op) return; + bool carry=false; + //handle quantization + enum { sh_amt =(F2>_AP_F)?F2-_AP_F:_AP_F-F2}; + const ap_private<_AP_W2, _AP_S2>& val = op.V; + bool neg_src=val.isNegative(); + if (F2==_AP_F) + V=val; + + else if (F2>_AP_F) { + if (sh_amt >= _AP_W2) + V = neg_src ? 
-1 : 0; + else + V = _AP_S2?val.ashr(sh_amt):val.lshr(sh_amt); + if (_AP_Q!=AP_TRN && !(_AP_Q==AP_TRN_ZERO && !_AP_S2)) { + bool qb = false; + if (F2-_AP_F>_AP_W2) + qb = neg_src; + else + qb = ap_private_ops::get<_AP_W2, _AP_S2, F2-_AP_F-1>(val); + + bool r=false; + enum { pos3 = F2-_AP_F-2}; + if (pos3>=_AP_W2-1) + r=val!=0; + else if (pos3>=0) + r = (val<<(_AP_W2-1-pos3))!=0; + carry = quantization_adjust(qb,r,neg_src); + } + } else { //no quantization + if (sh_amt < _AP_W) { + V=val; + V <<= sh_amt; + } + } + //hanle overflow/underflow + if ((_AP_O!=AP_WRAP || _AP_N != 0) && + ((!_AP_S && _AP_S2) || _AP_I-_AP_S < + _AP_I2 - _AP_S2 + (QUAN_INC|| (_AP_S2 && + _AP_O==AP_SAT_SYM)))) {//saturation + bool deleted_zeros = _AP_S2?true:!carry, + deleted_ones = true; + bool lD=(_AP_I2>_AP_I && _AP_W2-_AP_I2+_AP_I>=0) && + ap_private_ops::get<_AP_W2, _AP_S2, _AP_W2-_AP_I2+_AP_I>(val); + enum { pos1=F2-_AP_F+_AP_W, pos2=F2-_AP_F+_AP_W+1}; + if (pos1 < _AP_W2) { + bool Range1_all_ones= true; + bool Range1_all_zeros= true; + if (pos1 >= 0) { + enum { __W = (_AP_W2-pos1) > 0 ? (_AP_W2-pos1) : 1 }; + const ap_private<__W, _AP_S2> Range1=ap_private<__W, _AP_S2>(val.lshr(pos1)); + Range1_all_ones=Range1.isAllOnesValue(); + Range1_all_zeros=Range1.isMinValue(); + } else { + Range1_all_ones=false; + Range1_all_zeros=val.isMinValue(); + } + bool Range2_all_ones=true; + if (pos2<_AP_W2 && pos2>=0) { + enum { __W = (_AP_W2-pos2)>0 ? 
(_AP_W2-pos2) : 1}; + ap_private<__W, true> Range2=ap_private<__W, true>(val.lshr(pos2)); + Range2_all_ones=Range2.isAllOnesValue(); + } else if (pos2<0) + Range2_all_ones=false; + + deleted_zeros=deleted_zeros && (carry?Range1_all_ones:Range1_all_zeros); + deleted_ones=carry?Range2_all_ones&&(F2-_AP_F+_AP_W<0||!lD) + :Range1_all_ones; + neg_src= neg_src&&!(carry && Range1_all_ones); + } else + neg_src = neg_src && V[_AP_W-1]; + + bool neg_trg= V.isNegative(); + bool overflow=(neg_trg||!deleted_zeros) && !val.isNegative(); + bool underflow=(!neg_trg||!deleted_ones)&&neg_src; + //printf("neg_src = %d, neg_trg = %d, deleted_zeros = %d, + // deleted_ones = %d, overflow = %d, underflow = %d\n", + // neg_src, neg_trg, deleted_zeros, deleted_ones, + // overflow, underflow); + if (_AP_O==AP_SAT_SYM && _AP_S2 && _AP_S) + underflow |= neg_src && (_AP_W>1?V.isMinSignedValue():true); + overflow_adjust(underflow, overflow, lD, neg_src); + } + report(); + } + + template + INLINE ap_fixed_base(const volatile ap_fixed_base<_AP_W2,_AP_I2, + _AP_S2,_AP_Q2,_AP_O2, _AP_N2> &op) : V(op.V) { + *this = const_cast&>(op); + } + + template + INLINE ap_fixed_base(const ap_private<_AP_W2,_AP_S2>& op) { + ap_fixed_base<_AP_W2,_AP_W2,_AP_S2> f_op; + f_op.V=op; + *this = f_op; + } + + INLINE ap_fixed_base(bool b) { + *this=(ap_private<1,false>)b; + report(); + } + + INLINE ap_fixed_base(char b) { + *this=(ap_private<8,false>)b; + report(); + } + + INLINE ap_fixed_base(signed char b) { + *this=(ap_private<8,true>)b; + report(); + } + + INLINE ap_fixed_base(unsigned char b) { + *this=(ap_private<8,false>)b; + report(); + } + + INLINE ap_fixed_base(signed short b) { + *this=(ap_private<16,true>)b; + report(); + } + + INLINE ap_fixed_base(unsigned short b) { + *this=(ap_private<16,false>)b; + report(); + } + + INLINE ap_fixed_base(signed int b) { + *this=(ap_private<32,true>)b; + report(); + } + + INLINE ap_fixed_base(unsigned int b) { + *this=(ap_private<32,false>)b; + report(); + } +# if defined 
__x86_64__ + INLINE ap_fixed_base(signed long b) { + *this=(ap_private<64,true>)b; + report(); + } + + INLINE ap_fixed_base(unsigned long b) { + *this=(ap_private<64,false>)b; + report(); + } +# else + INLINE ap_fixed_base(signed long b) { + *this=(ap_private<32,true>)b; + report(); + } + + INLINE ap_fixed_base(unsigned long b) { + *this=(ap_private<32,false>)b; + report(); + } +# endif + + INLINE ap_fixed_base(ap_slong b) { + *this=(ap_private<64,true>)b; + report(); + } + + INLINE ap_fixed_base(ap_ulong b) { + *this=(ap_private<64,false>)b; + report(); + } + +#if 1 + INLINE ap_fixed_base(const char* val):V(0) { + ap_private<_AP_W, _AP_S> Tmp(val); + V = Tmp; + } + + INLINE ap_fixed_base(const char* val, signed char rd): V(0) { + ap_private<_AP_W, _AP_S> Tmp(val, rd); + V = Tmp; + } + +#endif + + INLINE ap_fixed_base(const std::string& val) { + ap_private<_AP_W, _AP_S> Tmp(val, 2); + V = Tmp; + report(); + } + + template + INLINE ap_fixed_base(const ap_bit_ref<_AP_W2, _AP_S2>& op) { + *this = ((bool)op); + report(); + } + + template + INLINE ap_fixed_base(const ap_range_ref<_AP_W2, _AP_S2>& op) { + *this = ap_private<_AP_W2, _AP_S2>(op); + report(); + } + + template + INLINE ap_fixed_base(const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& op) { + *this = ((const ap_private<_AP_W2 + _AP_W3, false>&)(op)); + report(); + } + + template + INLINE ap_fixed_base(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + *this = (bool(op)); + report(); + } + + template + INLINE ap_fixed_base(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + *this = (ap_private<_AP_W2, false>(op)); + report(); + } + + //helper function + INLINE unsigned long long doubleToRawBits(double pf)const { + union { + unsigned long long __L; + double __D; + }LD; + LD.__D=pf; + return LD.__L; + } + + + INLINE double rawBitsToDouble(unsigned long long pi) const { + union { + unsigned long long __L; + double __D; + }LD; + LD.__L=pi; + return LD.__D; + } + + 
INLINE float rawBitsToFloat(uint32_t pi) const { + union { + uint32_t __L; + float __D; + }LD; + LD.__L = pi; + return LD.__D; + } + + INLINE ap_fixed_base(double d):V(0) { + if (!d) return; + const bool isneg=d<0; + + const uint64_t ireg=doubleToRawBits(isneg?-d:d); + if ((ireg&0x7fffffffffffffffULL)!=0) { + const int32_t exp=(((ireg)>>DOUBLE_MAN)&0x07ff)-DOUBLE_BIAS; + ap_private man = ireg & DOUBLE_MAN_MASK; + man.clear(DOUBLE_MAN+1); + man.set(DOUBLE_MAN); + if (isneg) { + man.flip(); + man++; + } + + enum {_AP_S2=true, _AP_W2=DOUBLE_MAN+2,_AP_F=_AP_W -_AP_I }; + const int _AP_I2=exp+2; + const int F2=_AP_W2-_AP_I2; + const bool QUAN_INC=F2>_AP_F && !(_AP_Q==AP_TRN || (_AP_Q==AP_TRN_ZERO && + !_AP_S2)); + bool carry=false; + //handle quantization + const unsigned sh_amt=abs(F2-_AP_F); // sh_amt = F2>_AP_F ? F2 -_AP_F : _AP_F-F2; + if (F2==_AP_F ) + V=man; + else if (F2>_AP_F) { + if (sh_amt >= DOUBLE_MAN+2) + V=isneg?-1:0; + else + V=(man>>sh_amt) | ((man & 1ULL<<(DOUBLE_MAN+1)) ? (DOUBLE_MAN_MASK>>(DOUBLE_MAN+2-sh_amt) <<(DOUBLE_MAN+2-sh_amt)):0); + + if (_AP_Q!=AP_TRN && !(_AP_Q==AP_TRN_ZERO && !_AP_S2)) { + const bool qb=((F2-_AP_F > DOUBLE_MAN+2) ? isneg : (man & (1ULL<<(F2-_AP_F-1))) != 0); + const int pos3=F2-_AP_F-2; + const bool r = (pos3>= 0) ? 
(man << AP_MAX(0, _AP_W2-pos3-1)& DOUBLE_MAN_MASK)!=0 : false; + carry = quantization_adjust(qb,r,isneg); + } + } + else { //no quantization + // V=man; + if (sh_amt < _AP_W) { + V = man; + V <<= sh_amt; + } + } + //handle overflow/underflow + if ((_AP_O != AP_WRAP || _AP_N != 0) && + ((!_AP_S && _AP_S2) || _AP_I-_AP_S < + _AP_I2-_AP_S2+(QUAN_INC|| (_AP_S2 && + _AP_O==AP_SAT_SYM)) )) {// saturation + bool deleted_zeros = _AP_S2?true:!carry, + deleted_ones = true; + bool neg_src; + const bool lD=(_AP_I2>_AP_I) && (_AP_W2-_AP_I2+_AP_I>=0) && (man & (1ULL <<(DOUBLE_MAN+2-_AP_I2+_AP_I))); + int pos1=F2+_AP_W-_AP_F; + if (pos1 < _AP_W2) { + int pos2=pos1+1; + bool Range1_all_ones=true; + bool Range1_all_zeros=true; + if (pos1>=0) { + ap_private<_AP_W,_AP_S> Range1= + ap_private<_AP_W,_AP_S>((man >> pos1) | ((1ULL<<(DOUBLE_MAN+1)&man) ? (DOUBLE_MAN_MASK >> (DOUBLE_MAN+2-pos1) <<(DOUBLE_MAN+2-pos1)):0)); + Range1_all_ones = Range1.isAllOnesValue(); // Range1.isAllOnesValue(); + Range1_all_zeros = Range1.isMinValue(); // Range1.isMinValue(); + } else { + Range1_all_ones=false; + Range1_all_zeros = man==0; // man.isMinValue(); + } + bool Range2_all_ones=true; + if (pos2<_AP_W2 && pos2>=0) { + ap_private<_AP_W, _AP_S> Range2= + ap_private<_AP_W, _AP_S>((man >> pos2) | ((1ULL<<(DOUBLE_MAN+1)&man) ? 
(DOUBLE_MAN_MASK >> (DOUBLE_MAN+2-pos2) <<(DOUBLE_MAN+2-pos2)):0)); + Range2_all_ones=Range2.isAllOnesValue(); // Range2.isAllOnesValue(); + } else if (pos2<0) + Range2_all_ones=false; + deleted_zeros=deleted_zeros && (carry?Range1_all_ones:Range1_all_zeros); + deleted_ones=carry?Range2_all_ones&&(F2-_AP_F+_AP_W<0||!lD) : Range1_all_ones; + neg_src=isneg&&!(carry&Range1_all_ones); + } else + neg_src = isneg && V[_AP_W -1]; + + const bool neg_trg=V.isNegative(); + const bool overflow=(neg_trg||!deleted_zeros) && !isneg; + bool underflow=(!neg_trg||!deleted_ones)&&neg_src; + //printf("neg_src = %d, neg_trg = %d, deleted_zeros = %d, + // deleted_ones = %d, overflow = %d, underflow = %d\n", + // neg_src, neg_trg, deleted_zeros, deleted_ones, + // overflow, underflow); + if (_AP_O==AP_SAT_SYM && _AP_S2 && _AP_S) + underflow |= neg_src && (_AP_W>1?V.isMinSignedValue():true); + overflow_adjust(underflow,overflow,lD, neg_src); + } + } + report(); + } + + + ///assign operators + //------------------------------------------------------------------------- + + INLINE volatile ap_fixed_base& operator=(const ap_fixed_base<_AP_W, _AP_I, _AP_S, + _AP_Q, _AP_O, _AP_N>& op) volatile { + V = op.V; + return *this; + } + + INLINE ap_fixed_base& operator=(const ap_fixed_base<_AP_W, _AP_I, _AP_S, + _AP_Q, _AP_O, _AP_N>& op) { + V = op.V; + return *this; + } + + INLINE volatile ap_fixed_base& operator=(const volatile ap_fixed_base<_AP_W, _AP_I, _AP_S, + _AP_Q, _AP_O, _AP_N>& op) volatile { + V = op.V; + return *this; + } + + INLINE ap_fixed_base& operator=(const volatile ap_fixed_base<_AP_W, _AP_I, _AP_S, + _AP_Q, _AP_O, _AP_N>& op) { + V = op.V; + return *this; + } + + // Set this ap_fixed_base with a bits string. That means the ssdm_int::V + // inside this ap_fixed_base is assigned by bv. + // Note the input parameter should be a fixed-point formatted bit string. 
+ INLINE ap_fixed_base& setBits(unsigned long long bv) { + V=bv; + return *this; + } + + // Return a ap_fixed_base object whose ssdm_int::V is assigned by bv. + // Note the input parameter should be a fixed-point formatted bit string. + static INLINE ap_fixed_base bitsToFixed(unsigned long long bv) { + ap_fixed_base Tmp=bv; + return Tmp; + } + + // Explicit conversion functions to ap_private that captures + // all integer bits (bits are truncated) + INLINE ap_private + to_ap_private(bool Cnative = true) const { + ap_private ret = ap_private ((_AP_I >= 1) ? (_AP_S==true ? V.ashr(AP_MAX(0,_AP_W - _AP_I)) : V.lshr(AP_MAX(0,_AP_W - _AP_I))) : ap_private<_AP_W, _AP_S>(0)); + + if (Cnative) { + bool r = false; + if (_AP_I < _AP_W) { + if (_AP_I > 0) r = !(V.getLoBits(_AP_W - _AP_I).isMinValue()); + else r = !(V.isMinValue()); + } + if (r && V.isNegative()) { // if this is negative integer + ++ret;//ap_private(1,_AP_S); + } + } else { + //Follow OSCI library, conversion from sc_fixed to sc_int + } + return ret; + } + + template + INLINE operator ap_private<_AP_W2,_AP_S2> () const { + return (ap_private<_AP_W2,_AP_S2>)to_ap_private(); + } + + template + INLINE operator ap_private<_AP_W2,_AP_S2,_AP_N2> () const { + return (ap_private<_AP_W2,_AP_S2,_AP_N2>)to_ap_private(); + } + + //Explict conversion function to C built-in integral type + INLINE int to_int() const { + return to_ap_private().to_int(); + } + + INLINE int to_uint() const { + return to_ap_private().to_uint(); + } + + INLINE ap_slong to_int64() const { + return to_ap_private().to_int64(); + } + + INLINE ap_ulong to_uint64() const { + return to_ap_private().to_uint64(); + } + + INLINE double to_double() const { + if (!V) + return 0; + if (_AP_W>64 || (_AP_W - _AP_I) > 0) { + bool isneg = _AP_S && V[_AP_W-1]; + uint64_t res = isneg ? 
0x8000000000000000ULL : 0; + ap_private<_AP_W, false> tmp = V; + if (isneg) tmp = -tmp; + int i = _AP_W -1 - tmp.countLeadingZeros(); + int exp = _AP_I-(_AP_W-i); + res|=((uint64_t)(exp+DOUBLE_BIAS))<DOUBLE_MAN)?tmp.lshr(i-DOUBLE_MAN):tmp).to_uint64() & DOUBLE_MAN_MASK; + res |= i 0) { + /* This specialization is disabled. It is giving wrong results in some cases. + bool isneg=V.isNegative(); + double dp = V.get(); + dp /= (1<< (_AP_W - _AP_I)); + return dp;*/ + } else + return double(to_int64()); + } + + INLINE float to_float() const { + uint32_t res=0; + if (V==0) + return 0; + bool isneg=V.isNegative(); + ap_private<_AP_W, _AP_S> tmp=V; + if (isneg) tmp = -tmp; + if (_AP_W-_AP_I>0||_AP_W>64) { + if (isneg) + res=0x80000000; + int i=_AP_W-1; + i-=tmp.countLeadingZeros(); + int exp=_AP_I-(_AP_W-i); + res|=(exp+FLOAT_BIAS)< man = 0; + if (i!=0) { + tmp.clear(i); + if (i>FLOAT_MAN) + man=tmp.lshr(i-FLOAT_MAN); + else + man=tmp; + res |= i < FLOAT_MAN?man.getZExtValue()<<(FLOAT_MAN-i):man.getZExtValue(); + } + } else { + return float(to_int64()); + } + float dp=rawBitsToFloat(res); + return dp; + } + + INLINE operator double () const { + return to_double(); + } +#ifndef __SC_COMPATIBLE__ + INLINE operator float () const { + return to_float(); + } + + INLINE operator char () const { + return (char) to_int(); + } + + INLINE operator unsigned char () const { + return (unsigned char) to_uint(); + } + + INLINE operator short () const { + return (short) to_int(); + } + + INLINE operator unsigned short () const { + return (unsigned short) to_uint(); + } + + INLINE operator int () const { + return to_int(); + } + + INLINE operator unsigned int () const { + return to_uint(); + } +#if 1 +#ifdef __x86_64__ + INLINE operator long () const { + return (long)to_int64(); + } + + INLINE operator unsigned long () const { + return (unsigned long) to_uint64(); + } +#else + INLINE operator long () const { + return to_int64(); + } + + INLINE operator unsigned long () const { + return 
to_uint64(); + } + +#endif +#endif + INLINE operator unsigned long long () const { + return to_uint64(); + } + + INLINE operator long long () const { + return to_int64(); + } +#endif + + INLINE std::string to_string(uint8_t radix=2, bool sign=false) const; + + INLINE ap_slong bits_to_int64() const { + ap_private res(V); + return (ap_slong) res; + } + + INLINE ap_ulong bits_to_uint64() const { + ap_private res(V); + return (ap_ulong) res; + } + + INLINE int length() const {return _AP_W;} + + // Count the number of zeros from the most significant bit + // to the first one bit. Note this is only for ap_fixed_base whose + // _AP_W <= 64, otherwise will incur assertion. + INLINE int countLeadingZeros() { + return V.countLeadingZeros(); + } + + ///Arithmetic:Binary + //------------------------------------------------------------------------- + template + INLINE typename RType<_AP_W2,_AP_I2,_AP_S2>::mult + operator * (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const { + typename RType<_AP_W2,_AP_I2,_AP_S2>::mult r; + r.V = V * op2.V; + return r; + } + + template + static INLINE ap_fixed_base multiply(const ap_fixed_base<_AP_W1,_AP_I1,_AP_S1>& op1, const + ap_fixed_base<_AP_W2,_AP_I2,_AP_S2>& op2) { + ap_private<_AP_W+_AP_W2, _AP_S> OP1=op1.V; + ap_private<_AP_W2,_AP_S2> OP2=op2.V; + return OP1*OP2; + } + + template + INLINE typename RType<_AP_W2,_AP_I2,_AP_S2>::div + operator / (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const { + enum {F2 = _AP_W2-_AP_I2, _W1=AP_MAX(_AP_W + AP_MAX(F2, 0), _AP_W2), + _W2=AP_MAX(_AP_W2,AP_MAX(_AP_W + AP_MAX(F2, 0), _AP_W2))}; + ap_private<_W1, _AP_S> dividend = (ap_private<_W1, _AP_S>(V)) << ((_W1>_AP_W)?F2:0); + ap_private<_W1, _AP_S2> divisior = ap_private<_W2, _AP_S2>(op2.V); + ap_private<_W1, _AP_S> ret = ap_private<_W1,_AP_S> ((_AP_S||_AP_S2) ? 
dividend.sdiv(divisior): dividend.udiv(divisior)); + typename RType<_AP_W2, _AP_I2, _AP_S2>::div r; + r.V = ret; + return r; + } +#define OP_BIN_AF(Sym, Rty, Width, Sign, Fun) \ + template \ + INLINE typename RType<_AP_W2,_AP_I2,_AP_S2>::Rty \ + operator Sym (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const \ + { \ + enum {_AP_F=_AP_W-_AP_I, F2=_AP_W2-_AP_I2}; \ + typename RType<_AP_W2,_AP_I2,_AP_S2>::Rty r, lhs(*this), rhs(op2); \ + r.V = lhs.V.Fun(rhs.V); \ + return r; \ + } \ + INLINE typename RType<_AP_W,_AP_I,_AP_S>::Rty \ + operator Sym (const ap_fixed_base& op2) const \ + { \ + typename RType<_AP_W,_AP_I,_AP_S>::Rty r; \ + r.V = V Sym op2.V; \ + return r; \ + } \ + + OP_BIN_AF(+, plus, plus_w, plus_s, Add) + OP_BIN_AF(-, minus, minus_w, minus_s, Sub) + +#define OP_LOGIC_BIN_AF(Sym, Rty, Width, Sign) \ + template \ + INLINE typename RType<_AP_W2,_AP_I2,_AP_S2>::Rty \ + operator Sym (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const \ + { \ + typename RType<_AP_W2,_AP_I2,_AP_S2>::Rty r, lhs(*this), rhs(op2); \ + r.V=lhs.V Sym rhs.V; \ + return r; \ + } \ + INLINE typename RType<_AP_W,_AP_I,_AP_S>::Rty \ + operator Sym (const ap_fixed_base& op2) const \ + { \ + typename RType<_AP_W,_AP_I,_AP_S>::Rty r; \ + r.V = V Sym op2.V; \ + return r; \ + } \ + INLINE typename RType<_AP_W,_AP_I,_AP_S>::Rty operator Sym(int op2) const \ + { \ + return V Sym (op2<<(_AP_W - _AP_I)); \ + } + OP_LOGIC_BIN_AF(&, logic, logic_w, logic_s) + OP_LOGIC_BIN_AF(|, logic, logic_w, logic_s) + OP_LOGIC_BIN_AF(^, logic, logic_w, logic_s) + + ///Arithmic : assign + //------------------------------------------------------------------------- +#define OP_ASSIGN_AF(Sym) \ + template \ + INLINE ap_fixed_base& operator Sym##= (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) \ + { \ + *this=operator Sym (op2) ; \ + return *this; \ + } + + OP_ASSIGN_AF(+) + OP_ASSIGN_AF(-) + OP_ASSIGN_AF(&) + OP_ASSIGN_AF(|) + OP_ASSIGN_AF(^) 
+ OP_ASSIGN_AF(*) + OP_ASSIGN_AF(/) + + ///Prefix increment, decrement + //------------------------------------------------------------------------- + INLINE ap_fixed_base& operator ++() { + operator+=(ap_fixed_base<1,1,false>(1)); //SystemC's semantics + return *this; + } + + INLINE ap_fixed_base& operator --() { + operator-=(ap_fixed_base<1,1,false>(1)); //SystemC's semantics + return *this; + } + + //Postfix increment, decrement + //------------------------------------------------------------------------- + INLINE const ap_fixed_base operator ++(int) { + ap_fixed_base t(*this); + operator++(); + return t; + } + + INLINE const ap_fixed_base operator --(int) { + ap_fixed_base t = *this; + operator--(); + return t; + } + + ///Unary arithmetic + //------------------------------------------------------------------------- + INLINE ap_fixed_base operator +() {return *this;} + + INLINE ap_fixed_base<_AP_W + 1, _AP_I + 1, true> operator -() const { + ap_fixed_base<_AP_W + 1, _AP_I + 1, true> Tmp(*this); + Tmp.V = - Tmp.V; + return Tmp; + } + + INLINE ap_fixed_base<_AP_W,_AP_I,true,_AP_Q,_AP_O, _AP_N> getNeg() { + ap_fixed_base<_AP_W,_AP_I,true,_AP_Q,_AP_O, _AP_N> Tmp(*this); + Tmp.V=-Tmp.V; + return Tmp; + } + + ///Not (!) 
+ //------------------------------------------------------------------------- + INLINE bool operator !() const { + return !V; + } + + ///Bitwise complement + //------------------------------------------------------------------------- + INLINE ap_fixed_base<_AP_W, _AP_I, _AP_S> + operator ~() const { + ap_fixed_base<_AP_W, _AP_I, _AP_S> res(*this); + res.V.flip(); + return res; + } + + ///Shift + ///template argument as shift value + template + INLINE ap_fixed_base<_AP_W, _AP_I + _AP_SHIFT, _AP_S> lshift () const { + ap_fixed_base<_AP_W, _AP_I + _AP_SHIFT, _AP_S> r; + r.V = V; + return r; + } + + template + INLINE ap_fixed_base<_AP_W, _AP_I - _AP_SHIFT, _AP_S> rshift () const { + ap_fixed_base<_AP_W, _AP_I - _AP_SHIFT, _AP_S> r; + r.V = V; + return r; + } + + //Because the return type is the type of the the first operand, shift assign + //operators do not carry out any quantization or overflow + //While systemc, shift assigns for sc_fixed/sc_ufixed will result in + //quantization or overflow (depending on the mode of the first operand) + //------------------------------------------------------------------------- + INLINE ap_fixed_base operator << (int sh) const { + ap_fixed_base r; + bool isNeg=(sh&0x80000000) != 0; + sh=isNeg?-sh:sh; + bool shiftoverflow = sh >= _AP_W; + bool NegSrc = V.isNegative(); + if (isNeg) { + if (shiftoverflow) + NegSrc?r.V.set():r.V.clear(); + else + r.V=_AP_S?V.ashr(sh):V.lshr(sh); + } else { + if (shiftoverflow) + r.V.clear(); + else + r.V=V< 1 && sh <= _AP_W) + rb = (V << (_AP_W - sh + 1 )) != 0; + else if (sh > _AP_W) + rb = V != 0; + r.quantization_adjust(qb, rb, NegSrc); + } else if (isNeg == false && _AP_O != AP_WRAP) { + bool allones, allzeros; + if (sh < _AP_W ) { + ap_private<_AP_W, _AP_S > range1 = V.lshr(_AP_W - sh - 1); + allones = range1.isAllOnesValue(); + allzeros = range1.isMinValue(); + } else { + allones = false; + allzeros = V.isMinValue(); + } + bool overflow = !allzeros && !NegSrc; + bool underflow = !allones && 
NegSrc; + if (_AP_O == AP_SAT_SYM && _AP_S) + underflow |= NegSrc && (_AP_W > 1 ? r.V.isMinSignedValue():true); + bool lD = false; + if ( sh < _AP_W ) lD = V[_AP_W - sh - 1]; + r.overflow_adjust(underflow, overflow, lD, NegSrc); + } +#endif + return r; + } + + template + INLINE ap_fixed_base operator<<(const ap_private<_AP_W2,true>& op2) const { + int sh = op2.to_int(); + return operator << (sh); + } + + INLINE ap_fixed_base operator << (unsigned int sh ) const { + ap_fixed_base r; + bool shiftoverflow = sh >= _AP_W; + r.V = shiftoverflow ? ap_private<_AP_W, _AP_S >(0) : V << sh; + if (sh == 0) return r; +#ifdef __SC_COMPATIBLE__ + bool NegSrc = V.isNegative(); + if (_AP_O != AP_WRAP) { + bool allones, allzeros; + if (sh < _AP_W ) { + ap_private<_AP_W, _AP_S > range1 = V.lshr(_AP_W - sh -1); + allones = range1.isAllOnesValue(); + allzeros = range1.isMinValue(); + } else { + allones = false; + allzeros = V.isMinValue(); + } + bool overflow = !allzeros && !NegSrc; + bool underflow = !allones && NegSrc; + if (_AP_O == AP_SAT_SYM && _AP_S) + underflow |= NegSrc && (_AP_W > 1 ? 
r.V.isMinSignedValue():true); + bool lD = false; + if ( sh < _AP_W ) lD = V[_AP_W - sh - 1]; + r.overflow_adjust(underflow, overflow, lD, NegSrc); + } +#endif + return r; + } + + template + INLINE ap_fixed_base operator << (const ap_private<_AP_W2,false>& op2) const { + unsigned int sh = op2.to_uint(); + return operator << (sh); + } + + INLINE ap_fixed_base operator >> (int sh) const { + ap_fixed_base r; + bool isNeg=(sh&0x80000000) != 0; + bool NegSrc = V.isNegative(); + sh=isNeg?-sh:sh; + bool shiftoverflow = sh >= _AP_W; + if (isNeg && !shiftoverflow) r.V=V< 1 && sh <= _AP_W) + rb = (V << (_AP_W - sh + 1 )) != 0; + else if (sh > _AP_W) + rb = V != 0; + r.quantization_adjust(qb, rb, NegSrc); + } else if (isNeg == true && _AP_O != AP_WRAP) { + bool allones, allzeros; + if (sh < _AP_W ) { + ap_private<_AP_W, _AP_S > range1 = V.lshr(_AP_W - sh - 1); + allones = range1.isAllOnesValue(); + allzeros = range1.isMinValue(); + } else { + allones = false; + allzeros = V.isMinValue(); + } + bool overflow = !allzeros && !NegSrc; + bool underflow = !allones && NegSrc; + if (_AP_O == AP_SAT_SYM && _AP_S) + underflow |= NegSrc && (_AP_W > 1 ? 
r.V.isMinSignedValue():true); + bool lD = false; + if ( sh < _AP_W ) lD = V[_AP_W - sh - 1]; + r.overflow_adjust(underflow, overflow, lD, NegSrc); + } +#endif + return r; + } + + template + INLINE ap_fixed_base operator >> (const ap_private<_AP_W2,true>& op2) const { + int sh = op2.to_int(); + return operator >> (sh); + } + + INLINE ap_fixed_base operator >> (unsigned int sh) const { + ap_fixed_base r; + bool NegSrc = V.isNegative(); + bool shiftoverflow = sh >= _AP_W; + if (shiftoverflow) + NegSrc?r.V.set():r.V.clear(); + else + r.V=_AP_S?V.ashr(sh):V.lshr(sh); +#ifdef __SC_COMPATIBLE__ + if (sh == 0) return r; + if (_AP_Q != AP_TRN) { + bool qb = false; + if (sh <= _AP_W) qb = V[sh - 1]; + bool rb = false; + if (sh > 1 && sh <= _AP_W) + rb = (V << (_AP_W - sh + 1 )) != 0; + else if (sh > _AP_W) + rb = V != 0; + r.quantization_adjust(qb, rb, NegSrc); + } +#endif + return r; + } + + template + INLINE ap_fixed_base operator >> (const ap_private<_AP_W2,false>& op2) const { + unsigned int sh = op2.to_uint(); + return operator >> (sh); + } + + ///shift assign + //------------------------------------------------------------------------- +#define OP_AP_SHIFT_AP_ASSIGN_AF(Sym) \ + template \ + INLINE ap_fixed_base& operator Sym##=(const ap_private<_AP_W2,_AP_S2>& op2) \ + { \ + *this=operator Sym (op2); \ + return *this; \ + } + + OP_AP_SHIFT_AP_ASSIGN_AF(<<) + OP_AP_SHIFT_AP_ASSIGN_AF(>>) + + ///Support shift(ap_fixed_base) +#define OP_AP_SHIFT_AF(Sym) \ + template \ + INLINE ap_fixed_base operator Sym (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const \ + { \ + return operator Sym (op2.to_ap_private()); \ + } \ + template \ + INLINE ap_fixed_base& operator Sym##= (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) \ + { \ + *this=operator Sym (op2); \ + return *this; \ + } + + OP_AP_SHIFT_AF(<<) + OP_AP_SHIFT_AF(>>) + + INLINE ap_fixed_base& operator >>= (unsigned int sh) { + *this = operator >> (sh); + return *this; + } + + 
INLINE ap_fixed_base& operator <<= (unsigned int sh) { + *this = operator << (sh); + return *this; + } + + INLINE ap_fixed_base& operator >>= (int sh) { + *this = operator >> (sh); + return *this; + } + + INLINE ap_fixed_base& operator <<= (int sh) { + *this = operator << (sh); + return *this; + } + + ///Comparisons + //------------------------------------------------------------------------- + template + INLINE bool operator == (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const { + enum {_AP_F=_AP_W-_AP_I,F2=_AP_W2-_AP_I2, shAmt1 = AP_MAX(F2-_AP_F, 0), shAmt2 = AP_MAX(_AP_F-F2,0), _AP_W3 = (_AP_F==F2) ? AP_MAX(_AP_W,_AP_W2) : AP_MAX(_AP_W+shAmt1, _AP_W2+shAmt2)}; + ap_private<_AP_W3, _AP_S > OP1= ap_private<_AP_W3, _AP_S >(V)< OP2=ap_private<_AP_W3,_AP_S2 >(op2.V)< + INLINE bool operator != (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const { + return !(*this==op2); + } + + template + INLINE bool operator > (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const { + enum {_AP_F=_AP_W-_AP_I,F2=_AP_W2-_AP_I2, shAmt1 = AP_MAX(F2-_AP_F, 0), shAmt2 = AP_MAX(_AP_F-F2,0), _AP_W3 = (_AP_F==F2) ? AP_MAX(_AP_W,_AP_W2) : AP_MAX(_AP_W+shAmt1, _AP_W2+shAmt2)}; + ap_private<_AP_W3, _AP_S > OP1= ap_private<_AP_W3, _AP_S >(V)< OP2=ap_private<_AP_W3,_AP_S2 >(op2.V)< + INLINE bool operator <= (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const { + return !(*this>op2); + } + + template + INLINE bool operator < (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const { + enum {_AP_F=_AP_W-_AP_I,F2=_AP_W2-_AP_I2, shAmt1 = AP_MAX(F2-_AP_F, 0), shAmt2 = AP_MAX(_AP_F-F2,0), _AP_W3 = (_AP_F==F2) ? 
AP_MAX(_AP_W,_AP_W2) : AP_MAX(_AP_W+shAmt1, _AP_W2+shAmt2)}; + ap_private<_AP_W3, _AP_S > OP1= ap_private<_AP_W3, _AP_S >(V)< OP2=ap_private<_AP_W3,_AP_S2 >(op2.V)< + INLINE bool operator >= (const ap_fixed_base<_AP_W2,_AP_I2,_AP_S2,_AP_Q2,_AP_O2, _AP_N2>& op2) const { + return !(*this) + DOUBLE_CMP_AF(>=) + DOUBLE_CMP_AF(<) + DOUBLE_CMP_AF(<=) + + // Bit and Slice Select + INLINE af_bit_ref<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N> operator [] (unsigned int index) { + assert(index<_AP_W&&"Attemping to read bit beyond MSB"); + return af_bit_ref<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>(this, index); + } + + INLINE af_bit_ref<_AP_W, _AP_I,_AP_S,_AP_Q,_AP_O, _AP_N> bit(unsigned int index) { + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return af_bit_ref<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>(this, index); + } + + template + INLINE af_bit_ref<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N> bit (const ap_private<_AP_W2,_AP_S2>& index) { + assert(index >= 0 && "Attempting to read bit with negative index"); + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return af_bit_ref<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>(this, index.to_int()); + } + + INLINE bool bit (unsigned int index) const { + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return V[index]; + } + + INLINE bool operator [] (unsigned int index) const { + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return V[index]; + } + + template + INLINE bool bit (const ap_private<_AP_W2, _AP_S2>& index) const { + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return V[index.to_uint()]; + } + + template + INLINE bool operator [] (const ap_private<_AP_W2, _AP_S2>& index) const { + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return V[index.to_uint()]; + } + + INLINE af_bit_ref<_AP_W, _AP_I,_AP_S,_AP_Q,_AP_O, _AP_N> get_bit(int index) { + assert(index < _AP_I && "Attempting to read bit beyond MSB"); + assert(index >= _AP_I - _AP_W&& 
"Attempting to read bit beyond MSB"); + return af_bit_ref<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>(this, index + _AP_W - _AP_I); + } + + template + INLINE af_bit_ref<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N> get_bit (const ap_private<_AP_W2, true>& index) { + assert(index >= _AP_I - _AP_W && "Attempting to read bit with negative index"); + assert(index < _AP_I && "Attempting to read bit beyond MSB"); + return af_bit_ref<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>(this, index.to_int() + _AP_W - _AP_I); + } + + INLINE bool get_bit (int index) const { + assert(index >= _AP_I - _AP_W && "Attempting to read bit with negative index"); + assert(index < _AP_I && "Attempting to read bit beyond MSB"); + return V[index + _AP_W - _AP_I]; + } + + template + INLINE bool get_bit (const ap_private<_AP_W2, true>& index) const { + assert(index >= _AP_I - _AP_W && "Attempting to read bit with negative index"); + assert(index < _AP_I && "Attempting to read bit beyond MSB"); + return V[index.to_int() + _AP_W - _AP_I]; + } + + INLINE af_range_ref<_AP_W,_AP_I,_AP_S, _AP_Q, _AP_O, _AP_N> + range(int Hi, int Lo) { + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(this, Hi, Lo); + } + + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> + operator () (int Hi, int Lo) { + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(this, Hi, Lo); + } + + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> + range(int Hi, int Lo) const { + assert((Hi < _AP_W) && (Lo < _AP_W) &&"Out of bounds in range()"); + return af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(const_cast(this), Hi, Lo); + } + + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> + operator () (int Hi, int Lo) const { + return this->range(Hi, Lo); + } + + template + INLINE af_range_ref<_AP_W,_AP_I,_AP_S, _AP_Q, _AP_O, _AP_N> + range(const 
ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(this, Hi, Lo); + } + + template + INLINE af_range_ref<_AP_W,_AP_I,_AP_S, _AP_Q, _AP_O, _AP_N> + operator () (const ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(this, Hi, Lo); + } + + template + INLINE af_range_ref<_AP_W,_AP_I,_AP_S, _AP_Q, _AP_O, _AP_N> + range(const ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) const { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>(const_cast< + ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>*>(this), + Hi, Lo); + } + + template + INLINE af_range_ref<_AP_W,_AP_I,_AP_S, _AP_Q, _AP_O, _AP_N> + operator () (const ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) const { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return this->range(Hi, Lo); + } + + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> + range() { + return this->range(_AP_W - 1, 0); + } + + INLINE af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> + range() const { + return this->range(_AP_W - 1, 0); + } + + INLINE bool is_zero () const { + return V.isMinValue(); + } + + INLINE bool is_neg () const { + if (V.isNegative()) + return true; + return false; + } + + INLINE int wl () const { + return _AP_W; + } + + INLINE int iwl () const { + return _AP_I; + } + + INLINE ap_q_mode q_mode () const { + return _AP_Q; + } + + INLINE ap_o_mode o_mode () const { + return 
_AP_O; + } + + INLINE int n_bits () const { + return 0; + } + + //private: +public: + ap_private<_AP_W, _AP_S> V; +}; + +template +std::string ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>::to_string( + uint8_t radix, bool sign) const { + std::string str; + str.clear(); + char step; + std::string prefix; + switch (radix) { + case 2 : prefix = "0b"; step = 1; break; + case 8 : prefix = "0o"; step = 3; break; + case 16 : prefix = "0x"; step = 4; break; + default : break; + } + if (_AP_W <= _AP_I) + str = this->to_ap_private().to_string(radix, + radix == 10 ? _AP_S : sign); + else { + if (radix == 10) { + bool isNeg = _AP_S && V.isNegative(); + if (_AP_I > 0) { + ap_private int_part(0); + int_part = this->to_ap_private(); + str += int_part.to_string(radix, false); + } else { + if (isNeg) str += '-'; + } + ap_fixed_base<_AP_W, _AP_I, _AP_S> tmp(*this); + if (isNeg && _AP_I <= 0) tmp = -tmp; + ap_fixed_base<_AP_W - AP_MIN(_AP_I, 0), 0, false> frac_part = tmp; + if (frac_part == 0) return str; + str += "."; + while (frac_part != 0) { + char digit = (frac_part * radix).to_ap_private(); + str += static_cast(digit + '0'); + frac_part *= radix; + } + } else { + if (_AP_I > 0) { + for (signed i = _AP_W - _AP_I; i < _AP_W; i += step) { + + char digit = (char)(this->range(AP_MIN(i + step - 1, _AP_W - 1), i)); + str = (digit < 10 ? static_cast(digit + '0') : + static_cast(digit - 10 + 'a')) + str; + } + } + str += '.'; + ap_fixed_base tmp(*this); + for (signed i = _AP_W - _AP_I - 1; i >= 0; i -= step) { + char digit = (char)(tmp.range(i, AP_MAX(0, i - step + 1))); + str += digit < 10 ? 
static_cast(digit + '0') : + static_cast(digit - 10 + 'a'); + } + } + } + str = prefix + str; + return str; +} + +template +INLINE void b_not(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { + ret.V = op.V; + ret.V.flip(); +} + +template +INLINE void b_and(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + ret.V = op1.V & op2.V; +} + +template +INLINE void b_or(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + ret.V = op1.V | op2.V; +} + +template +INLINE void b_xor(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op1, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op2) { + ret.V = op1.V ^ op2.V; +} + +template +INLINE void neg(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op) { + ap_fixed_base<_AP_W2+!_AP_S2, _AP_I2+!_AP_S2, true, _AP_Q2, _AP_O2, _AP_N2> Tmp; + Tmp.V = - op.V; + ret = Tmp; +} + +template +INLINE void neg(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op) { + ret.V = -op.V; +} + +template +INLINE void lshift(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op, + int i) { + ap_fixed_base<_AP_W2 - _AP_I2 + AP_MAX(_AP_I, _AP_I2), AP_MAX(_AP_I, _AP_I2), _AP_S2, _AP_Q2, _AP_O2, _AP_N2> Tmp; + Tmp = op; + Tmp.V <<= i; + ret = Tmp; +} + +template +INLINE void lshift(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, 
+ const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, + int i) { + ret.V = op.V << i; +} + +template +INLINE void rshift(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2>& op, + int i) { + ap_fixed_base<_AP_I2 + AP_MAX(_AP_W - _AP_I, _AP_W2 - _AP_I2), _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> Tmp; + Tmp = op; + Tmp.V = _AP_S2 ? Tmp.V.ashr(i): Tmp.V.lshr(i); + ret = Tmp; +} + +template +INLINE void rshift(ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& ret, + const ap_fixed_base<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N>& op, + int i) { + ret.V = _AP_S ? op.V.ashr(i): op.V.lshr(i); +} + +#define AF_CTOR_SPEC_BASE(_AP_W,_AP_S,C_TYPE) \ + template<> INLINE ap_fixed_base<_AP_W,_AP_W,_AP_S,AP_TRN,AP_WRAP>::ap_fixed_base(C_TYPE i_op):V(i_op) \ + { \ + } + +#define AF_CTOR_SPEC(__W,C_TYPE) \ + AF_CTOR_SPEC_BASE(__W,true,C_TYPE) \ + AF_CTOR_SPEC_BASE(__W,false,C_TYPE) + +AF_CTOR_SPEC(1,bool) +AF_CTOR_SPEC(8, signed char) +AF_CTOR_SPEC(8, unsigned char) +AF_CTOR_SPEC(16, signed short) +AF_CTOR_SPEC(16, unsigned short) +AF_CTOR_SPEC(32, signed int) +AF_CTOR_SPEC(32, unsigned int) +AF_CTOR_SPEC(64, ap_slong) +AF_CTOR_SPEC(64, ap_ulong) + +///Output streaming +//----------------------------------------------------------------------------- +template +INLINE std::ostream& +operator <<(std::ostream& os, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& x) { + os << x.to_double(); + return os; +} + +///Input streaming +//----------------------------------------------------------------------------- +template +INLINE std::istream& +operator >> (std::istream& os, ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& x) { + double d; + os >> d; + x = ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>(x); + return os; +} + +template +INLINE void print(const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& x) { + ap_private<_AP_W,_AP_S> data=x.V; + if (_AP_I>0) { + const 
ap_private<_AP_I,_AP_S> p1=data>>(_AP_W-_AP_I); + print(p1); + + } else + printf("0"); + printf("."); + if (_AP_I<_AP_W) { + const ap_private<_AP_W-_AP_I,false> p2=data; + print(p2,false); + } +} + +///Operators mixing Integers with ap_fixed_base +//----------------------------------------------------------------------------- +#if 1 +#define AF_BIN_OP_WITH_INT_SF(BIN_OP,C_TYPE,_AP_W2,_AP_S2,RTYPE) \ + template \ + INLINE typename ap_fixed_base<_AP_W,_AP_I,_AP_S>::template RType<_AP_W2,_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP (const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, C_TYPE i_op) \ + { \ + return op.operator BIN_OP(ap_private<_AP_W2,_AP_S2>(i_op)); \ + } +#define AF_BIN_OP_WITH_INT(BIN_OP, C_TYPE, _AP_W2,_AP_S2,RTYPE) \ + template \ + INLINE typename ap_fixed_base<_AP_W,_AP_I,_AP_S>::template RType<_AP_W2,_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP (const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, C_TYPE i_op) \ + { \ + return op.operator BIN_OP (ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op)); \ + } \ + \ + template \ + INLINE typename ap_fixed_base<_AP_W,_AP_I,_AP_S>::template RType<_AP_W2,_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP (C_TYPE i_op, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op) \ + { \ + return ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op).operator BIN_OP (op); \ + } + +#else +#define AF_BIN_OP_WITH_INT_SF(BIN_OP,C_TYPE,_AP_W2,_AP_S2,RTYPE) \ + template \ + INLINE typename ap_fixed_base<_AP_W,_AP_I,_AP_S>::template RType<_AP_W2,_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP (const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, C_TYPE i_op) \ + { \ + return op BIN_OP (i_op); \ + } +#define AF_BIN_OP_WITH_INT(BIN_OP, C_TYPE, _AP_W2,_AP_S2,RTYPE) \ + template \ + INLINE typename ap_fixed_base<_AP_W,_AP_I,_AP_S>::template RType<_AP_W2,_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP (const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, C_TYPE i_op) \ + { \ + return op.V BIN_OP (i_op<<(_AP_W-_AP_I)); \ + } 
\ + \ + \ + template \ + INLINE typename ap_fixed_base<_AP_W,_AP_I,_AP_S>::template RType<_AP_W2,_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP (C_TYPE i_op, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op) \ + { \ + return ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op).operator BIN_OP (op); \ + } + +#endif +#if 1 +#define AF_REL_OP_WITH_INT(REL_OP, C_TYPE, _AP_W2,_AP_S2) \ + template \ + INLINE bool operator REL_OP (const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, C_TYPE i_op) \ + { \ + return op.operator REL_OP (ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op)); \ + } \ + \ + \ + template \ + INLINE bool operator REL_OP (C_TYPE i_op, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op) \ + { \ + return ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op).operator REL_OP (op); \ + } +#else +#define AF_REL_OP_WITH_INT(REL_OP, C_TYPE, _AP_W2,_AP_S2) \ + template \ + INLINE bool operator REL_OP (const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, C_TYPE i_op) \ + { \ + return op.V.operator REL_OP (i_op<<(_AP_W-_AP_I)); \ + } \ + \ + \ + template \ + INLINE bool operator REL_OP (C_TYPE i_op, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op) \ + { \ + return (i_op<<(_AP_W-_AP_I)) REL_OP (op.V.VAL); \ + } +#endif +#if 1 +#define AF_ASSIGN_OP_WITH_INT(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& operator ASSIGN_OP ( ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, C_TYPE i_op) { \ + return op.operator ASSIGN_OP (ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op)); \ + } +#define AF_ASSIGN_OP_WITH_INT_SF(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& operator ASSIGN_OP ( ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, C_TYPE i_op) { \ + return op.operator ASSIGN_OP (ap_private<_AP_W2,_AP_S2>(i_op)); \ + } +#else +#define AF_ASSIGN_OP_WITH_INT(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE 
ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& operator ASSIGN_OP ( ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, C_TYPE i_op) { \ + return op.V.operator ASSIGN_OP (i_op); \ + } +#define AF_ASSIGN_OP_WITH_INT_SF(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& operator ASSIGN_OP ( ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, C_TYPE i_op) { \ + return op.V.operator ASSIGN_OP (i_op); \ + } +#endif + +#define AF_OPS_WITH_INT(C_TYPE, WI, SI) \ + AF_BIN_OP_WITH_INT(+, C_TYPE, WI, SI, plus) \ + AF_BIN_OP_WITH_INT(-, C_TYPE, WI, SI, minus) \ + AF_BIN_OP_WITH_INT(*, C_TYPE, WI, SI, mult) \ + AF_BIN_OP_WITH_INT(/, C_TYPE, WI, SI, div) \ + AF_BIN_OP_WITH_INT_SF(>>, C_TYPE, WI, SI, arg1) \ + AF_BIN_OP_WITH_INT_SF(<<, C_TYPE, WI, SI, arg1) \ + AF_BIN_OP_WITH_INT(&, C_TYPE, WI, SI, logic) \ + AF_BIN_OP_WITH_INT(|, C_TYPE, WI, SI, logic) \ + AF_BIN_OP_WITH_INT(^, C_TYPE, WI, SI, logic) \ + \ + AF_REL_OP_WITH_INT(==, C_TYPE, WI, SI) \ + AF_REL_OP_WITH_INT(!=, C_TYPE, WI, SI) \ + AF_REL_OP_WITH_INT(>, C_TYPE, WI, SI) \ + AF_REL_OP_WITH_INT(>=, C_TYPE, WI, SI) \ + AF_REL_OP_WITH_INT(<, C_TYPE, WI, SI) \ + AF_REL_OP_WITH_INT(<=, C_TYPE, WI, SI) \ + \ + AF_ASSIGN_OP_WITH_INT(+=, C_TYPE, WI, SI) \ + AF_ASSIGN_OP_WITH_INT(-=, C_TYPE, WI, SI) \ + AF_ASSIGN_OP_WITH_INT(*=, C_TYPE, WI, SI) \ + AF_ASSIGN_OP_WITH_INT(/=, C_TYPE, WI, SI) \ + AF_ASSIGN_OP_WITH_INT_SF(>>=, C_TYPE, WI, SI) \ + AF_ASSIGN_OP_WITH_INT_SF(<<=, C_TYPE, WI, SI) \ + AF_ASSIGN_OP_WITH_INT(&=, C_TYPE, WI, SI) \ + AF_ASSIGN_OP_WITH_INT(|=, C_TYPE, WI, SI) \ + AF_ASSIGN_OP_WITH_INT(^=, C_TYPE, WI, SI) + +AF_OPS_WITH_INT(bool, 1, false) +AF_OPS_WITH_INT(char, 8, true) +AF_OPS_WITH_INT(signed char, 8, true) +AF_OPS_WITH_INT(unsigned char, 8, false) +AF_OPS_WITH_INT(short, 16, true) +AF_OPS_WITH_INT(unsigned short, 16, false) +AF_OPS_WITH_INT(int, 32, true) +AF_OPS_WITH_INT(unsigned int, 32, false) +# if defined __x86_64__ 
+AF_OPS_WITH_INT(long, 64, true) +AF_OPS_WITH_INT(unsigned long, 64, false) +# else +AF_OPS_WITH_INT(long, 32, true) +AF_OPS_WITH_INT(unsigned long, 32, false) +# endif +AF_OPS_WITH_INT(ap_slong, 64, true) +AF_OPS_WITH_INT(ap_ulong, 64, false) + +#define AF_BIN_OP_WITH_AP_INT(BIN_OP, RTYPE) \ + template \ + INLINE typename ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>::template RType<_AP_W,_AP_I,_AP_S>::RTYPE \ + operator BIN_OP ( const ap_private<_AP_W2,_AP_S2>& i_op, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op) { \ + return ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op).operator BIN_OP (op); \ + } \ + template \ + INLINE typename ap_fixed_base<_AP_W,_AP_I,_AP_S>::template RType<_AP_W2,_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP ( const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, const ap_private<_AP_W2,_AP_S2>& i_op) { \ + return op.operator BIN_OP (ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op)); \ + } + +#define AF_REL_OP_WITH_AP_INT(REL_OP) \ + template \ + INLINE bool operator REL_OP ( const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, const ap_private<_AP_W2,_AP_S2>& i_op) { \ + return op.operator REL_OP ( ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op)); \ + } \ + template \ + INLINE bool operator REL_OP ( const ap_private<_AP_W2,_AP_S2>& i_op, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op) { \ + return ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op).operator REL_OP (op); \ + } + +#define AF_ASSIGN_OP_WITH_AP_INT(ASSIGN_OP) \ + template \ + INLINE ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& operator ASSIGN_OP ( ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op, const ap_private<_AP_W2,_AP_S2>& i_op) { \ + return op.operator ASSIGN_OP (ap_fixed_base<_AP_W2,_AP_W2,_AP_S2>(i_op)); \ + } \ + template \ + INLINE ap_private<_AP_W2,_AP_S2>& operator ASSIGN_OP ( ap_private<_AP_W2,_AP_S2>& i_op, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op) { \ + return i_op.operator ASSIGN_OP (op.to_ap_private()); \ + } + 
+AF_BIN_OP_WITH_AP_INT(+, plus) +AF_BIN_OP_WITH_AP_INT(-, minus) +AF_BIN_OP_WITH_AP_INT(*, mult) +AF_BIN_OP_WITH_AP_INT(/, div) +AF_BIN_OP_WITH_AP_INT(&, logic) +AF_BIN_OP_WITH_AP_INT(|, logic) +AF_BIN_OP_WITH_AP_INT(^, logic) + +AF_REL_OP_WITH_AP_INT(==) +AF_REL_OP_WITH_AP_INT(!=) +AF_REL_OP_WITH_AP_INT(>) +AF_REL_OP_WITH_AP_INT(>=) +AF_REL_OP_WITH_AP_INT(<) +AF_REL_OP_WITH_AP_INT(<=) + +AF_ASSIGN_OP_WITH_AP_INT(+=) +AF_ASSIGN_OP_WITH_AP_INT(-=) +AF_ASSIGN_OP_WITH_AP_INT(*=) +AF_ASSIGN_OP_WITH_AP_INT(/=) +AF_ASSIGN_OP_WITH_AP_INT(&=) +AF_ASSIGN_OP_WITH_AP_INT(|=) +AF_ASSIGN_OP_WITH_AP_INT(^=) + +#define AF_REF_REL_OP_MIX_INT(REL_OP, C_TYPE, _AP_W2, _AP_S2) \ +template \ + INLINE bool operator REL_OP ( const af_range_ref<_AP_W,_AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op, C_TYPE op2) { \ + return (ap_private<_AP_W, false>(op)).operator REL_OP (ap_private<_AP_W2,_AP_S2>(op2)); \ + } \ +template \ + INLINE bool operator REL_OP ( C_TYPE op2, const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op) { \ + return ap_private<_AP_W2,_AP_S2>(op2).operator REL_OP (ap_private<_AP_W, false>(op)); \ + } \ +template \ + INLINE bool operator REL_OP ( const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op, C_TYPE op2) { \ + return (bool(op)) REL_OP op2; \ + } \ +template \ + INLINE bool operator REL_OP ( C_TYPE op2, const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op) { \ + return op2 REL_OP (bool(op)); \ + } + +#define AF_REF_REL_MIX_INT(C_TYPE, _AP_WI, _AP_SI) \ +AF_REF_REL_OP_MIX_INT(>, C_TYPE, _AP_WI, _AP_SI) \ +AF_REF_REL_OP_MIX_INT(<, C_TYPE, _AP_WI, _AP_SI) \ +AF_REF_REL_OP_MIX_INT(>=, C_TYPE, _AP_WI, _AP_SI) \ +AF_REF_REL_OP_MIX_INT(<=, C_TYPE, _AP_WI, _AP_SI) \ +AF_REF_REL_OP_MIX_INT(==, C_TYPE, _AP_WI, _AP_SI) \ +AF_REF_REL_OP_MIX_INT(!=, C_TYPE, _AP_WI, _AP_SI) + +AF_REF_REL_MIX_INT(bool, 1, false) +AF_REF_REL_MIX_INT(char, 8, true) +AF_REF_REL_MIX_INT(signed char, 8, true) +AF_REF_REL_MIX_INT(unsigned char, 8, false) +AF_REF_REL_MIX_INT(short, 16, 
true) +AF_REF_REL_MIX_INT(unsigned short, 16, false) +AF_REF_REL_MIX_INT(int, 32, true) +AF_REF_REL_MIX_INT(unsigned int, 32, false) +# if defined __x86_64__ +AF_REF_REL_MIX_INT(long, 64, true) +AF_REF_REL_MIX_INT(unsigned long, 64, false) +# else +AF_REF_REL_MIX_INT(long, 32, true) +AF_REF_REL_MIX_INT(unsigned long, 32, false) +# endif +AF_REF_REL_MIX_INT(ap_slong, 64, true) +AF_REF_REL_MIX_INT(ap_ulong, 64, false) + +#define AF_REF_REL_OP_AP_INT(REL_OP) \ +template \ + INLINE bool operator REL_OP ( const af_range_ref<_AP_W,_AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op, const ap_private<_AP_W2, _AP_S> &op2) { \ + return (ap_private<_AP_W, false>(op)).operator REL_OP (op2); \ + } \ +template \ + INLINE bool operator REL_OP (const ap_private<_AP_W2, _AP_S2> &op2, const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op) { \ + return op2.operator REL_OP (ap_private<_AP_W, false>(op)); \ + } \ +template \ + INLINE bool operator REL_OP ( const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op, const ap_private<_AP_W2, _AP_S2> &op2) { \ + return (ap_private<1, false>(op)).operator REL_OP (op2); \ + } \ +template \ + INLINE bool operator REL_OP ( const ap_private<_AP_W2, _AP_S2> &op2, const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op) { \ + return op2.operator REL_OP (ap_private<1,false>(op)); \ + } + +AF_REF_REL_OP_AP_INT(>) +AF_REF_REL_OP_AP_INT(<) +AF_REF_REL_OP_AP_INT(>=) +AF_REF_REL_OP_AP_INT(<=) +AF_REF_REL_OP_AP_INT(==) +AF_REF_REL_OP_AP_INT(!=) + +// Relational Operators with double +template +INLINE bool operator == ( double op1, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op2) { + return op2.operator == (op1); +} + +template +INLINE bool operator != ( double op1, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op2) { + return op2.operator != (op1); +} + +template +INLINE bool operator > ( double op1, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op2) { + return op2.operator < (op1); +} + +template 
+INLINE bool operator >= ( double op1, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op2) { + return op2.operator <= (op1); +} + +template +INLINE bool operator < ( double op1, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op2) { + return op2.operator > (op1); +} + +template +INLINE bool operator <= ( double op1, const ap_fixed_base<_AP_W,_AP_I,_AP_S,_AP_Q,_AP_O, _AP_N>& op2) { + return op2.operator >= (op1); +} + +#endif /* #ifndef __AESL_GCC_AP_FIXED_H__ */ \ No newline at end of file diff --git a/hls_2018/router_03_boardstr/etc/ap_int_sim.h b/hls_2018/router_03_boardstr/etc/ap_int_sim.h new file mode 100755 index 0000000..887ccd8 --- /dev/null +++ b/hls_2018/router_03_boardstr/etc/ap_int_sim.h @@ -0,0 +1,1629 @@ +/* + * Copyright 2012 Xilinx, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __AESL_GCC_AP_INT_H__ +#define __AESL_GCC_AP_INT_H__ + +#ifndef __cplusplus +#error C++ is required to include this header file +#endif /* #ifndef __cplusplus */ + +#undef _AP_DEBUG_ +#include +#include + +// for safety +#if (defined(_AP_N)|| defined(_AP_C)) +#error One or more of the following is defined: _AP_N, _AP_C. Definition conflicts with their usage as template parameters. 
+#endif /* #if (defined(_AP_N)|| defined(_AP_C)) */ + +// for safety +#if (defined(_AP_W) || defined(_AP_I) || defined(_AP_S) || defined(_AP_Q) || defined(_AP_O) || defined(_AP_W2) || defined(_AP_I2) || defined(_AP_S2) || defined(_AP_Q2) || defined(_AP_O2)) +#error One or more of the following is defined: _AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2. Definition conflicts with their usage as template parameters. +#endif /* #if (defined(_AP_W) || defined(_AP_I) || defined(_AP_S) || defined(_AP_Q) || defined(_AP_O) || defined(_AP_W2) || defined(_AP_I2) || defined(_AP_S2) || defined(_AP_Q2) || defined(_AP_O2)) */ + +//for safety +#if (defined(_AP_W3) || defined(_AP_S3) || defined(_AP_W4) || defined(_AP_S4)) +#error One or more of the following is defined: _AP_W3, _AP_S3, _AP_W4,_AP_S4. Definition conflicts with their usage as template parameters. +#endif /* #if (defined(_AP_W3) || defined(_AP_S3) || defined(_AP_W4) || defined(_AP_S4)) */ + +//for safety +#if (defined(_AP_W1) || defined(_AP_S1) || defined(_AP_I1) || defined(_AP_T) || defined(_AP_T1) || defined(_AP_T2) || defined(_AP_T3) || defined(_AP_T4)) +#error One or more of the following is defined: _AP_W1, _AP_S1, _AP_I1, _AP_T, _AP_T1, _AP_T2, _AP_T3, _AP_T4. Definition conflicts with their usage as template parameters. 
+#endif /* #if (defined(_AP_W1) || defined(_AP_S1) || defined(_AP_I1) || defined(_AP_T) || defined(_AP_T1) || defined(_AP_T2) || defined(_AP_T3) || defined(_AP_T4)) */ + +#define __AESL_APDT_IN_SCFLOW__ +#ifndef __AESL_APDT_IN_SCFLOW__ + #include "etc/ap_private.h" +#else + #include "../etc/ap_private.h" +#endif /* #ifndef __AESL_APDT_IN_SCFLOW__ */ + +#ifdef _AP_DEBUG_ + #define AP_DEBUG(s) s +#else + #define AP_DEBUG(s) +#endif /* #ifdef _AP_DEBUG_ */ + +#ifndef __SIMULATION__ + #define __SIMULATION__ +#endif /* #ifndef __SIMULATION__ */ + +#if !(defined SYSTEMC_H) && !(defined SYSTEMC_INCLUDED) + #ifndef SC_TRN + #define SC_TRN AP_TRN + #endif /* #ifndef SC_TRN */ + #ifndef SC_RND + #define SC_RND AP_RND + #endif /* #ifndef SC_RND */ + #ifndef SC_TRN_ZERO + #define SC_TRN_ZERO AP_TRN_ZERO + #endif /* #ifndef SC_TRN_ZERO */ + #ifndef SC_RND_ZERO + #define SC_RND_ZERO AP_RND_ZERO + #endif /* #ifndef SC_RND_ZERO */ + #ifndef SC_RND_INF + #define SC_RND_INF AP_RND_INF + #endif /* #ifndef SC_RND_INF */ + #ifndef SC_RND_MIN_INF + #define SC_RND_MIN_INF AP_RND_MIN_INF + #endif /* #ifndef SC_RND_MIN_INF */ + #ifndef SC_RND_CONV + #define SC_RND_CONV AP_RND_CONV + #endif /* #ifndef SC_RND_CONV */ + #ifndef SC_WRAP + #define SC_WRAP AP_WRAP + #endif /* #ifndef SC_WRAP */ + #ifndef SC_SAT + #define SC_SAT AP_SAT + #endif /* #ifndef SC_SAT */ + #ifndef SC_SAT_ZERO + #define SC_SAT_ZERO AP_SAT_ZERO + #endif /* #ifndef SC_SAT_ZERO */ + #ifndef SC_SAT_SYM + #define SC_SAT_SYM AP_SAT_SYM + #endif /* #ifndef SC_SAT_SYM */ + #ifndef SC_WRAP_SM + #define SC_WRAP_SM AP_WRAP_SM + #endif /* #ifndef SC_WRAP_SM */ + #ifndef SC_BIN + #define SC_BIN AP_BIN + #endif /* #ifndef SC_BIN */ + #ifndef SC_OCT + #define SC_OCT AP_OCT + #endif /* #ifndef SC_OCT */ + #ifndef SC_DEC + #define SC_DEC AP_DEC + #endif /* #ifndef SC_DEC */ + #ifndef SC_HEX + #define SC_HEX AP_HEX + #endif /* #ifndef SC_HEX */ +#endif /* #if !(defined SYSTEMC_H) && !(defined SYSTEMC_INCLUDED) */ +#ifndef AP_INT_MAX_W 
+#define AP_INT_MAX_W 1024 +#endif +#define BIT_WIDTH_UPPER_LIMIT (1 << 15) +#if AP_INT_MAX_W > BIT_WIDTH_UPPER_LIMIT +#error "Bitwidth exceeds 32768 (1 << 15), the maximum allowed value" +#endif +#define MAX_MODE(BITS) ((BITS + 1023) / 1024) + +///Forward declaration +template struct ap_range_ref; +template struct ap_bit_ref; + +template struct ap_fixed_base; +template struct af_range_ref; +template struct af_bit_ref; +template class ap_uint; + +enum { + AP_BIN = 2, + AP_OCT = 8, + AP_DEC = 10, + AP_HEX = 16 +}; + +///Why to use reference? +///Because we will operate the original object indirectly by operating the +///result object directly after concating or part selecting + +///Proxy class which allows concatination to be used as rvalue(for reading) and +//lvalue(for writing) + +/// Concatination reference. +// ---------------------------------------------------------------- +template +struct ap_concat_ref { +#ifdef _MSC_VER + #pragma warning(disable: 4521 4522) +#endif /* #ifdef _MSC_VER */ + enum {_AP_WR=_AP_W1+_AP_W2,}; + _AP_T1& mbv1; + _AP_T2& mbv2; + + INLINE ap_concat_ref(const ap_concat_ref<_AP_W1, _AP_T1, + _AP_W2, _AP_T2>& ref): + mbv1(ref.mbv1), mbv2(ref.mbv2) {} + + INLINE ap_concat_ref(_AP_T1& bv1, _AP_T2& bv2):mbv1(bv1),mbv2(bv2) {} + + template + INLINE ap_concat_ref& operator = (const ap_private<_AP_W3,_AP_S3>& val) { + ap_private<_AP_W1+_AP_W2, false> vval(val); + int W_ref1=mbv1.length(); + int W_ref2=mbv2.length(); + ap_private<_AP_W1,false> mask1(-1); + mask1>>=_AP_W1-W_ref1; + ap_private<_AP_W2,false> mask2(-1); + mask2>>=_AP_W2-W_ref2; + mbv1.set(ap_private<_AP_W1,false>((vval>>W_ref2)&mask1)); + mbv2.set(ap_private<_AP_W2,false>(vval&mask2)); + return *this; + } + + INLINE ap_concat_ref& operator = (unsigned long long val) { + ap_private<_AP_W1+_AP_W2, false> tmpVal(val); + return operator = (tmpVal); + } + + template + INLINE ap_concat_ref& operator = + (const ap_concat_ref <_AP_W3, _AP_T3, _AP_W4, _AP_T4>& val) { + 
ap_private<_AP_W1+_AP_W2, false> tmpVal(val); + return operator = (tmpVal); + } + + INLINE ap_concat_ref& operator = + (const ap_concat_ref <_AP_W1, _AP_T1, _AP_W2, _AP_T2>& val) { + ap_private<_AP_W1+_AP_W2, false> tmpVal(val); + return operator = (tmpVal); + } + + template + INLINE ap_concat_ref& operator =(const ap_bit_ref<_AP_W3, _AP_S3>& val) { + ap_private<_AP_W1+_AP_W2, false> tmpVal(val); + return operator = (tmpVal); + } + + template + INLINE ap_concat_ref& operator =(const ap_range_ref<_AP_W3,_AP_S3>& val) { + ap_private<_AP_W1+_AP_W2, false> tmpVal(val); + return operator =(tmpVal); + } + + template + INLINE ap_concat_ref& operator= (const af_range_ref<_AP_W3, _AP_I3, _AP_S3, + _AP_Q3, _AP_O3, _AP_N3>& val) { + return operator = ((const ap_private<_AP_W3, false>)(val)); + } + + template + INLINE ap_concat_ref& operator= (const ap_fixed_base<_AP_W3, _AP_I3, _AP_S3, + _AP_Q3, _AP_O3, _AP_N3>& val) { + return operator = (val.to_ap_private()); + } + + template + INLINE ap_concat_ref& operator= (const af_bit_ref<_AP_W3, _AP_I3, _AP_S3, + _AP_Q3, _AP_O3, _AP_N3>& val) { + return operator=((unsigned long long)(bool)(val)); + } + + + INLINE operator ap_private<_AP_WR, false> () const { + return get(); + } + + INLINE operator unsigned long long () const { + return get().to_uint64(); + } + + template + INLINE ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, ap_range_ref<_AP_W3, _AP_S3> > + operator, (const ap_range_ref<_AP_W3, _AP_S3> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, + _AP_W3, ap_range_ref<_AP_W3, _AP_S3> >(*this, + const_cast &>(a2)); + } + + template + INLINE ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, ap_private<_AP_W3, _AP_S3> > + operator, (ap_private<_AP_W3, _AP_S3> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, + _AP_W3, ap_private<_AP_W3, _AP_S3> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, ap_private<_AP_W3, _AP_S3> > + operator, (const ap_private<_AP_W3, _AP_S3> &a2) { + return 
ap_concat_ref<_AP_WR, ap_concat_ref, + _AP_W3, ap_private<_AP_W3, _AP_S3> >(*this, + const_cast&>(a2)); + } + + template + INLINE ap_concat_ref<_AP_WR, ap_concat_ref, 1, ap_bit_ref<_AP_W3, _AP_S3> > + operator, (const ap_bit_ref<_AP_W3, _AP_S3> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, + 1, ap_bit_ref<_AP_W3, _AP_S3> >(*this, + const_cast &>(a2)); + } + + template + INLINE ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3+_AP_W4, ap_concat_ref<_AP_W3,_AP_T3,_AP_W4,_AP_T4> > + operator, (const ap_concat_ref<_AP_W3,_AP_T3,_AP_W4,_AP_T4> &a2) + { + return ap_concat_ref<_AP_WR, ap_concat_ref, + _AP_W3+_AP_W4, ap_concat_ref<_AP_W3,_AP_T3,_AP_W4, + _AP_T4> >(*this, const_cast& >(a2)); + } + + template + INLINE + ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> > + operator, (const af_range_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, + _AP_O3, _AP_N3> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, _AP_W3, af_range_ref<_AP_W3, + _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> >(*this, + const_cast& >(a2)); + } + + template + INLINE + ap_concat_ref<_AP_WR, ap_concat_ref, 1, af_bit_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> > + operator, (const af_bit_ref<_AP_W3, _AP_I3, _AP_S3, _AP_Q3, + _AP_O3, _AP_N3> &a2) { + return ap_concat_ref<_AP_WR, ap_concat_ref, 1, af_bit_ref<_AP_W3, + _AP_I3, _AP_S3, _AP_Q3, _AP_O3, _AP_N3> >(*this, + const_cast& >(a2)); + } + + template + INLINE ap_private + operator & (const ap_private<_AP_W3,_AP_S3>& a2) { + return get() & a2; + } + + + template + INLINE ap_private + operator | (const ap_private<_AP_W3,_AP_S3>& a2) { + return get() | a2; + } + + + template + INLINE ap_private + operator ^ (const ap_private<_AP_W3,_AP_S3>& a2) { + return ap_private(get() ^ a2); + } + + INLINE const ap_private<_AP_WR, false> get() const + { + ap_private<_AP_W1+_AP_W2, false> tmpVal = ap_private<_AP_W1+_AP_W2, false> (mbv1.get()); + ap_private<_AP_W1+_AP_W2, false> tmpVal2 = ap_private<_AP_W1+_AP_W2, 
false> (mbv2.get()); + int W_ref2 = mbv2.length(); + tmpVal <<= W_ref2; + tmpVal |= tmpVal2; + return tmpVal; + } + + INLINE const ap_private<_AP_WR, false> get() { + ap_private<_AP_W1+_AP_W2, false> tmpVal = ap_private<_AP_W1+_AP_W2, false> ( mbv1.get()); + ap_private<_AP_W1+_AP_W2, false> tmpVal2 = ap_private<_AP_W1+_AP_W2, false> (mbv2.get()); + int W_ref2 = mbv2.length(); + tmpVal <<= W_ref2; + tmpVal |= tmpVal2; + return tmpVal; + } + + template + INLINE void set(const ap_private<_AP_W3,false> & val) { + ap_private<_AP_W1+_AP_W2, false> vval(val); + int W_ref1=mbv1.length(); + int W_ref2=mbv2.length(); + ap_private<_AP_W1,false> mask1(-1); + mask1>>=_AP_W1-W_ref1; + ap_private<_AP_W2,false> mask2(-1); + mask2>>=_AP_W2-W_ref2; + mbv1.set(ap_private<_AP_W1,false>((vval>>W_ref2)&mask1)); + mbv2.set(ap_private<_AP_W2,false>(vval&mask2)); + } + + INLINE int length() const { + return mbv1.length()+mbv2.length(); + } + + INLINE std::string to_string(uint8_t radix=2) const { + return get().to_string(radix); + } +}; + +///Proxy class, which allows part selection to be used as rvalue(for reading) and +//lvalue(for writing) + +///Range(slice) reference +//------------------------------------------------------------ +template +struct ap_range_ref { +#ifdef _MSC_VER + #pragma warning( disable : 4521 4522 ) +#endif /* #ifdef _MSC_VER */ + ap_private<_AP_W,_AP_S> &d_bv; + int l_index; + int h_index; + +public: + INLINE ap_range_ref(const ap_range_ref<_AP_W, _AP_S>& ref): + d_bv(ref.d_bv), l_index(ref.l_index), h_index(ref.h_index) {} + + INLINE ap_range_ref(ap_private<_AP_W,_AP_S>* bv, int h, int l): + d_bv(*bv),l_index(l),h_index(h) { + //if (h < l) + //fprintf(stderr, "Warning! 
The bits selected will be returned in reverse order\n"); + } + + INLINE operator ap_private<_AP_W, false> () const { + ap_private<_AP_W, false> val(0); + if(h_index>=l_index) { + if (_AP_W > 64) { + val=d_bv; + ap_private<_AP_W,false> mask(-1); + mask>>=_AP_W-(h_index-l_index+1); + val>>=l_index; + val&=mask; + } else { + const static uint64_t mask = (~0ULL>> (64>_AP_W ? (64-_AP_W):0)); + val = (d_bv >> l_index) & (mask >>(_AP_W-(h_index-l_index+1))); + } + } else { + for(int i=0, j=l_index;j>=0&&j>=h_index;j--,i++) + if((d_bv)[j]) val.set(i); + } + return val; + } + + INLINE operator unsigned long long () const { + return to_uint64(); + } + + template + INLINE ap_range_ref& operator =(const ap_private<_AP_W2,_AP_S2>& val) { + ap_private<_AP_W,false> vval=ap_private<_AP_W,false>(val); + if (l_index>h_index) { + for (int i=0, j=l_index;j>=0&&j>=h_index;j--,i++) + (vval)[i]? d_bv.set(j):d_bv.clear(j); + } else { + if (_AP_W > 64) { + ap_private<_AP_W,false> mask(-1); + if (l_index>0) { + mask<<=l_index; + vval<<=l_index; + } + if(h_index<_AP_W-1) + { + ap_private<_AP_W,false> mask2(-1); + mask2>>=_AP_W-h_index-1; + mask&=mask2; + vval&=mask2; + } + mask.flip(); + d_bv&=mask; + d_bv|=vval; + } else { + unsigned shift = 64-_AP_W; + uint64_t mask = ~0ULL>>(shift); + if(l_index>0) + { + vval = mask & vval << l_index; + mask = mask & mask << l_index; + } + if(h_index<_AP_W-1) + { + uint64_t mask2 = mask; + mask2 >>= (_AP_W-h_index-1); + mask&=mask2; + vval&=mask2; + } + mask = ~mask; + d_bv&=mask; + d_bv|=vval; + } + } + return *this; + } + + INLINE ap_range_ref& operator = (unsigned long long val) + { + const ap_private<_AP_W,_AP_S> vval=val; + return operator = (vval); + } + + + INLINE ap_range_ref& operator =(const ap_range_ref<_AP_W, _AP_S>& val) + { + const ap_private<_AP_W, false> tmpVal(val); + return operator =(tmpVal); + } + + + + template + INLINE ap_range_ref& operator = + (const ap_concat_ref <_AP_W3, _AP_T3, _AP_W4, _AP_T4>& val) + { + const ap_private<_AP_W, 
false> tmpVal(val); + return operator = (tmpVal); + } + + template + INLINE ap_range_ref& operator =(const ap_range_ref<_AP_W3,_AP_S3>& val) + { + const ap_private<_AP_W, false> tmpVal(val); + return operator =(tmpVal); + } + + template + INLINE ap_range_ref& operator= (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=((const ap_private<_AP_W2, _AP_S2>)(val)); + } + + template + INLINE ap_range_ref& operator= (const ap_fixed_base<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=(val.to_ap_private()); + } + + template + INLINE ap_range_ref& operator= (const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=((unsigned long long)(bool)(val)); + } + + template + INLINE ap_range_ref& operator= (const ap_bit_ref<_AP_W2, _AP_S2>& val) { + return operator=((unsigned long long)(bool)(val)); + } + + template + INLINE + ap_concat_ref<_AP_W,ap_range_ref,_AP_W2,ap_range_ref<_AP_W2,_AP_S2> > + operator, (const ap_range_ref<_AP_W2,_AP_S2> &a2) + { + return + ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, + ap_range_ref<_AP_W2,_AP_S2> >(*this, + const_cast& >(a2)); + } + + + template + INLINE ap_concat_ref<_AP_W,ap_range_ref,_AP_W2,ap_private<_AP_W2,_AP_S2> > + operator , (ap_private<_AP_W2,_AP_S2>& a2) + { + return + ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, ap_private<_AP_W2,_AP_S2> >(*this, a2); + } + + INLINE ap_concat_ref<_AP_W,ap_range_ref,_AP_W,ap_private<_AP_W,_AP_S> > + operator , (ap_private<_AP_W, _AP_S>& a2) + { + return + ap_concat_ref<_AP_W, ap_range_ref, _AP_W, + ap_private<_AP_W,_AP_S> >(*this, a2); + } + + + + template + INLINE + ap_concat_ref<_AP_W,ap_range_ref,1,ap_bit_ref<_AP_W2,_AP_S2> > + operator, (const ap_bit_ref<_AP_W2,_AP_S2> &a2) + { + return + ap_concat_ref<_AP_W, ap_range_ref, 1, + ap_bit_ref<_AP_W2,_AP_S2> >(*this, const_cast& >(a2)); + } + + + template + INLINE + ap_concat_ref<_AP_W, ap_range_ref, _AP_W2+_AP_W3, ap_concat_ref<_AP_W2, _AP_T2, 
_AP_W3, _AP_T3> > + operator, (const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) + { + return ap_concat_ref<_AP_W, ap_range_ref, _AP_W2+_AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(*this, + const_cast& >(a2)); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) { + return ap_concat_ref<_AP_W, ap_range_ref, _AP_W2, af_range_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, + const_cast& >(a2)); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_range_ref, 1, af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) { + return ap_concat_ref<_AP_W, ap_range_ref, 1, af_bit_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, + const_cast& >(a2)); + } + + template + INLINE bool operator == (const ap_range_ref<_AP_W2, _AP_S2>& op2) + { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs==rhs; + } + + + template + INLINE bool operator != (const ap_range_ref<_AP_W2, _AP_S2>& op2) + { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs!=rhs; + } + + + template + INLINE bool operator > (const ap_range_ref<_AP_W2, _AP_S2>& op2) + { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs>rhs; + } + + + template + INLINE bool operator >= (const ap_range_ref<_AP_W2, _AP_S2>& op2) + { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs>=rhs; + } + + + template + INLINE bool operator < (const ap_range_ref<_AP_W2, _AP_S2>& op2) + { + ap_private<_AP_W,false> lhs=get(); + ap_private<_AP_W2,false> rhs=op2.get(); + return lhs + INLINE bool operator <= (const ap_range_ref<_AP_W2, _AP_S2>& op2) + { + ap_private<_AP_W,false> lhs=get(); + 
ap_private<_AP_W2,false> rhs=op2.get(); + return lhs<=rhs; + } + + + template + INLINE void set(const ap_private<_AP_W2,false>& val) + { + ap_private<_AP_W,_AP_S> vval=val; + if(l_index>h_index) + { + for(int i=0, j=l_index;j>=0&&j>=h_index;j--,i++) + (vval)[i]? d_bv.set(j):d_bv.clear(j); + } else { + if (_AP_W>64 ) { + ap_private<_AP_W,_AP_S> mask(-1); + if(l_index>0) + { + ap_private<_AP_W,false> mask1(-1); + mask1>>=_AP_W-l_index; + mask1.flip(); + mask=mask1; + //vval&=mask1; + vval<<=l_index; + } + if(h_index<_AP_W-1) + { + ap_private<_AP_W,false> mask2(-1); + mask2<<=h_index+1; + mask2.flip(); + mask&=mask2; + vval&=mask2; + } + mask.flip(); + d_bv&=mask; + d_bv|=vval; + } else { + uint64_t mask = ~0ULL >> (64-_AP_W); + if(l_index>0) + { + uint64_t mask1 = mask; + mask1=mask & (mask1>>(_AP_W-l_index)); + vval =mask&( vval <> (64-_AP_W); + mask2 = mask &(mask2<<(h_index+1)); + mask&=~mask2; + vval&=~mask2; + } + d_bv&=(~mask&(~0ULL >> (64-_AP_W))); + d_bv|=vval; + } + } + } + + + INLINE ap_private<_AP_W,false> get() const + { + ap_private<_AP_W,false> val(0); + if(h_index=0&&j>=h_index;j--,i++) + if((d_bv)[j]) val.set(i); + } else { + val=d_bv; + val>>=l_index; + if(h_index<_AP_W-1) + { + if (_AP_W <= 64) { + const static uint64_t mask = (~0ULL>> (64>_AP_W ? (64-_AP_W):0)); + val &= (mask>> (_AP_W-(h_index-l_index+1))); + } else { + ap_private<_AP_W,false> mask(-1); + mask>>=_AP_W-(h_index-l_index+1); + val&=mask; + } + } + } + return val; + } + + + INLINE ap_private<_AP_W,false> get() + { + ap_private<_AP_W,false> val(0); + if(h_index=0&&j>=h_index;j--,i++) + if((d_bv)[j]) val.set(i); + } else { + val=d_bv; + val>>=l_index; + if(h_index<_AP_W-1) + { + if (_AP_W <= 64 ) { + static const uint64_t mask = ~0ULL>> (64>_AP_W ? 
(64-_AP_W):0); + return val &= ( (mask) >> (_AP_W - (h_index-l_index+1))); + } else { + ap_private<_AP_W,false> mask(-1); + mask>>=_AP_W-(h_index-l_index+1); + val&=mask; + } + } + } + return val; + } + + + INLINE int length() const + { + return h_index>=l_index?h_index-l_index+1:l_index-h_index+1; + } + + + INLINE int to_int() const + { + ap_private<_AP_W,false> val=get(); + return val.to_int(); + } + + + INLINE unsigned int to_uint() const + { + ap_private<_AP_W,false> val=get(); + return val.to_uint(); + } + + + INLINE long to_long() const + { + ap_private<_AP_W,false> val=get(); + return val.to_long(); + } + + + INLINE unsigned long to_ulong() const + { + ap_private<_AP_W,false> val=get(); + return val.to_ulong(); + } + + + INLINE ap_slong to_int64() const + { + ap_private<_AP_W,false> val=get(); + return val.to_int64(); + } + + + INLINE ap_ulong to_uint64() const + { + ap_private<_AP_W,false> val=get(); + return val.to_uint64(); + } + + INLINE std::string to_string(uint8_t radix=2) const { + return get().to_string(radix); + } + +}; + +///Proxy class, which allows bit selection to be used as rvalue(for reading) and +//lvalue(for writing) + +///Bit reference +//-------------------------------------------------------------- +template +struct ap_bit_ref { +#ifdef _MSC_VER +#pragma warning( disable : 4521 4522 ) +#endif + ap_private<_AP_W,_AP_S>& d_bv; + int d_index; + +public: + INLINE ap_bit_ref(const ap_bit_ref<_AP_W, _AP_S>& ref): + d_bv(ref.d_bv), d_index(ref.d_index) {} + + INLINE ap_bit_ref(ap_private<_AP_W,_AP_S>& bv, int index=0): + d_bv(bv),d_index(index) + { +#ifdef _AP_DEBUG_ + assert(d_index<_AP_W&&"index out of bound"); +#endif + } + + + INLINE operator bool () const + { + return d_bv.get_bit(d_index); + } + + + INLINE bool to_bool() const + { + return operator bool (); + } + + + INLINE ap_bit_ref& operator = (unsigned long long val) + { + if(val) + d_bv.set(d_index); + else + d_bv.clear(d_index); + return *this; + } + + +#if 0 + INLINE ap_bit_ref& 
operator = (bool val) + { + if(val) + d_bv.set(d_index); + else + d_bv.clear(d_index); + return *this; + } +#endif + template + INLINE ap_bit_ref& operator =(const ap_private<_AP_W2,_AP_S2>& val) + { + return operator =((unsigned long long)(val != 0)); + } + + + template + INLINE ap_bit_ref& operator =(const ap_bit_ref<_AP_W2,_AP_S2>& val) + { + return operator =((unsigned long long)(bool)val); + } + + INLINE ap_bit_ref& operator =(const ap_bit_ref<_AP_W,_AP_S>& val) + { + return operator =((unsigned long long)(bool)val); + } + + template + INLINE ap_bit_ref& operator =(const ap_range_ref<_AP_W2,_AP_S2>& val) + { + return operator =((unsigned long long)(bool) val); + } + + + template + INLINE ap_bit_ref& operator= (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=((const ap_private<_AP_W2, false>)(val)); + } + + template + INLINE ap_bit_ref& operator= (const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2>& val) { + return operator=((unsigned long long)(bool)(val)); + } + + template + INLINE ap_bit_ref& operator= (const ap_concat_ref<_AP_W2, _AP_T3, _AP_W3, _AP_T3>& val) { + return operator=((const ap_private<_AP_W2 + _AP_W3, false>)(val)); + } + + + + template + INLINE ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_private<_AP_W2,_AP_S2> > + operator , (ap_private<_AP_W2, _AP_S2>& a2) + { + return ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_private<_AP_W2,_AP_S2> >(*this, a2); + } + + template + INLINE ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_range_ref<_AP_W2,_AP_S2> > + operator, (const ap_range_ref<_AP_W2, _AP_S2> &a2) + { + return + ap_concat_ref<1, ap_bit_ref, _AP_W2, ap_range_ref<_AP_W2,_AP_S2> >(*this, + const_cast &>(a2)); + } + + + template + INLINE ap_concat_ref<1, ap_bit_ref, 1, ap_bit_ref<_AP_W2,_AP_S2> > + operator, (const ap_bit_ref<_AP_W2, _AP_S2> &a2) + { + return + ap_concat_ref<1, ap_bit_ref, 1, ap_bit_ref<_AP_W2,_AP_S2> >(*this, + const_cast &>(a2)); + } + + + INLINE ap_concat_ref<1, ap_bit_ref, 1, 
ap_bit_ref > + operator, (const ap_bit_ref &a2) + { + return + ap_concat_ref<1, ap_bit_ref, 1, ap_bit_ref >(*this, + const_cast(a2)); + } + + + template + INLINE ap_concat_ref<1, ap_bit_ref, _AP_W2+_AP_W3, ap_concat_ref<_AP_W2,_AP_T2,_AP_W3,_AP_T3> > + operator, (const ap_concat_ref<_AP_W2,_AP_T2,_AP_W3,_AP_T3> &a2) + { + return + ap_concat_ref<1,ap_bit_ref,_AP_W2+_AP_W3, + ap_concat_ref<_AP_W2,_AP_T2,_AP_W3,_AP_T3> >(*this, + const_cast& >(a2)); + } + + template + INLINE + ap_concat_ref<1, ap_bit_ref, _AP_W2, af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) { + return ap_concat_ref<1, ap_bit_ref, _AP_W2, af_range_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, + const_cast& >(a2)); + } + + template + INLINE + ap_concat_ref<1, ap_bit_ref, 1, af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) { + return ap_concat_ref<1, ap_bit_ref, 1, af_bit_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, + const_cast& >(a2)); + } + + template + INLINE bool operator == (const ap_bit_ref<_AP_W2, _AP_S2>& op) { + return get() == op.get(); + } + + template + INLINE bool operator != (const ap_bit_ref<_AP_W2, _AP_S2>& op) { + return get() != op.get(); + } + + + INLINE bool get() const + { + return operator bool (); + } + + + INLINE bool get() + { + return operator bool (); + } + + + template + INLINE void set(const ap_private<_AP_W3, false>& val) + { + operator = (val); + } + + INLINE bool operator ~ () const { + bool bit = (d_bv)[d_index]; + return bit ? false : true; + } + + INLINE int length() const { return 1; } + + INLINE std::string to_string() const { + bool val = get(); + return val ? 
"1" : "0"; + } +}; + +/// Operators mixing Integers with AP_Int +// ---------------------------------------------------------------- +#if 1 +#define OP_BIN_MIX_INT(BIN_OP, C_TYPE, _AP_WI, _AP_SI, RTYPE) \ + template \ + INLINE typename ap_private<_AP_WI,_AP_SI>::template RType<_AP_W,_AP_S>::RTYPE \ + operator BIN_OP ( C_TYPE i_op, const ap_private<_AP_W,_AP_S> &op) { \ + return ap_private<_AP_WI,_AP_SI>(i_op).operator BIN_OP (op); \ + } \ + template \ + INLINE typename ap_private<_AP_W,_AP_S>::template RType<_AP_WI,_AP_SI>::RTYPE \ + operator BIN_OP ( const ap_private<_AP_W,_AP_S> &op, C_TYPE i_op) { \ + return op.operator BIN_OP (ap_private<_AP_WI,_AP_SI>(i_op)); \ + } +#else +#define OP_BIN_MIX_INT(BIN_OP, C_TYPE, _AP_WI, _AP_SI, RTYPE) \ + template \ + INLINE typename ap_private<_AP_WI,_AP_SI>::template RType<_AP_W,_AP_S>::RTYPE \ + operator BIN_OP ( C_TYPE i_op, const ap_private<_AP_W,_AP_S> &op) { \ + return ap_private<_AP_WI,_AP_SI>(i_op).operator BIN_OP (op); \ + } \ + template \ + INLINE typename ap_private<_AP_W,_AP_S>::template RType<_AP_WI,_AP_SI>::RTYPE \ + operator BIN_OP ( const ap_private<_AP_W,_AP_S> &op, C_TYPE i_op) { \ + return op.operator BIN_OP (ap_private<_AP_WI,_AP_SI>(i_op)); \ + } +#endif +#define OP_REL_MIX_INT(REL_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE bool operator REL_OP ( const ap_private<_AP_W,_AP_S> &op, C_TYPE op2) { \ + return op.operator REL_OP (ap_private<_AP_W2, _AP_S2>(op2)); \ + } \ + template \ + INLINE bool operator REL_OP ( C_TYPE op2, const ap_private<_AP_W,_AP_S> &op) { \ + return ap_private<_AP_W2,_AP_S2>(op2).operator REL_OP (op); \ + } +#define OP_ASSIGN_MIX_INT(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_private<_AP_W,_AP_S> &operator ASSIGN_OP ( ap_private<_AP_W,_AP_S> &op, C_TYPE op2) { \ + return op.operator ASSIGN_OP (ap_private<_AP_W2,_AP_S2>(op2)); \ + } + +#define OP_BIN_SHIFT_INT(BIN_OP, C_TYPE, _AP_WI, _AP_SI, RTYPE) \ + template \ + C_TYPE operator BIN_OP ( C_TYPE i_op, const 
ap_private<_AP_W,_AP_S> &op) { \ + return i_op BIN_OP (op.getVal()); \ + } \ + template \ + INLINE typename ap_private<_AP_W,_AP_S>::template RType<_AP_WI,_AP_SI>::RTYPE \ + operator BIN_OP ( const ap_private<_AP_W,_AP_S> &op, C_TYPE i_op) { \ + return op.operator BIN_OP (i_op); \ + } +#define OP_ASSIGN_RSHIFT_INT(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_private<_AP_W,_AP_S> &operator ASSIGN_OP ( ap_private<_AP_W,_AP_S> &op, C_TYPE op2) { \ + op = op.operator >> (op2); \ + return op; \ + } +#define OP_ASSIGN_LSHIFT_INT(ASSIGN_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE ap_private<_AP_W,_AP_S> &operator ASSIGN_OP ( ap_private<_AP_W,_AP_S> &op, C_TYPE op2) { \ + op = op.operator << (op2); \ + return op; \ + } + +#define OPS_MIX_INT(C_TYPE, WI, SI) \ + OP_BIN_MIX_INT(*, C_TYPE, WI, SI, mult) \ + OP_BIN_MIX_INT(+, C_TYPE, WI, SI, plus) \ + OP_BIN_MIX_INT(-, C_TYPE, WI, SI, minus) \ + OP_BIN_MIX_INT(/, C_TYPE, WI, SI, div) \ + OP_BIN_MIX_INT(%, C_TYPE, WI, SI, mod) \ + OP_BIN_MIX_INT(&, C_TYPE, WI, SI, logic) \ + OP_BIN_MIX_INT(|, C_TYPE, WI, SI, logic) \ + OP_BIN_MIX_INT(^, C_TYPE, WI, SI, logic) \ + OP_BIN_SHIFT_INT(>>, C_TYPE, WI, SI, arg1) \ + OP_BIN_SHIFT_INT(<<, C_TYPE, WI, SI, arg1) \ + \ + OP_REL_MIX_INT(==, C_TYPE, WI, SI) \ + OP_REL_MIX_INT(!=, C_TYPE, WI, SI) \ + OP_REL_MIX_INT(>, C_TYPE, WI, SI) \ + OP_REL_MIX_INT(>=, C_TYPE, WI, SI) \ + OP_REL_MIX_INT(<, C_TYPE, WI, SI) \ + OP_REL_MIX_INT(<=, C_TYPE, WI, SI) \ + \ + OP_ASSIGN_MIX_INT(+=, C_TYPE, WI, SI) \ + OP_ASSIGN_MIX_INT(-=, C_TYPE, WI, SI) \ + OP_ASSIGN_MIX_INT(*=, C_TYPE, WI, SI) \ + OP_ASSIGN_MIX_INT(/=, C_TYPE, WI, SI) \ + OP_ASSIGN_MIX_INT(%=, C_TYPE, WI, SI) \ + OP_ASSIGN_MIX_INT(&=, C_TYPE, WI, SI) \ + OP_ASSIGN_MIX_INT(|=, C_TYPE, WI, SI) \ + OP_ASSIGN_MIX_INT(^=, C_TYPE, WI, SI) \ + OP_ASSIGN_RSHIFT_INT(>>=, C_TYPE, WI, SI) \ + OP_ASSIGN_LSHIFT_INT(<<=, C_TYPE, WI, SI) + + +OPS_MIX_INT(bool, 1, false) +OPS_MIX_INT(char, 8, true) +OPS_MIX_INT(signed char, 8, true) 
+OPS_MIX_INT(unsigned char, 8, false) +OPS_MIX_INT(short, 16, true) +OPS_MIX_INT(unsigned short, 16, false) +OPS_MIX_INT(int, 32, true) +OPS_MIX_INT(unsigned int, 32, false) +# if defined __x86_64__ +OPS_MIX_INT(long, 64, true) +OPS_MIX_INT(unsigned long, 64, false) +# else +OPS_MIX_INT(long, 32, true) +OPS_MIX_INT(unsigned long, 32, false) +# endif +OPS_MIX_INT(ap_slong, 64, true) +OPS_MIX_INT(ap_ulong, 64, false) + +#define OP_BIN_MIX_RANGE(BIN_OP, RTYPE) \ + template \ + INLINE typename ap_private<_AP_W1,_AP_S1>::template RType<_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP ( const ap_range_ref<_AP_W1,_AP_S1>& op1, const ap_private<_AP_W2,_AP_S2>& op2) { \ + return ap_private<_AP_W1, false>(op1).operator BIN_OP (op2); \ + } \ + template \ + INLINE typename ap_private<_AP_W1,_AP_S1>::template RType<_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP ( const ap_private<_AP_W1,_AP_S1>& op1, const ap_range_ref<_AP_W2,_AP_S2>& op2) { \ + return op1.operator BIN_OP (ap_private<_AP_W2, false>(op2)); \ + } + +#define OP_REL_MIX_RANGE(REL_OP) \ + template \ + INLINE bool operator REL_OP ( const ap_range_ref<_AP_W1,_AP_S1>& op1, const ap_private<_AP_W2,_AP_S2>& op2) { \ + return ap_private<_AP_W1,false>(op1).operator REL_OP (op2); \ + } \ + template \ + INLINE bool operator REL_OP ( const ap_private<_AP_W1,_AP_S1>& op1, const ap_range_ref<_AP_W2,_AP_S2>& op2) { \ + return op1.operator REL_OP (op2.operator ap_private<_AP_W2, false>()); \ + } + +#define OP_ASSIGN_MIX_RANGE(ASSIGN_OP) \ + template \ + INLINE ap_private<_AP_W1,_AP_S1>& operator ASSIGN_OP ( ap_private<_AP_W1,_AP_S1>& op1, const ap_range_ref<_AP_W2,_AP_S2>& op2) { \ + return op1.operator ASSIGN_OP (ap_private<_AP_W2, false>(op2)); \ + } \ + template \ + INLINE ap_range_ref<_AP_W1,_AP_S1>& operator ASSIGN_OP (ap_range_ref<_AP_W1,_AP_S1>& op1, ap_private<_AP_W2,_AP_S2>& op2) { \ + ap_private<_AP_W1, false> tmp(op1); \ + tmp.operator ASSIGN_OP (op2); \ + op1 = tmp; \ + return op1; \ + } + + +OP_ASSIGN_MIX_RANGE(+=) 
+OP_ASSIGN_MIX_RANGE(-=) +OP_ASSIGN_MIX_RANGE(*=) +OP_ASSIGN_MIX_RANGE(/=) +OP_ASSIGN_MIX_RANGE(%=) +OP_ASSIGN_MIX_RANGE(>>=) +OP_ASSIGN_MIX_RANGE(<<=) +OP_ASSIGN_MIX_RANGE(&=) +OP_ASSIGN_MIX_RANGE(|=) +OP_ASSIGN_MIX_RANGE(^=) + +OP_REL_MIX_RANGE(==) +OP_REL_MIX_RANGE(!=) +OP_REL_MIX_RANGE(>) +OP_REL_MIX_RANGE(>=) +OP_REL_MIX_RANGE(<) +OP_REL_MIX_RANGE(<=) + +OP_BIN_MIX_RANGE(+, plus) +OP_BIN_MIX_RANGE(-, minus) +OP_BIN_MIX_RANGE(*, mult) +OP_BIN_MIX_RANGE(/, div) +OP_BIN_MIX_RANGE(%, mod) +OP_BIN_MIX_RANGE(>>, arg1) +OP_BIN_MIX_RANGE(<<, arg1) +OP_BIN_MIX_RANGE(&, logic) +OP_BIN_MIX_RANGE(|, logic) +OP_BIN_MIX_RANGE(^, logic) + +#define OP_BIN_MIX_BIT(BIN_OP, RTYPE) \ + template \ + INLINE typename ap_private<1, false>::template RType<_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP ( const ap_bit_ref<_AP_W1,_AP_S1>& op1, const ap_private<_AP_W2,_AP_S2>& op2) { \ + return ap_private<1, false>(op1).operator BIN_OP (op2); \ + } \ + template \ + INLINE typename ap_private<_AP_W1,_AP_S1>::template RType<1,false>::RTYPE \ + operator BIN_OP ( const ap_private<_AP_W1,_AP_S1>& op1, const ap_bit_ref<_AP_W2,_AP_S2>& op2) { \ + return op1.operator BIN_OP (ap_private<1, false>(op2)); \ + } + +#define OP_REL_MIX_BIT(REL_OP) \ + template \ + INLINE bool operator REL_OP ( const ap_bit_ref<_AP_W1,_AP_S1>& op1, const ap_private<_AP_W2,_AP_S2>& op2) { \ + return ap_private<_AP_W1,false>(op1).operator REL_OP (op2); \ + } \ + template \ + INLINE bool operator REL_OP ( const ap_private<_AP_W1,_AP_S1>& op1, const ap_bit_ref<_AP_W2,_AP_S2>& op2) { \ + return op1.operator REL_OP (ap_private<1, false>(op2)); \ + } + +#define OP_ASSIGN_MIX_BIT(ASSIGN_OP) \ + template \ + INLINE ap_private<_AP_W1,_AP_S1>& operator ASSIGN_OP ( ap_private<_AP_W1,_AP_S1>& op1, ap_bit_ref<_AP_W2,_AP_S2>& op2) { \ + return op1.operator ASSIGN_OP (ap_private<1, false>(op2)); \ + } \ + template \ + INLINE ap_bit_ref<_AP_W1,_AP_S1>& operator ASSIGN_OP ( ap_bit_ref<_AP_W1,_AP_S1>& op1, ap_private<_AP_W2,_AP_S2>& op2) { \ + 
ap_private<1, false> tmp(op1); \ + tmp.operator ASSIGN_OP (op2); \ + op1 = tmp; \ + return op1; \ + } + + +OP_ASSIGN_MIX_BIT(+=) +OP_ASSIGN_MIX_BIT(-=) +OP_ASSIGN_MIX_BIT(*=) +OP_ASSIGN_MIX_BIT(/=) +OP_ASSIGN_MIX_BIT(%=) +OP_ASSIGN_MIX_BIT(>>=) +OP_ASSIGN_MIX_BIT(<<=) +OP_ASSIGN_MIX_BIT(&=) +OP_ASSIGN_MIX_BIT(|=) +OP_ASSIGN_MIX_BIT(^=) + +OP_REL_MIX_BIT(==) +OP_REL_MIX_BIT(!=) +OP_REL_MIX_BIT(>) +OP_REL_MIX_BIT(>=) +OP_REL_MIX_BIT(<) +OP_REL_MIX_BIT(<=) + +OP_BIN_MIX_BIT(+, plus) +OP_BIN_MIX_BIT(-, minus) +OP_BIN_MIX_BIT(*, mult) +OP_BIN_MIX_BIT(/, div) +OP_BIN_MIX_BIT(%, mod) +OP_BIN_MIX_BIT(>>, arg1) +OP_BIN_MIX_BIT(<<, arg1) +OP_BIN_MIX_BIT(&, logic) +OP_BIN_MIX_BIT(|, logic) +OP_BIN_MIX_BIT(^, logic) + +#define REF_REL_OP_MIX_INT(REL_OP, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE bool operator REL_OP ( const ap_range_ref<_AP_W,_AP_S> &op, C_TYPE op2) { \ + return (ap_private<_AP_W, false>(op)).operator REL_OP (ap_private<_AP_W2,_AP_S2>(op2)); \ + } \ + template \ + INLINE bool operator REL_OP ( C_TYPE op2, const ap_range_ref<_AP_W,_AP_S> &op) { \ + return ap_private<_AP_W2,_AP_S2>(op2).operator REL_OP (ap_private<_AP_W, false>(op)); \ + } \ + template \ + INLINE bool operator REL_OP ( const ap_bit_ref<_AP_W,_AP_S> &op, C_TYPE op2) { \ + return (bool(op)) REL_OP op2; \ + } \ + template \ + INLINE bool operator REL_OP ( C_TYPE op2, const ap_bit_ref<_AP_W,_AP_S> &op) { \ + return op2 REL_OP (bool(op)); \ + } \ + template \ + INLINE bool operator REL_OP ( const ap_concat_ref<_AP_W,_AP_T, _AP_W1, _AP_T1> &op, C_TYPE op2) { \ + return (ap_private<_AP_W + _AP_W1, false>(op)).operator REL_OP (ap_private<_AP_W2,_AP_S2>(op2)); \ + } \ + template \ + INLINE bool operator REL_OP ( C_TYPE op2, const ap_concat_ref<_AP_W,_AP_T, _AP_W1, _AP_T1> &op) { \ + return ap_private<_AP_W2,_AP_S2>(op2).operator REL_OP (ap_private<_AP_W + _AP_W1, false>(op)); \ + } + +#define REF_REL_MIX_INT(C_TYPE, _AP_WI, _AP_SI) \ +REF_REL_OP_MIX_INT(>, C_TYPE, _AP_WI, _AP_SI) \ 
+REF_REL_OP_MIX_INT(<, C_TYPE, _AP_WI, _AP_SI) \ +REF_REL_OP_MIX_INT(>=, C_TYPE, _AP_WI, _AP_SI) \ +REF_REL_OP_MIX_INT(<=, C_TYPE, _AP_WI, _AP_SI) \ +REF_REL_OP_MIX_INT(==, C_TYPE, _AP_WI, _AP_SI) \ +REF_REL_OP_MIX_INT(!=, C_TYPE, _AP_WI, _AP_SI) + +REF_REL_MIX_INT(bool, 1, false) +REF_REL_MIX_INT(char, 8, true) +REF_REL_MIX_INT(signed char, 8, true) +REF_REL_MIX_INT(unsigned char, 8, false) +REF_REL_MIX_INT(short, 16, true) +REF_REL_MIX_INT(unsigned short, 16, false) +REF_REL_MIX_INT(int, 32, true) +REF_REL_MIX_INT(unsigned int, 32, false) +# if defined __x86_64__ +REF_REL_MIX_INT(long, 64, true) +REF_REL_MIX_INT(unsigned long, 64, false) +# else +REF_REL_MIX_INT(long, 32, true) +REF_REL_MIX_INT(unsigned long, 32, false) +# endif +REF_REL_MIX_INT(ap_slong, 64, true) +REF_REL_MIX_INT(ap_ulong, 64, false) + +#define REF_BIN_OP_MIX_INT(BIN_OP, RTYPE, C_TYPE, _AP_W2, _AP_S2) \ + template \ + INLINE typename ap_private<_AP_W, false>::template RType<_AP_W2,_AP_S2>::RTYPE \ + operator BIN_OP ( const ap_range_ref<_AP_W,_AP_S> &op, C_TYPE op2) { \ + return (ap_private<_AP_W, false>(op)).operator BIN_OP (ap_private<_AP_W2,_AP_S2>(op2)); \ + } \ + template \ + INLINE typename ap_private<_AP_W2, _AP_S2>::template RType<_AP_W,false>::RTYPE \ + operator BIN_OP ( C_TYPE op2, const ap_range_ref<_AP_W,_AP_S> &op) { \ + return ap_private<_AP_W2,_AP_S2>(op2).operator BIN_OP (ap_private<_AP_W, false>(op)); \ + } + +#define REF_BIN_MIX_INT(C_TYPE, _AP_WI, _AP_SI) \ +REF_BIN_OP_MIX_INT(+, plus, C_TYPE, _AP_WI, _AP_SI) \ +REF_BIN_OP_MIX_INT(-, minus, C_TYPE, _AP_WI, _AP_SI) \ +REF_BIN_OP_MIX_INT(*, mult, C_TYPE, _AP_WI, _AP_SI) \ +REF_BIN_OP_MIX_INT(/, div, C_TYPE, _AP_WI, _AP_SI) \ +REF_BIN_OP_MIX_INT(%, mod, C_TYPE, _AP_WI, _AP_SI) \ +REF_BIN_OP_MIX_INT(>>, arg1, C_TYPE, _AP_WI, _AP_SI) \ +REF_BIN_OP_MIX_INT(<<, arg1, C_TYPE, _AP_WI, _AP_SI) \ +REF_BIN_OP_MIX_INT(&, logic, C_TYPE, _AP_WI, _AP_SI) \ +REF_BIN_OP_MIX_INT(|, logic, C_TYPE, _AP_WI, _AP_SI) \ +REF_BIN_OP_MIX_INT(^, logic, 
C_TYPE, _AP_WI, _AP_SI) + +REF_BIN_MIX_INT(bool, 1, false) +REF_BIN_MIX_INT(char, 8, true) +REF_BIN_MIX_INT(signed char, 8, true) +REF_BIN_MIX_INT(unsigned char, 8, false) +REF_BIN_MIX_INT(short, 16, true) +REF_BIN_MIX_INT(unsigned short, 16, false) +REF_BIN_MIX_INT(int, 32, true) +REF_BIN_MIX_INT(unsigned int, 32, false) +# if defined __x86_64__ +REF_BIN_MIX_INT(long, 64, true) +REF_BIN_MIX_INT(unsigned long, 64, false) +#else +REF_BIN_MIX_INT(long, 32, true) +REF_BIN_MIX_INT(unsigned long, 32, false) +#endif +REF_BIN_MIX_INT(ap_slong, 64, true) +REF_BIN_MIX_INT(ap_ulong, 64, false) + +#define REF_BIN_OP(BIN_OP, RTYPE) \ +template \ +INLINE typename ap_private<_AP_W, false>::template RType<_AP_W2, false>::RTYPE \ +operator BIN_OP (const ap_range_ref<_AP_W,_AP_S> &lhs, const ap_range_ref<_AP_W2,_AP_S2> &rhs) { \ + return ap_private<_AP_W,false>(lhs).operator BIN_OP (ap_private<_AP_W2, false>(rhs)); \ +} + +REF_BIN_OP(+, plus) +REF_BIN_OP(-, minus) +REF_BIN_OP(*, mult) +REF_BIN_OP(/, div) +REF_BIN_OP(%, mod) +REF_BIN_OP(>>, arg1) +REF_BIN_OP(<<, arg1) +REF_BIN_OP(&, logic) +REF_BIN_OP(|, logic) +REF_BIN_OP(^, logic) + +#if 1 +#define CONCAT_OP_MIX_INT(C_TYPE, _AP_WI, _AP_SI) \ +template \ +INLINE \ +ap_private< _AP_W + _AP_WI, false > \ + operator, (const ap_private<_AP_W, _AP_S> &op1, C_TYPE op2) { \ + ap_private<_AP_WI + _AP_W, false> val(op2); \ + ap_private<_AP_WI + _AP_W, false> ret(op1); \ + ret <<= _AP_WI; \ + if (_AP_SI) { \ + val <<= _AP_W; val >>= _AP_W; \ + }\ + ret |= val; \ + return ret;\ +} \ +template \ +INLINE \ +ap_private< _AP_W + _AP_WI, false > \ + operator, (C_TYPE op1, const ap_private<_AP_W, _AP_S>& op2) { \ + ap_private<_AP_WI + _AP_W, false> val(op1); \ + ap_private<_AP_WI + _AP_W, false> ret(op2); \ + if (_AP_S) { \ + ret <<= _AP_WI; ret >>= _AP_WI; \ + } \ + ret |= val << _AP_W; \ + return ret; \ +} \ +template \ +INLINE \ +ap_private< _AP_W + _AP_WI, false > \ + operator, (const ap_range_ref<_AP_W, _AP_S> &op1, C_TYPE op2) { \ + 
ap_private<_AP_WI + _AP_W, false> val(op2); \ + ap_private<_AP_WI + _AP_W, false> ret(op1); \ + ret <<= _AP_WI; \ + if (_AP_SI) { \ + val <<= _AP_W; val >>= _AP_W; \ + } \ + ret |= val; \ + return ret; \ +} \ +template \ +INLINE \ +ap_private< _AP_W + _AP_WI, false > \ + operator, (C_TYPE op1, const ap_range_ref<_AP_W, _AP_S> &op2) { \ + ap_private<_AP_WI + _AP_W, false> val(op1); \ + ap_private<_AP_WI + _AP_W, false> ret(op2); \ + int len = op2.length(); \ + val <<= len; \ + ret |= val; \ + return ret; \ +} \ +template \ +INLINE \ +ap_private<_AP_WI + 1, false > \ + operator, (const ap_bit_ref<_AP_W, _AP_S> &op1, C_TYPE op2) { \ + ap_private<_AP_WI + 1, false> val(op2); \ + val[_AP_WI] = op1; \ + return val; \ +} \ +template \ +INLINE \ +ap_private<_AP_WI + 1, false > \ + operator, (C_TYPE op1, const ap_bit_ref<_AP_W, _AP_S> &op2) { \ + ap_private<_AP_WI + 1, false> val(op1); \ + val <<= 1; \ + val[0] = op2; \ + return val; \ +} \ +template \ +INLINE \ +ap_private<_AP_W + _AP_W2 + _AP_WI, false > \ + operator, (const ap_concat_ref<_AP_W, _AP_T, _AP_W2, _AP_T2> &op1, C_TYPE op2) {\ + ap_private<_AP_WI + _AP_W + _AP_W2, _AP_SI> val(op2);\ + ap_private<_AP_WI + _AP_W + _AP_W2, _AP_SI> ret(op1);\ + if (_AP_SI) { \ + val <<= _AP_W + _AP_W2; val >>= _AP_W + _AP_W2; \ + } \ + ret <<= _AP_WI; \ + ret |= val; \ + return ret; \ +}\ +template \ +INLINE \ +ap_private<_AP_W + _AP_W2 + _AP_WI, false > \ + operator, (C_TYPE op1, const ap_concat_ref<_AP_W, _AP_T, _AP_W2, _AP_T2> &op2) {\ + ap_private<_AP_WI + _AP_W + _AP_W2, _AP_SI> val(op1);\ + ap_private<_AP_WI + _AP_W + _AP_W2, _AP_SI> ret(op2);\ + int len = op2.length(); \ + val <<= len; \ + ret |= val;\ + return ret; \ +}\ +template \ +INLINE \ +ap_private< _AP_W + _AP_WI, false > \ + operator, (const af_range_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, _AP_N> &op1, C_TYPE op2) { \ + ap_private<_AP_WI + _AP_W, false> val(op2); \ + ap_private<_AP_WI + _AP_W, false> ret(op1); \ + if (_AP_SI) { \ + val <<= _AP_W; val >>= _AP_W; \ + 
}\ + ret <<= _AP_WI; \ + ret |= val; \ + return ret; \ +} \ +template \ +INLINE \ +ap_private< _AP_W + _AP_WI, false > \ + operator, (C_TYPE op1, const af_range_ref<_AP_W, _AP_I, _AP_S, \ + _AP_Q, _AP_O, _AP_N> &op2) { \ + ap_private<_AP_WI + _AP_W, false> val(op1); \ + ap_private<_AP_WI + _AP_W, false> ret(op2); \ + int len = op2.length(); \ + val <<= len; \ + ret |= val; \ + return ret; \ +} \ +template \ +INLINE \ +ap_private< 1 + _AP_WI, false> \ + operator, (const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q, _AP_O, \ + _AP_N> &op1, C_TYPE op2) { \ + ap_private<_AP_WI + 1, _AP_SI> val(op2); \ + val[_AP_WI] = op1; \ + return val; \ +} \ +template \ +INLINE \ +ap_private< 1 + _AP_WI, false> \ + operator, (C_TYPE op1, const af_bit_ref<_AP_W, _AP_I, _AP_S, _AP_Q,\ + _AP_O, _AP_N> &op2) { \ + ap_private<_AP_WI + 1, _AP_SI> val(op1); \ + val <<= 1; \ + val[0] = op2; \ + return val; \ +} + +CONCAT_OP_MIX_INT(bool, 1, false) +CONCAT_OP_MIX_INT(char, 8, true) +CONCAT_OP_MIX_INT(signed char, 8, true) +CONCAT_OP_MIX_INT(unsigned char, 8, false) +CONCAT_OP_MIX_INT(short, 16, true) +CONCAT_OP_MIX_INT(unsigned short, 16, false) +CONCAT_OP_MIX_INT(int, 32, true) +CONCAT_OP_MIX_INT(unsigned int, 32, false) +# if defined __x86_64__ +CONCAT_OP_MIX_INT(long, 64, true) +CONCAT_OP_MIX_INT(unsigned long, 64, false) +# else +CONCAT_OP_MIX_INT(long, 32, true) +CONCAT_OP_MIX_INT(unsigned long, 32, false) +# endif +CONCAT_OP_MIX_INT(ap_slong, 64, true) +CONCAT_OP_MIX_INT(ap_ulong, 64, false) +#endif + +#if 1 +#define CONCAT_SHIFT_MIX_INT(C_TYPE, op) \ +template \ +INLINE ap_uint<_AP_W+_AP_W1> operator op (const ap_concat_ref<_AP_W, _AP_T, _AP_W1, _AP_T1> lhs, C_TYPE rhs) { \ + return ((ap_uint<_AP_W+_AP_W1>)lhs.get()) op ((int)rhs); \ +} + +CONCAT_SHIFT_MIX_INT(long, <<) +CONCAT_SHIFT_MIX_INT(unsigned long, <<) +CONCAT_SHIFT_MIX_INT(unsigned int, <<) +CONCAT_SHIFT_MIX_INT(ap_ulong, <<) +CONCAT_SHIFT_MIX_INT(ap_slong, <<) +CONCAT_SHIFT_MIX_INT(long, >>) +CONCAT_SHIFT_MIX_INT(unsigned long, >>) 
+CONCAT_SHIFT_MIX_INT(unsigned int, >>) +CONCAT_SHIFT_MIX_INT(ap_ulong, >>) +CONCAT_SHIFT_MIX_INT(ap_slong, >>) +#endif + +#if defined(SYSTEMC_H) || defined(SYSTEMC_INCLUDED) +template +INLINE void sc_trace(sc_core::sc_trace_file *tf, const ap_private<_AP_W, _AP_S> &op, + const std::string &name) { + if (tf) + tf->trace(sc_dt::sc_lv<_AP_W>(op.to_string(2).c_str()), name); +} +#endif + +template +INLINE std::ostream& operator<<(std::ostream& out, const ap_private<_AP_W,_AP_S> &op) +{ + ap_private<_AP_W, _AP_S> v=op; + const std::ios_base::fmtflags basefield = out.flags() & std::ios_base::basefield; + unsigned radix = (basefield == std::ios_base::hex) ? 16 : + ((basefield == std::ios_base::oct) ? 8 : 10); + std::string str=v.toString(radix,_AP_S); + out< +INLINE std::istream& operator >> (std::istream& in, ap_private<_AP_W,_AP_S> &op) +{ + std::string str; + in >> str; + op = ap_private<_AP_W, _AP_S>(str.c_str()); + return in; + +} + +template +INLINE std::ostream& operator<<(std::ostream& out, const ap_range_ref<_AP_W,_AP_S> &op) +{ + return operator<<(out, ap_private<_AP_W, _AP_S>(op)); +} + +template +INLINE std::istream& operator >> (std::istream& in, ap_range_ref<_AP_W,_AP_S> &op) +{ + return operator>>(in, ap_private<_AP_W, _AP_S>(op));; +} + +template +INLINE void print(const ap_private<_AP_W,_AP_S> &op, bool fill=true ) +{ + ap_private<_AP_W, _AP_S> v=op; + uint32_t ws=v.getNumWords(); + const uint64_t *ptr=v.getRawData(); + int i=ws-1; +#if 0 + if(fill) + printf("%016llx",*(ptr+i)); + else + printf("%llx",*(ptr+i)); +#else +//match SystemC output + if(_AP_W%64 != 0) { + uint32_t offset=_AP_W%64; + uint32_t count=(offset+3)/4; + int64_t data=*(ptr+i); + if(_AP_S) + data=(data<<(64-offset))>>(64-offset); + else + count=(offset+4)/4; + while(count-->0) + printf("%llx",(data>>(count*4))&0xf); + } else { + if(_AP_S==false) + printf("0"); + printf("%016llx",*(ptr+i)); + } +#endif + for(--i;i>=0;i--) + printf("%016llx",*(ptr+i)); + printf("\n"); + +} +#endif /* 
#ifndef __AESL_GCC_AP_INT_H__ */ \ No newline at end of file diff --git a/hls_2018/router_03_boardstr/etc/ap_private.h b/hls_2018/router_03_boardstr/etc/ap_private.h new file mode 100755 index 0000000..1a68a9e --- /dev/null +++ b/hls_2018/router_03_boardstr/etc/ap_private.h @@ -0,0 +1,5858 @@ +/* + * Copyright 2012 Xilinx, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LLVM_SUPPORT_MATHEXTRAS_H +#define LLVM_SUPPORT_MATHEXTRAS_H + +#ifdef _MSC_VER +#if _MSC_VER <= 1500 +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else if +#include +#endif /* #if _MSC_VER <= 1500 */ +#else +#include +#endif /* #if _MSC_VER <= 1500 */ +#undef INLINE +#if 1 +#define INLINE inline +#else +//Enable to debug ap_int/ap_fixed +#define INLINE __attribute__((weak)) +#endif +#define AP_MAX(a,b) ((a) > (b) ? (a) : (b)) +#define AP_MIN(a,b) ((a) < (b) ? (a) : (b)) +#define AP_ABS(a) ((a)>=0 ? 
(a):-(a)) +#ifndef AP_INT_MAX_W +#define AP_INT_MAX_W 1024 +#endif +#define BIT_WIDTH_UPPER_LIMIT (1 << 15) +#if AP_INT_MAX_W > BIT_WIDTH_UPPER_LIMIT +#error "Bitwidth exceeds 32768 (1 << 15), the maximum allowed value" +#endif +#define MAX_MODE(BITS) ((BITS + 1023) / 1024) + +// NOTE: The following support functions use the _32/_64 extensions instead of +// type overloading so that signed and unsigned integers can be used without +// ambiguity. + +/// Hi_32 - This function returns the high 32 bits of a 64 bit value. +INLINE uint32_t Hi_32(uint64_t Value) { + return static_cast(Value >> 32); +} + +/// Lo_32 - This function returns the low 32 bits of a 64 bit value. +INLINE uint32_t Lo_32(uint64_t Value) { + return static_cast(Value); +} + +/// ByteSwap_16 - This function returns a byte-swapped representation of the +/// 16-bit argument, Value. +INLINE uint16_t ByteSwap_16(uint16_t Value) { +#if defined(_MSC_VER) && !defined(_DEBUG) + // The DLL version of the runtime lacks these functions (bug!?), but in a + // release build they're replaced with BSWAP instructions anyway. + return (uint16_t)(_byteswap_ushort(Value)); +#else + uint16_t Hi = (uint16_t)((Value) << 8); + uint16_t Lo = (uint16_t)((Value) >> 8); + return Hi | Lo; +#endif +} + +/// ByteSwap_32 - This function returns a byte-swapped representation of the +/// 32-bit argument, Value. +INLINE uint32_t ByteSwap_32(uint32_t Value) { + uint32_t Byte0 = Value & 0x000000FF; + uint32_t Byte1 = Value & 0x0000FF00; + uint32_t Byte2 = Value & 0x00FF0000; + uint32_t Byte3 = Value & 0xFF000000; + return ((Byte0) << 24) | ((Byte1) << 8) | ((Byte2) >> 8) | ((Byte3) >> 24); +} + +/// ByteSwap_64 - This function returns a byte-swapped representation of the +/// 64-bit argument, Value. 
+INLINE uint64_t ByteSwap_64(uint64_t Value) { + uint64_t Hi = ByteSwap_32(uint32_t(Value)); + uint32_t Lo = ByteSwap_32(uint32_t(Value >> 32)); + return ((Hi) << 32) | Lo; +} + +/// CountLeadingZeros_32 - this function performs the platform optimal form of +/// counting the number of zeros from the most significant bit to the first one +/// bit. Ex. CountLeadingZeros_32(0x00F000FF) == 8. +/// Returns 32 if the word is zero. +INLINE unsigned CountLeadingZeros_32(uint32_t Value) { + unsigned Count; // result +#if __GNUC__ >= 4 + // PowerPC is defined for __builtin_clz(0) +#if !defined(__ppc__) && !defined(__ppc64__) + if (Value == 0) return 32; +#endif + Count = __builtin_clz(Value); +#else + if (Value == 0) return 32; + Count = 0; + // bisecton method for count leading zeros + for (unsigned Shift = 32 >> 1; Shift; Shift >>= 1) { + uint32_t Tmp = (Value) >> (Shift); + if (Tmp) { + Value = Tmp; + } else { + Count |= Shift; + } + } +#endif + return Count; +} + +/// CountLeadingZeros_64 - This function performs the platform optimal form +/// of counting the number of zeros from the most significant bit to the first +/// one bit (64 bit edition.) +/// Returns 64 if the word is zero. 
+INLINE unsigned CountLeadingZeros_64(uint64_t Value) { + unsigned Count; // result +#if __GNUC__ >= 4 + // PowerPC is defined for __builtin_clzll(0) +#if !defined(__ppc__) && !defined(__ppc64__) + if (!Value) return 64; +#endif + Count = __builtin_clzll(Value); +#else + if (sizeof(long) == sizeof(int64_t)) { + if (!Value) return 64; + Count = 0; + // bisecton method for count leading zeros + for (unsigned Shift = 64 >> 1; Shift; Shift >>= 1) { + uint64_t Tmp = (Value) >> (Shift); + if (Tmp) { + Value = Tmp; + } else { + Count |= Shift; + } + } + } else { + // get hi portion + uint32_t Hi = Hi_32(Value); + + // if some bits in hi portion + if (Hi) { + // leading zeros in hi portion plus all bits in lo portion + Count = CountLeadingZeros_32(Hi); + } else { + // get lo portion + uint32_t Lo = Lo_32(Value); + // same as 32 bit value + Count = CountLeadingZeros_32(Lo)+32; + } + } +#endif + return Count; +} + +/// CountTrailingZeros_64 - This function performs the platform optimal form +/// of counting the number of zeros from the least significant bit to the first +/// one bit (64 bit edition.) +/// Returns 64 if the word is zero. +INLINE unsigned CountTrailingZeros_64(uint64_t Value) { +#if __GNUC__ >= 4 + return (Value != 0) ? __builtin_ctzll(Value) : 64; +#else + static const unsigned Mod67Position[] = { + 64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54, + 4, 64, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55, + 47, 5, 32, 65, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27, + 29, 50, 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56, + 7, 48, 35, 6, 34, 33, 0 + }; + return Mod67Position[(uint64_t)(-(int64_t)Value & (int64_t)Value) % 67]; +#endif +} + +/// CountPopulation_64 - this function counts the number of set bits in a value, +/// (64 bit edition.) 
+INLINE unsigned CountPopulation_64(uint64_t Value) { +#if __GNUC__ >= 4 + return __builtin_popcountll(Value); +#else + uint64_t v = Value - (((Value) >> 1) & 0x5555555555555555ULL); + v = (v & 0x3333333333333333ULL) + (((v) >> 2) & 0x3333333333333333ULL); + v = (v + ((v) >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56); +#endif +} + +#endif // LLVM_SUPPORT_MATHEXTRAS_H + + +#ifndef AP_PRIVATE_H +#define AP_PRIVATE_H + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace AESL_std { + template + DataType INLINE min(DataType a, DataType b) { + // if (a >= b) return b; + // else return a; + return (a>=b) ? b : a; + } + + template + DataType INLINE max(DataType a, DataType b) { + // if (a >= b) return a; + // else return b; + return (a>=b) ? a : b; + } +} +enum ap_q_mode { + AP_RND, // rounding to plus infinity + AP_RND_ZERO,// rounding to zero + AP_RND_MIN_INF,// rounding to minus infinity + AP_RND_INF,// rounding to infinity + AP_RND_CONV, // convergent rounding + AP_TRN, // truncation + AP_TRN_ZERO // truncation to zero + +}; +enum ap_o_mode { + AP_SAT, // saturation + AP_SAT_ZERO, // saturation to zero + AP_SAT_SYM, // symmetrical saturation + AP_WRAP, // wrap-around (*) + AP_WRAP_SM // sign magnitude wrap-around (*) +}; + +template struct ap_fixed_base; +template struct af_range_ref; +template struct af_bit_ref; + +template struct ap_range_ref; +template struct ap_bit_ref; +template struct ap_concat_ref; +static bool InvalidDigit(const char* str, unsigned len, unsigned start, unsigned radix) { + unsigned i; + for (i = start; i < len; ++i) + if ((radix == 2 && (str[i] == '0' || str[i] == '1')) || + (radix == 8 && str[i] >= '0' && str[i] <= '7') || + (radix == 10 && str[i] >= '0' && str[i] <= '9') || + (radix == 16 && ((str[i] >= '0' && str[i] <= '9') || + (str[i] >= 'a' && str[i] <= 'f') || + (str[i] >= 'A' && str[i] <= 'F')))) + continue; + else + return true; + return false; +} 

/// Detects a leading '+'/'-'.  Sets base to 1 (characters to skip) when a
/// sign is present, and neg to whether that sign was '-'.
static void ap_parse_sign(const char* str, uint32_t &base, bool &neg) {
  if (str[0] == '+' || str[0] == '-') base = 1;
  if (str[0] == '-') neg = true;
  else neg = false;
  return;
}

/// Detects a radix prefix ("0b"/"0x"/"0d"/"0o", any case) and sets offset to
/// the number of prefix characters consumed plus radix accordingly.  With no
/// prefix, falls back to radix 16 if any hex letter appears in the string.
static void ap_parse_prefix(const char* str, uint32_t &offset, uint32_t &radix) {
  if (str[0] == '0') {
    switch (str[1]) {
      case 'b':
      case 'B': offset = 2; radix = 2; break;
      case 'x':
      case 'X': offset = 2; radix = 16; break;
      case 'd':
      case 'D': offset = 2; radix = 10; break;
      case 'o':
      case 'O': offset = 2; radix = 8; break;
      default: break;
    }
  }
  if (offset == 0)
    // restored: the loop header and first comparison were lost to markup
    // stripping ("for (int i=0, len = strlen(str); i= 'a') ...")
    for (int i = 0, len = strlen(str); i < len; i++)
      if ((str[i] <= 'f' && str[i] >= 'a') || (str[i] <= 'F' && str[i] >= 'A')) {
        radix = 16;
        break;
      }
  return;
}

/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
/// the multi-digit integer array, x[], propagating the borrowed 1 value until
/// no further borrowing is needed or it runs out of "digits" in x.  The result
/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
/// In other words, if y > x then this function returns 1, otherwise 0.
/// @returns the borrow out of the subtraction
static bool sub_1(uint64_t x[], uint32_t len, uint64_t y) {
  for (uint32_t i = 0; i < len; ++i) {
    uint64_t __X = x[i];
    x[i] -= y;
    if (y > __X)
      y = 1;   // We have to "borrow 1" from next "digit"
    else {
      y = 0;   // No need to borrow
      break;   // Remaining digits are unchanged so exit early
    }
  }
  return (y != 0);
}

  /// This enumeration just provides for internal constants used in this
  /// translation unit.
  enum {
    MIN_INT_BITS = 1,        ///< Minimum number of bits that can be specified
      ///< Note that this must remain synchronized with IntegerType::MIN_INT_BITS
    MAX_INT_BITS = (1<<23)-1 ///< Maximum number of bits that can be specified
      ///< Note that this must remain synchronized with IntegerType::MAX_INT_BITS
  };

  /// A utility function for allocating memory and checking for allocation
  /// failure.
/// The content is not zeroed.
  /// NOTE(review): the original never checked the malloc result despite the
  /// comment above promising a check; a failed allocation returned NULL and
  /// was dereferenced by callers (e.g. operator*=).  Fail fast instead.
  static uint64_t* getMemory(uint32_t numWords) {
    uint64_t* ptr = (uint64_t*) malloc(numWords * sizeof(uint64_t));
    assert(ptr && "allocation failure in ap_private getMemory()");
    return ptr;
  }

  //===----------------------------------------------------------------------===//
  //                              ap_private Class
  //===----------------------------------------------------------------------===//

  /// ap_private - This class represents arbitrary precision constant integral values.
  /// It is a functional replacement for common case unsigned integer type like
  /// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
  /// integer sizes and large integer value types such as 3-bits, 15-bits, or more
  /// than 64-bits of precision. ap_private provides a variety of arithmetic operators
  /// and methods to manipulate integer values of any bit-width. It supports both
  /// the typical integer arithmetic and comparison operations as well as bitwise
  /// manipulation.
  ///
  /// The class has several invariants worth noting:
  ///   * All bit, byte, and word positions are zero-based.
  ///   * Once the bit width is set, it doesn't change except by the Truncate,
  ///     SignExtend, or ZeroExtend operations.
  ///   * All binary operators must be on ap_private instances of the same bit width.
  ///     Attempting to use these operators on instances with different bit
  ///     widths will yield an assertion.
  ///   * The value is stored canonically as an unsigned value. For operations
  ///     where it makes a difference, there are both signed and unsigned variants
  ///     of the operation. For example, sdiv and udiv. However, because the bit
  ///     widths must be the same, operations such as Mul and Add produce the same
  ///     results regardless of whether the values are interpreted as signed or
  ///     not.
  ///   * In general, the class tries to follow the style of computation that LLVM
  ///     uses in its IR. This simplifies its use for LLVM.
  ///
  /// @brief Class for arbitrary precision integers.
+ template class ap_private; + namespace ap_private_ops{ + template + INLINE ap_private<_AP_W, _AP_S, _AP_N> lshr(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, uint32_t shiftAmt); + template + INLINE ap_private<_AP_W, _AP_S, _AP_N> shl(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, uint32_t shiftAmt); + } + +#if defined(_MSC_VER) +# if _MSC_VER < 1400 && !defined(for) +# define for if(0);else for +# endif + typedef unsigned __int64 ap_ulong; + typedef signed __int64 ap_slong; +#else + typedef unsigned long long ap_ulong; + typedef signed long long ap_slong; +#endif + template struct retval { + }; + template<> struct retval { + typedef ap_slong Type; + }; + template<> struct retval { + typedef ap_ulong Type; + }; + + template + class ap_private { +#ifdef _MSC_VER +#pragma warning( disable : 4521 4522 ) +#endif +public: + typedef typename retval<_AP_S>::Type ValType; + template friend struct ap_fixed_base; + ///return type of variety of operations + //---------------------------------------------------------- + template + struct RType { + enum { + mult_w = _AP_W+_AP_W2, + mult_s = _AP_S||_AP_S2, + plus_w = AP_MAX(_AP_W+(_AP_S2&&!_AP_S),_AP_W2+(_AP_S&&!_AP_S2))+1, + plus_s = _AP_S||_AP_S2, + minus_w = AP_MAX(_AP_W+(_AP_S2&&!_AP_S),_AP_W2+(_AP_S&&!_AP_S2))+1, + minus_s = true, + div_w = _AP_W+_AP_S2, + div_s = _AP_S||_AP_S2, + mod_w = AP_MIN(_AP_W,_AP_W2+(!_AP_S2&&_AP_S)), + mod_s = _AP_S, + logic_w = AP_MAX(_AP_W+(_AP_S2&&!_AP_S),_AP_W2+(_AP_S&&!_AP_S2)), + logic_s = _AP_S||_AP_S2 + }; + typedef ap_private mult; + typedef ap_private plus; + typedef ap_private minus; + typedef ap_private logic; + typedef ap_private div; + typedef ap_private mod; + typedef ap_private<_AP_W, _AP_S> arg1; + typedef bool reduce; + }; + + INLINE void report() { +#if 0 + if (_AP_W > 1024 && _AP_W <= 4096) { + fprintf(stderr, "[W] W=%d is out of bound (1<=W<=1024): for" + " synthesis: please define macro AP_INT_TYPE_EXT(N)" + " to extend the valid range.\n", _AP_W); + } else +#endif + if (_AP_W > 
MAX_MODE(AP_INT_MAX_W) * 1024) { + fprintf(stderr, "[E] ap_%sint<%d>: Bitwidth exceeds the " + "default max value %d. Please use macro " + "AP_INT_MAX_W to set a larger max value.\n", + _AP_S?"":"u", _AP_W, + MAX_MODE(AP_INT_MAX_W) * 1024); + exit(1); + } + } + + enum { BitWidth = _AP_W }; + /// This union is used to store the integer value. When the + /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal. + + /// This enum is used to hold the constants we needed for ap_private. + uint64_t VAL; ///< Used to store the <= 64 bits integer value. + uint64_t pVal[_AP_N]; ///< Used to store the >64 bits integer value. + + /// This enum is used to hold the constants we needed for ap_private. + enum { + APINT_BITS_PER_WORD = sizeof(uint64_t) * 8, ///< Bits in a word + APINT_WORD_SIZE = sizeof(uint64_t) ///< Byte size of a word + }; + + enum { excess_bits = (_AP_W%APINT_BITS_PER_WORD) ? APINT_BITS_PER_WORD -(_AP_W%APINT_BITS_PER_WORD) : 0}; + static const uint64_t mask = ((uint64_t)~0ULL >> (excess_bits)); + + /// This constructor is used only internally for speed of construction of + /// temporaries. It is unsafe for general use so it is not public. 
+ /* Constructors */ + + ap_private(const char* val) { + std::string str(val); + uint32_t strLen = str.length(); + const char *strp = str.c_str(); + uint32_t offset = 0; + uint32_t base = 0; + bool neg = false; + uint32_t radix = 16; + ap_parse_sign(strp, base, neg); + ap_parse_prefix(strp + base, offset, radix); + + if ((radix != 10 && neg) || + (strLen - base - offset <= 0) || + InvalidDigit(strp, strLen, base + offset, radix)) { + fprintf(stderr, "invalid character string %s !\n", val); + assert(0); + } + + ap_private ap_private_val(str.c_str(), strLen, radix, base, offset); + if (neg) + ap_private_val = -ap_private_val; + operator = (ap_private_val); + report(); + } + + ap_private(const char* val, int rd) { + std::string str(val); + uint32_t strLen = str.length(); + const char *strp = str.c_str(); + uint32_t offset = 0; + uint32_t base = 0; + uint32_t radix = rd; + bool neg = false; + ap_parse_sign(strp, base, neg); + ap_parse_prefix(strp + base, offset, radix); + + if ((radix != 10 && neg) || + (strLen - base - offset <= 0) || + InvalidDigit(strp, strLen, base + offset, radix)) { + fprintf(stderr, "invalid character string %s !\n", val); + assert(0); + } + + // uint32_t bitsNeeded = ap_private<_AP_W, _AP_S>::getBitsNeeded(strp, strLen, radix); + // ap_private<_AP_W, _AP_S> ap_private_val(bitsNeeded, strp , strLen, radix, base, offset); + ap_private ap_private_val(strp , strLen, radix, base, offset); + if (neg) + ap_private_val = -ap_private_val; + operator = (ap_private_val); + report(); + } + + /// Note that numWords can be smaller or larger than the corresponding bit + /// width but any extraneous bits will be dropped. + /// @param numBits the bit width of the constructed ap_private + /// @param numWords the number of words in bigVal + /// @param bigVal a sequence of words to form the initial value of the ap_private + /// @brief Construct an ap_private of numBits width, initialized as bigVal[]. 
+ ap_private(uint32_t numWords, const uint64_t bigVal[]): VAL(0) { + assert(bigVal && "Null pointer detected!"); + { + // Get memory, cleared to 0 + memset(pVal, 0, _AP_N * sizeof(uint64_t)); + + // Calculate the number of words to copy + uint32_t words = AESL_std::min(numWords, _AP_N); + // Copy the words from bigVal to pVal + memcpy(pVal, bigVal, words * APINT_WORD_SIZE); + if (words >= _AP_W) + clearUnusedBits(); + // Make sure unused high bits are cleared + } + } + + /// This constructor interprets Val as a string in the given radix. The + /// interpretation stops when the first charater that is not suitable for the + /// radix is encountered. Acceptable radix values are 2, 8, 10 and 16. It is + /// an error for the value implied by the string to require more bits than + /// numBits. + /// @param numBits the bit width of the constructed ap_private + /// @param val the string to be interpreted + /// @param radix the radix of Val to use for the intepretation + /// @brief Construct an ap_private from a string representation. + ap_private(const std::string& val, uint8_t radix=2, int base=0, int offset=0): VAL(0) { + assert(!val.empty() && "The input string is empty."); + const char *c_str = val.c_str(); + fromString(c_str+base+offset, val.size()-base-offset, radix); + } + + /// This constructor interprets the slen characters starting at StrStart as + /// a string in the given radix. The interpretation stops when the first + /// character that is not suitable for the radix is encountered. Acceptable + /// radix values are 2, 8, 10 and 16. It is an error for the value implied by + /// the string to require more bits than numBits. + /// @param numBits the bit width of the constructed ap_private + /// @param strStart the start of the string to be interpreted + /// @param slen the maximum number of characters to interpret + /// @param radix the radix to use for the conversion + /// @brief Construct an ap_private from a string representation. 
+ /// This method does not consider whether it is negative or not. + ap_private(const char strStart[], uint32_t slen, uint8_t radix, int base=0, int offset=0) : VAL(0) { + fromString(strStart+base+offset, slen-base-offset, radix); + } + + template + INLINE ap_private(const ap_range_ref<_AP_W2,_AP_S2>& ref) { + *this=ref.get(); + report(); + } + + template + INLINE ap_private(const ap_bit_ref<_AP_W2,_AP_S2>& ref) { + *this = ((uint64_t)(bool)ref); + report(); + } + + template + INLINE ap_private(const ap_concat_ref<_AP_W2, _AP_T2,_AP_W3, _AP_T3>& ref) { + *this=ref.get(); + report(); + } + + template + INLINE ap_private(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2> &val) { + *this = ((val.operator ap_private<_AP_W2, false> ())); + report(); + } + + template + INLINE ap_private(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2> &val) { + *this = (uint64_t)(bool)val; + report(); + } + + /// Simply makes *this a copy of that. + /// @brief Copy Constructor. + template + ap_private(const volatile ap_private<_AP_W1, _AP_S1, _AP_N1>& that): VAL(0) { + operator = (const_cast& >(that)); + } + + template + ap_private(const ap_private<_AP_W1, _AP_S1, _AP_N1>& that): VAL(0) { + operator = (that); + } + + template + explicit ap_private(const ap_private<_AP_W1, _AP_S1, 1>& that): VAL(0) { + static const uint64_t that_sign_ext_mask = (_AP_W1==APINT_BITS_PER_WORD)?0:~0ULL>>(_AP_W1%APINT_BITS_PER_WORD)<<(_AP_W1%APINT_BITS_PER_WORD); + if (that.isNegative()) { + pVal[0] = that.VAL|that_sign_ext_mask; + memset(pVal+1, ~0, sizeof(uint64_t)*(_AP_N-1)); + } else { + pVal[0] = that.VAL; + memset(pVal+1, 0, sizeof(uint64_t)*(_AP_N-1)); + } + clearUnusedBits(); + } + + ap_private(const ap_private& that): VAL(0) { + memcpy(pVal, that.pVal, _AP_N * APINT_WORD_SIZE); + clearUnusedBits(); + } + + /// @brief Destructor. + virtual ~ap_private() {} + + /// Default constructor that creates an uninitialized ap_private. 
This is useful + /// for object deserialization (pair this with the static method Read). + ap_private(){memset(pVal, 0, sizeof(uint64_t)*(_AP_N));} + + ap_private(uint64_t* val, uint32_t bits=_AP_W) {assert(0);} + ap_private(const uint64_t *const val, uint32_t bits) {assert(0);} + + /// @name Constructors + /// @{ + /// If isSigned is true then val is treated as if it were a signed value + /// (i.e. as an int64_t) and the appropriate sign extension to the bit width + /// will be done. Otherwise, no sign extension occurs (high order bits beyond + /// the range of val are zero filled). + /// @param numBits the bit width of the constructed ap_private + /// @param val the initial value of the ap_private + /// @param isSigned how to treat signedness of val + /// @brief Create a new ap_private of numBits width, initialized as val. +#define CTOR(TYPE, SIGNED) \ + ap_private(TYPE val, bool isSigned=SIGNED) { \ + pVal[0] = val; \ + if (isSigned && int64_t(pVal[0]) < 0) { \ + memset(pVal+1, ~0, sizeof(uint64_t)*(_AP_N-1)); \ + } else { \ + memset(pVal+1, 0, sizeof(uint64_t)*(_AP_N-1)); \ + } \ + clearUnusedBits(); \ + } +#if 1 + CTOR(int, true) + CTOR(bool, false) + CTOR(signed char, true) + CTOR(unsigned char, false) + CTOR(short, true) + CTOR(unsigned short, false) + CTOR(unsigned int, false) + CTOR(long, true) + CTOR(unsigned long, false) + CTOR(unsigned long long, false) + CTOR(long long, true) + CTOR(float, false) + CTOR(double, false) +#undef CTOR +#else + CTOR(uint64_t) +#undef CTOR +#endif + + + /// @returns true if the number of bits <= 64, false otherwise. + /// @brief Determine if this ap_private just has one word to store value. + INLINE bool isSingleWord() const { + return false; + } + + /// @returns the word position for the specified bit position. + /// @brief Determine which word a bit is in. 
+ static uint32_t whichWord(uint32_t bitPosition) { + // return bitPosition / APINT_BITS_PER_WORD; + return (bitPosition) >> 6; + } + + /// @returns the bit position in a word for the specified bit position + /// in the ap_private. + /// @brief Determine which bit in a word a bit is in. + static uint32_t whichBit(uint32_t bitPosition) { + // return bitPosition % APINT_BITS_PER_WORD; + return bitPosition & 0x3f; + } + + /// bit at a specific bit position. This is used to mask the bit in the + /// corresponding word. + /// @returns a uint64_t with only bit at "whichBit(bitPosition)" set + /// @brief Get a single bit mask. + static uint64_t maskBit(uint32_t bitPosition) { + return 1ULL << (whichBit(bitPosition)); + } + + /// @returns the corresponding word for the specified bit position. + /// @brief Get the word corresponding to a bit position + INLINE uint64_t getWord(uint32_t bitPosition) const { + return isSingleWord() ? VAL : pVal[whichWord(bitPosition)]; + } + + /// This method is used internally to clear the to "N" bits in the high order + /// word that are not used by the ap_private. This is needed after the most + /// significant word is assigned a value to ensure that those bits are + /// zero'd out. + /// @brief Clear unused high order bits + INLINE void clearUnusedBits(void) { + pVal[_AP_N-1] = _AP_S ? ((((int64_t)pVal[_AP_N-1])<<(excess_bits))>> excess_bits) : (excess_bits ? ((pVal[_AP_N-1])<<(excess_bits))>>(excess_bits) : pVal[_AP_N-1]); + } + + INLINE void clearUnusedBitsToZero(void) { + pVal[_AP_N-1] &= mask; + } + + INLINE void clearUnusedBitsToOne(void) { + pVal[_AP_N-1] |= mask; + } + + /// This is used by the constructors that take string arguments. 
+ /// @brief Convert a char array into an ap_private + INLINE void fromString(const char *strStart, uint32_t slen, + uint8_t radix) ; + + INLINE ap_private read() volatile { + return *this; + } + + INLINE void write(const ap_private& op2) volatile { + *this = (op2); + } + + //Explicit conversions to C interger types + //----------------------------------------------------------- + operator ValType() const { + return getVal(); + } + + INLINE ValType getVal() const{ + return *pVal; + } + + INLINE int to_int() const { + return int(*this); + } + + INLINE unsigned to_uint() const { + return (unsigned) getVal(); + } + + INLINE long to_long() const { + return (long) getVal(); + } + + INLINE unsigned long to_ulong() const { + return (unsigned long) getVal(); + } + + INLINE ap_slong to_int64() const { + return (ap_slong) getVal(); + } + + INLINE ap_ulong to_uint64() const { + return (ap_ulong) getVal(); + } + + INLINE double to_double() const { + if (isNegative()) + return roundToDouble(true); + else + return roundToDouble(false); + } + + INLINE unsigned length() const { return _AP_W; } + + /*Reverse the contents of ap_private instance. I.e. 
LSB becomes MSB and vise versa*/ + INLINE ap_private& reverse () { + for (int i = 0; i < _AP_W/2; ++i) { + bool tmp = operator[](i); + if (operator[](_AP_W - 1 - i)) + set(i); + else + clear(i); + if (tmp) + set(_AP_W - 1 - i); + else + clear(_AP_W - 1 - i); + } + clearUnusedBits(); + return *this; + } + + /*Return true if the value of ap_private instance is zero*/ + INLINE bool iszero () const { + return isMinValue(); + } + + /* x < 0 */ + INLINE bool sign () const { + if (isNegative()) + return true; + return false; + } + + /* x[i] = !x[i] */ + INLINE void invert (int i) { + assert( i >= 0 && "Attempting to read bit with negative index"); + assert( i < _AP_W && "Attempting to read bit beyond MSB"); + flip(i); + } + + /* x[i] */ + INLINE bool test (int i) const { + assert( i >= 0 && "Attempting to read bit with negative index"); + assert( i < _AP_W && "Attempting to read bit beyond MSB"); + return operator[](i); + } + + //Set the ith bit into v + INLINE void set (int i, bool v) { + assert( i >= 0 && "Attempting to write bit with negative index"); + assert( i < _AP_W && "Attempting to write bit beyond MSB"); + v ? set(i) : clear(i); + } + + //Set the ith bit into v + INLINE void set_bit (int i, bool v) { + assert( i >= 0 && "Attempting to write bit with negative index"); + assert( i < _AP_W && "Attempting to write bit beyond MSB"); + v ? 
set(i) : clear(i); + } + + INLINE ap_private& set(uint32_t bitPosition) { + pVal[whichWord(bitPosition)] |= maskBit(bitPosition); + clearUnusedBits(); + return *this; + } + + INLINE void set() { + for (uint32_t i = 0; i < _AP_N; ++i) + pVal[i] = ~0ULL; + clearUnusedBits(); + } + + //Get the value of ith bit + INLINE bool get (int i) const { + assert( i >= 0 && "Attempting to read bit with negative index"); + assert( i < _AP_W && "Attempting to read bit beyond MSB"); + return operator [](i); + } + + //Get the value of ith bit + INLINE bool get_bit (int i) const { + assert( i >= 0 && "Attempting to read bit with negative index"); + assert( i < _AP_W && "Attempting to read bit beyond MSB"); + return operator [](i); + } + + //This is used for sc_lv and sc_bv, which is implemented by sc_uint + //Rotate an ap_private object n places to the left + INLINE void lrotate(int n) { + assert( n >= 0 && "Attempting to shift negative index"); + assert( n < _AP_W && "Shift value larger than bit width"); + operator = (shl(n) | lshr(_AP_W - n)); + } + + //This is used for sc_lv and sc_bv, which is implemented by sc_uint + //Rotate an ap_private object n places to the right + INLINE void rrotate(int n) { + assert( n >= 0 && "Attempting to shift negative index"); + assert( n < _AP_W && "Shift value larger than bit width"); + operator = (lshr(n) | shl(_AP_W - n)); + } + + /// Set the given bit to 0 whose position is given as "bitPosition". + /// @brief Set a given bit to 0. + ap_private& clear(uint32_t bitPosition) { + pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition); + clearUnusedBits(); + return *this; + } + + /// @brief Set every bit to 0. + void clear() { + memset(pVal, 0, _AP_N * APINT_WORD_SIZE); + } + + /// @brief Toggle every bit to its opposite value. + ap_private& flip() { + for (uint32_t i = 0; i < _AP_N; ++i) + pVal[i] ^= ~0ULL; + clearUnusedBits(); + return *this; + } + + /// Toggle a given bit to its opposite value whose position is given + /// as "bitPosition". 
+ /// @brief Toggles a given bit to its opposite value. + ap_private& flip(uint32_t bitPosition) { + assert(bitPosition < BitWidth && "Out of the bit-width range!"); + if ((*this)[bitPosition]) clear(bitPosition); + else set(bitPosition); + return *this; + } + + //complements every bit + INLINE void b_not() { + flip(); + } + + ap_private getLoBits(uint32_t numBits) const { + return ap_private_ops::lshr(ap_private_ops::shl(*this, _AP_W - numBits), + _AP_W - numBits); + } + + ap_private getHiBits(uint32_t numBits) const { + return ap_private_ops::lshr(*this, _AP_W - numBits); + } + + //Binary Arithmetic + //----------------------------------------------------------- + + template + INLINE ap_private + operator & (const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) { + return *this & a2.get(); + } + + template + INLINE ap_private + operator | (const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) { + return *this | a2.get(); + } + + template + INLINE ap_private + operator ^ (const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) { + return *this ^ a2.get(); + } + + + ///Arithmetic assign + //------------------------------------------------------------- + +#define OP_BIN_LOGIC_ASSIGN_AP(Sym) \ + template \ + INLINE ap_private& operator Sym(const ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) { \ + uint32_t numWords = AESL_std::min(_AP_N, _AP_N1); \ + uint32_t i; \ + for (i = 0; i < numWords; ++i) \ + pVal[i] Sym RHS.pVal[i]; \ + if (_AP_N1 < _AP_N) { \ + uint64_t ext = RHS.isNegative()?~0ULL:0; \ + for (;i<_AP_N; i++) \ + pVal[i] Sym ext; \ + } \ + clearUnusedBits(); \ + return *this; \ + } + + OP_BIN_LOGIC_ASSIGN_AP(&=); + OP_BIN_LOGIC_ASSIGN_AP(|=); + OP_BIN_LOGIC_ASSIGN_AP(^=); +#undef OP_BIN_LOGIC_ASSIGN_AP + + /// Adds the RHS APint to this ap_private. + /// @returns this, after addition of RHS. + /// @brief Addition assignment operator. 
+ template + INLINE ap_private& operator+=(const ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) { + add(pVal, pVal, RHS.pVal, _AP_N, _AP_N, _AP_N1, _AP_S, _AP_S1); + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator-=(const ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) { + sub(pVal, pVal, RHS.pVal, _AP_N, _AP_N, _AP_N1, _AP_S, _AP_S1); + clearUnusedBits(); + return *this; + } + + template + ap_private& operator*=(const ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) { + // Get some bit facts about LHS and check for zero + uint32_t lhsBits = getActiveBits(); + uint32_t lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1; + if (!lhsWords) { + // 0 * X ===> 0 + return *this; + } + + ap_private dupRHS = RHS; + // Get some bit facts about RHS and check for zero + uint32_t rhsBits = dupRHS.getActiveBits(); + uint32_t rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1; + if (!rhsWords) { + // X * 0 ===> 0 + clear(); + return *this; + } + + // Allocate space for the result + uint32_t destWords = rhsWords + lhsWords; + uint64_t *dest = getMemory(destWords); + + // Perform the long multiply + mul(dest, pVal, lhsWords, dupRHS.pVal, rhsWords, destWords); + + // Copy result back into *this + clear(); + uint32_t wordsToCopy = destWords >= _AP_N ? _AP_N : destWords; + + memcpy(pVal, dest, wordsToCopy* APINT_WORD_SIZE); + + uint64_t ext = (isNegative() ^ RHS.isNegative()) ? 
~0ULL : 0ULL; + for (int i=wordsToCopy; i<_AP_N; i++) + pVal[i]=ext; + clearUnusedBits(); + // delete dest array and return + free(dest); + return *this; + } + +#define OP_ASSIGN_AP(Sym) \ + template \ + INLINE ap_private& operator Sym##=(const ap_private<_AP_W2,_AP_S2>& op) \ + { \ + *this=operator Sym (op); \ + return *this; \ + } \ + + OP_ASSIGN_AP(/) + OP_ASSIGN_AP(%) +#undef OP_ASSIGN_AP + +#define OP_BIN_LOGIC_AP(Sym) \ + template \ + INLINE \ + typename RType<_AP_W1, _AP_S1>::logic \ + operator Sym (const ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) const { \ + enum { numWords = (RType<_AP_W1, _AP_S1>::logic_w +APINT_BITS_PER_WORD-1)/APINT_BITS_PER_WORD}; \ + typename RType<_AP_W1, _AP_S1>::logic Result; \ + uint64_t *val = Result.pVal; \ + uint32_t i; \ + uint32_t min_N = std::min(_AP_N, _AP_N1); \ + uint32_t max_N = std::max(_AP_N, _AP_N1); \ + for (i = 0; i < min_N; ++i) \ + val[i] = pVal[i] Sym RHS.pVal[i]; \ + if (numWords > i) { \ + const uint64_t* tmpVal = (_AP_N>_AP_N1 ? pVal : RHS.pVal)+i; \ + uint64_t ext = ((_AP_N<_AP_N1 && isNegative() )||(_AP_N1 < _AP_N && RHS.isNegative())) ? ~0ULL : 0; \ + for (;i i) { \ + uint64_t ext2 = ((_AP_N>_AP_N1 && isNegative() )||(_AP_N1 > _AP_N && RHS.isNegative())) ? 
~0ULL : 0; \ + val[i] = ext Sym ext2; \ + } \ + } \ + Result.clearUnusedBits(); \ + return Result; \ + } + + OP_BIN_LOGIC_AP(|); + OP_BIN_LOGIC_AP(&); + OP_BIN_LOGIC_AP(^); + +#undef OP_BIN_LOGIC_AP + + template + INLINE typename RType<_AP_W1,_AP_S1>::plus operator+(const ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) const { + // assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + typename RType<_AP_W1,_AP_S1>::plus Result; + bool carry = add(Result.pVal, this->pVal, RHS.pVal, (RType<_AP_W1,_AP_S1>::plus_w + 63) / 64, _AP_N, _AP_N1, _AP_S, _AP_S1); + if ((RType<_AP_W1,_AP_S1>::plus_w + 63) / 64> std::max(_AP_W, _AP_W1) ) + Result.pVal[(RType<_AP_W1,_AP_S1>::plus_w + 63)/64 - 1] = carry; + Result.clearUnusedBits(); + return Result; + } + + template + INLINE typename RType<_AP_W1,_AP_S1>::minus operator-(const ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) const { + typename RType<_AP_W1,_AP_S1>::minus Result; + bool borrow = sub(Result.pVal, this->pVal, RHS.pVal, (RType<_AP_W1,_AP_S1>::minus_w + 63) / 64, _AP_N, _AP_N1, _AP_S, _AP_S1); + if ((RType<_AP_W1,_AP_S1>::minus_w + 63) / 64 > AESL_std::max(_AP_W, _AP_W1) ) { + Result.pVal[(RType<_AP_W1,_AP_S1>::minus_w+63)/64 - 1] = borrow; + } + Result.clearUnusedBits(); + return Result; + } + + template + typename RType<_AP_W1, _AP_S1>::mult operator*(const ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) const { + + // Get some bit facts about LHS and check for zero + uint32_t lhsBits = getActiveBits(); + uint32_t lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1; + if (!lhsWords) + // 0 * X ===> 0 + return typename RType<_AP_W1, _AP_S1>::mult(); + + // Get some bit facts about RHS and check for zero + uint32_t rhsBits = RHS.getActiveBits(); + uint32_t rhsWords = !rhsBits ? 
0 : whichWord(rhsBits - 1) + 1; + if (!rhsWords) { + // X * 0 ===> 0 + return typename RType<_AP_W1, _AP_S1>::mult(); + } + + //extend size to avoid result loss + typename RType<_AP_W1, _AP_S1>::mult dupLHS = *this; + typename RType<_AP_W1, _AP_S1>::mult dupRHS = RHS; + lhsBits = dupLHS.getActiveBits(); + lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1; + rhsBits = dupRHS.getActiveBits(); + rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1; + + // Allocate space for the result + enum { destWords =(RType<_AP_W1, _AP_S1>::mult_w+APINT_BITS_PER_WORD-1)/APINT_BITS_PER_WORD}; + int destw = destWords; + typename RType<_AP_W1, _AP_S1>::mult Result; + uint64_t *dest = Result.pVal; + uint64_t ext = (isNegative() ^ RHS.isNegative()) ? ~0ULL : 0; + + // Perform the long multiply + mul(dest, dupLHS.pVal, lhsWords, dupRHS.pVal, rhsWords, destWords); + + for (int i=lhsWords+rhsWords; i + INLINE typename RType<_AP_W2,_AP_S2>::div + operator / (const ap_private<_AP_W2,_AP_S2>& op) const { + ap_private lhs=ap_private(*this); + ap_private rhs=ap_private(op); + return typename RType<_AP_W2,_AP_S2>::div((_AP_S||_AP_S2)?lhs.sdiv(rhs):lhs.udiv(rhs)); + } + + template + INLINE typename RType<_AP_W2,_AP_S2>::mod + operator % (const ap_private<_AP_W2,_AP_S2>& op) const { + ap_private lhs=*this; + ap_private rhs= op; + typename RType<_AP_W2,_AP_S2>::mod res = typename RType<_AP_W2,_AP_S2>::mod(_AP_S?lhs.srem(rhs):lhs.urem(rhs)); + return res; + } + + template + INLINE ap_private + operator << (const ap_private<_AP_W2, _AP_S2>& op2) const { + uint32_t sh=op2.to_uint(); + return *this << sh; + } + + INLINE ap_private + operator << (uint32_t sh) const { + ap_private r(*this); + bool overflow=(sh>=length()); + if(overflow) + r.clear(); + else + r = ap_private(r.shl(sh)); + return r; + } + + template + INLINE ap_private + operator >> (const ap_private<_AP_W2, _AP_S2>& op2) const { + uint32_t sh = op2.to_uint(); + return *this >> sh; + } + + INLINE ap_private + operator >> (uint32_t sh) 
const { + ap_private r(*this); + bool overflow=(sh>=_AP_W); + bool neg_v=r.isNegative(); + if(_AP_S) { + if(overflow) + neg_v?r.set():r.clear(); + else + return r.ashr(sh); + } else { + if(overflow) + r.clear(); + else + return r.lshr(sh); + } + return r; + } + + ///Shift assign + //------------------------------------------------------------------ +#define OP_ASSIGN_AP(Sym) \ + template \ + INLINE ap_private& operator Sym##=(int op) \ + { \ + *this = operator Sym (op); \ + return *this; \ + } \ + INLINE ap_private& operator Sym##=(unsigned int op) \ + { \ + *this = operator Sym (op); \ + return *this; \ + } \ + template \ + INLINE ap_private& operator Sym##=(const ap_private<_AP_W2,_AP_S2>& op) \ + { \ + *this = operator Sym (op); \ + return *this; \ + } + OP_ASSIGN_AP(>>) + OP_ASSIGN_AP(<<) +#undef OP_ASSIGN_AP + ///Comparisons + //----------------------------------------------------------------- + bool operator==(const ap_private& RHS) const { + // Get some facts about the number of bits used in the two operands. + uint32_t n1 = getActiveBits(); + uint32_t n2 = RHS.getActiveBits(); + + // If the number of bits isn't the same, they aren't equal + if (n1 != n2) + return false; + + // If the number of bits fits in a word, we only need to compare the low word. 
+ if (n1 <= APINT_BITS_PER_WORD) + return pVal[0] == RHS.pVal[0]; + + // Otherwise, compare everything + for (int i = whichWord(n1 - 1); i >= 0; --i) + if (pVal[i] != RHS.pVal[i]) + return false; + return true; + } + + template + INLINE bool operator == (const ap_private<_AP_W2, _AP_S2>& op) const { + enum { _AP_MAX_W = AP_MAX(_AP_W+(_AP_S||_AP_S2),_AP_W2+(_AP_S||_AP_S2))}; + ap_private<_AP_MAX_W, _AP_S|_AP_S2> lhs(*this); + ap_private<_AP_MAX_W, _AP_S|_AP_S2> rhs(op); + return lhs==rhs; + } + + bool operator==(uint64_t Val) const { + uint32_t n = getActiveBits(); + if (n <= APINT_BITS_PER_WORD) + return pVal[0] == Val; + else + return false; + } + + template + INLINE bool operator != (const ap_private<_AP_W2, _AP_S2>& op) const { + return !(*this==op); + } + + template + INLINE bool operator!=(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + return !((*this) == RHS); + } + + INLINE bool operator!=(uint64_t Val) const { + return !((*this) == Val); + } + + + template + INLINE bool operator <= (const ap_private<_AP_W2,_AP_S2>& op) const { + return !(*this>op); + } + + INLINE bool operator <(const ap_private& op) const { + return _AP_S ? 
slt(op):ult(op); + } + + template + INLINE bool operator < (const ap_private<_AP_W2, _AP_S2>& op) const { + enum { _AP_MAX_W = AP_MAX(_AP_W+(_AP_S||_AP_S2),_AP_W2+(_AP_S||_AP_S2))}; + ap_private<_AP_MAX_W, _AP_S> lhs(*this); + ap_private<_AP_MAX_W, _AP_S2> rhs(op); + if (_AP_S == _AP_S2) + return _AP_S?lhs.slt(rhs):lhs.ult(rhs); + else + if (_AP_S) + if (_AP_W2 >= _AP_W) + return lhs.ult(rhs); + else + return lhs.slt(rhs); + else + if (_AP_W >= _AP_W2) + return lhs.ult(rhs); + else + return lhs.slt(rhs); + } + + template + INLINE bool operator >=(const ap_private<_AP_W2,_AP_S2>& op) const { + return !(*this + INLINE bool operator > (const ap_private<_AP_W2, _AP_S2>& op) const { + enum { _AP_MAX_W = AP_MAX(_AP_W+(_AP_S||_AP_S2),_AP_W2+(_AP_S||_AP_S2))}; + ap_private<_AP_MAX_W, _AP_S> lhs(*this); + ap_private<_AP_MAX_W, _AP_S2> rhs(op); + if (_AP_S == _AP_S2) + return _AP_S?lhs.sgt(rhs):lhs.ugt(rhs); + else + if (_AP_S) + if (_AP_W2 >= _AP_W) + return lhs.ugt(rhs); + else + return lhs.sgt(rhs); + else + if (_AP_W >= _AP_W2) + return lhs.ugt(rhs); + else + return lhs.sgt(rhs); + } + + ///Bit and Part Select + //-------------------------------------------------------------- + INLINE ap_range_ref<_AP_W,_AP_S> + operator () (int Hi, int Lo) { + assert((Hi < _AP_W) && (Lo < _AP_W)&&"Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(this, Hi, Lo); + } + + INLINE ap_range_ref<_AP_W,_AP_S> + operator () (int Hi, int Lo) const { + assert((Hi < _AP_W) && (Lo < _AP_W)&&"Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(const_cast*>(this), Hi, Lo); + } + + INLINE ap_range_ref<_AP_W,_AP_S> + range (int Hi, int Lo) const { + assert((Hi < _AP_W) && (Lo < _AP_W)&&"Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>((const_cast*> (this)), Hi, Lo); + } + + INLINE ap_range_ref<_AP_W,_AP_S> + range (int Hi, int Lo) { + assert((Hi < _AP_W) && (Lo < _AP_W)&&"Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(this, Hi, Lo); + } + + template 
+ INLINE ap_range_ref<_AP_W,_AP_S> + range (const ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(this, Hi, Lo); + } + + template + INLINE ap_range_ref<_AP_W,_AP_S> + operator () (const ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(this, Hi, Lo); + } + + template + INLINE ap_range_ref<_AP_W,_AP_S> + range (const ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) const { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(const_cast(this), Hi, Lo); + } + + template + INLINE ap_range_ref<_AP_W,_AP_S> + operator () (const ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) const { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return this->range(Hi, Lo); + } + + INLINE ap_bit_ref<_AP_W,_AP_S> operator [] (uint32_t index) { + assert(index >= 0&&"Attempting to read bit with negative index"); + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return ap_bit_ref<_AP_W,_AP_S>( *this, index ); + } + + template + INLINE ap_bit_ref<_AP_W,_AP_S> operator [] (const ap_private<_AP_W2,_AP_S2> &index) { + assert(index >= 0 && "Attempting to read bit with negative index"); + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return ap_bit_ref<_AP_W,_AP_S>( *this, index.to_int() ); + } + + template + INLINE bool operator [] (const ap_private<_AP_W2,_AP_S2>& index) const { + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + ap_bit_ref<_AP_W,_AP_S> br =operator [] (index); + return br.to_bool(); + } + 
+ INLINE bool operator [](uint32_t bitPosition) const { + return (maskBit(bitPosition) & (pVal[whichWord(bitPosition)])) != 0; + } + + INLINE ap_bit_ref<_AP_W,_AP_S> bit (int index) { + assert(index >= 0 && "Attempting to read bit with negative index"); + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return ap_bit_ref<_AP_W,_AP_S>( *this, index ); + } + + template + INLINE ap_bit_ref<_AP_W,_AP_S> bit (const ap_private<_AP_W2,_AP_S2> &index) { + assert(index >= 0 && "Attempting to read bit with negative index"); + assert(index < _AP_W &&"Attempting to read bit beyond MSB"); + return ap_bit_ref<_AP_W,_AP_S>( *this, index.to_int() ); + } + + INLINE bool bit (int index) const { + assert(index >= 0 && "Attempting to read bit with negative index"); + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + ap_bit_ref<_AP_W,_AP_S> br(const_cast*>(this), index); + return br.to_bool(); + } + + template + INLINE bool bit (const ap_private<_AP_W2,_AP_S2>& index) const { + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + ap_bit_ref<_AP_W,_AP_S> br = bit(index); + return br.to_bool(); + } + + template + INLINE ap_concat_ref<_AP_W,ap_private<_AP_W, _AP_S>,_AP_W2,ap_private<_AP_W2,_AP_S2> > concat(ap_private<_AP_W2,_AP_S2>& a2) { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, ap_private<_AP_W2,_AP_S2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W,ap_private<_AP_W, _AP_S>,_AP_W2,ap_private<_AP_W2,_AP_S2> > concat(const ap_private<_AP_W2,_AP_S2>& a2) const { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, ap_private<_AP_W2,_AP_S2> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > + operator, (ap_private<_AP_W2, _AP_S2>& a2) { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, + _AP_S2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, 
_AP_S2> > + operator, (ap_private<_AP_W2, _AP_S2>& a2) const { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, + _AP_S2> >(const_cast& >(*this), a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > + operator, (const ap_private<_AP_W2, _AP_S2>& a2) { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, + _AP_S2> >(*this, const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > + operator, (const ap_private<_AP_W2, _AP_S2>& a2) const { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, + _AP_S2> >(const_cast& >(*this), const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator, (const ap_range_ref<_AP_W2, _AP_S2> &a2) const { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, + ap_range_ref<_AP_W2, _AP_S2> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator, (ap_range_ref<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, + ap_range_ref<_AP_W2, _AP_S2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, ap_bit_ref<_AP_W2, _AP_S2> > + operator, (const ap_bit_ref<_AP_W2, _AP_S2> &a2) const { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, + ap_bit_ref<_AP_W2, _AP_S2> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, ap_bit_ref<_AP_W2, _AP_S2> > + operator, (ap_bit_ref<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, + ap_bit_ref<_AP_W2, _AP_S2> >(*this, a2); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2+_AP_W3, ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator, (const 
ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) const { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2+_AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE + ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2+_AP_W3, ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator, (ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2+_AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) const { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, af_range_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, af_range_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, 1, af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) const { + return ap_concat_ref<_AP_W, ap_private, 1, af_bit_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, 1, af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) { + return ap_concat_ref<_AP_W, ap_private, 1, af_bit_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, a2); + } + + INLINE 
ap_private<_AP_W,false> get() const { + ap_private<_AP_W,false> ret(*this); + return ret; + } + + template + INLINE void set(const ap_private<_AP_W3, false> & val) { + operator = (ap_private<_AP_W3, _AP_S>(val)); + } + + /// @} + /// @name Value Tests + /// @{ + /// This tests the high bit of this ap_private to determine if it is set. + /// @returns true if this ap_private is negative, false otherwise + /// @brief Determine sign of this ap_private. + INLINE bool isNegative() const { + //just for get rid of warnings + enum {shift = (_AP_W-APINT_BITS_PER_WORD*(_AP_N-1)-1)}; + static const uint64_t mask = 1ULL << (shift); + return _AP_S && (pVal[_AP_N-1]&mask); + } + + /// This tests the high bit of the ap_private to determine if it is unset. + /// @brief Determine if this ap_private Value is positive (not negative). + INLINE bool isPositive() const { + return !isNegative(); + } + + /// This tests if the value of this ap_private is strictly positive (> 0). + /// @returns true if this ap_private is Positive and not zero. + /// @brief Determine if this ap_private Value is strictly positive. + INLINE bool isStrictlyPositive() const { + return isPositive() && (*this) != 0; + } + + /// This checks to see if the value has all bits of the ap_private are set or not. + /// @brief Determine if all bits are set + INLINE bool isAllOnesValue() const { + return countPopulation() == _AP_W; + } + + /// This checks to see if the value of this ap_private is the maximum unsigned + /// value for the ap_private's bit width. + /// @brief Determine if this is the largest unsigned value. + INLINE bool isMaxValue() const { + return countPopulation() == _AP_W; + } + + /// This checks to see if the value of this ap_private is the maximum signed + /// value for the ap_private's bit width. + /// @brief Determine if this is the largest signed value. + INLINE bool isMaxSignedValue() const { + return BitWidth == 1 ? 
VAL == 0 : + !isNegative() && countPopulation() == _AP_W - 1; + } + + /// This checks to see if the value of this ap_private is the minimum unsigned + /// value for the ap_private's bit width. + /// @brief Determine if this is the smallest unsigned value. + INLINE bool isMinValue() const { + return countPopulation() == 0; + } + + /// This checks to see if the value of this ap_private is the minimum signed + /// value for the ap_private's bit width. + /// @brief Determine if this is the smallest signed value. + INLINE bool isMinSignedValue() const { + return BitWidth == 1 ? VAL == 1 : + isNegative() && countPopulation() == 1; + } + + /// This function returns a pointer to the internal storage of the ap_private. + /// This is useful for writing out the ap_private in binary form without any + /// conversions. + INLINE const uint64_t* getRawData() const { + if (isSingleWord()) + return &VAL; + return &pVal[0]; + } + + ap_private sqrt() const; + + /// @} + /// @Assignment Operators + /// @{ + /// @returns *this after assignment of RHS. + /// @brief Copy assignment operator. 
+ INLINE ap_private& operator=(const ap_private& RHS) { + if (this != &RHS) + memcpy(pVal, RHS.pVal, _AP_N * APINT_WORD_SIZE); + return *this; + } + INLINE ap_private& operator=(const volatile ap_private& RHS) { + if (this != &RHS) + for (int i=0; i<_AP_N; ++i) + pVal[i] = RHS.pVal[i]; + return *this; + } + INLINE volatile ap_private& operator=(const ap_private& RHS) volatile { + if (this != &RHS) + for (int i=0; i<_AP_N; ++i) + pVal[i] = RHS.pVal[i]; + return *this; + } + INLINE volatile ap_private& operator=(const volatile ap_private& RHS) volatile { + if (this != &RHS) + for (int i=0; i<_AP_N; ++i) + pVal[i] = RHS.pVal[i]; + return *this; + } + + template + INLINE ap_private& operator=(const ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) { + if (_AP_S1) + cpSextOrTrunc(RHS); + else + cpZextOrTrunc(RHS); + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator=(const volatile ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) { + if (_AP_S1) + cpSextOrTrunc(RHS); + else + cpZextOrTrunc(RHS); + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator=(const ap_private<_AP_W1, _AP_S1, 1>& RHS) { + static const uint64_t that_sign_ext_mask = (_AP_W1==APINT_BITS_PER_WORD)?0:~0ULL>>(_AP_W1%APINT_BITS_PER_WORD)<<(_AP_W1%APINT_BITS_PER_WORD); + if (RHS.isNegative()) { + pVal[0] = RHS.VAL | that_sign_ext_mask; + memset(pVal+1,~0, APINT_WORD_SIZE*(_AP_N-1)); + } else { + pVal[0] = RHS.VAL; + memset(pVal+1, 0, APINT_WORD_SIZE*(_AP_N-1)); + } + clearUnusedBits(); + return *this; + } + + template + INLINE ap_private& operator=(const volatile ap_private<_AP_W1, _AP_S1, 1>& RHS) { + static const uint64_t that_sign_ext_mask = (_AP_W1==APINT_BITS_PER_WORD)?0:~0ULL>>(_AP_W1%APINT_BITS_PER_WORD)<<(_AP_W1%APINT_BITS_PER_WORD); + if (RHS.isNegative()) { + pVal[0] = RHS.VAL | that_sign_ext_mask; + memset(pVal+1,~0, APINT_WORD_SIZE*(_AP_N-1)); + } else { + pVal[0] = RHS.VAL; + memset(pVal+1, 0, APINT_WORD_SIZE*(_AP_N-1)); + } + clearUnusedBits(); + 
return *this; + } + + /// @} + /// @name Unary Operators + /// @{ + /// @returns a new ap_private value representing *this incremented by one + /// @brief Postfix increment operator. + INLINE const ap_private operator++(int) { + ap_private API(*this); + ++(*this); + return API; + } + + /// @returns *this incremented by one + /// @brief Prefix increment operator. + INLINE ap_private& operator++() { + add_1(pVal, pVal, _AP_N, 1); + clearUnusedBits(); + return *this; + } + + /// @returns a new ap_private representing *this decremented by one. + /// @brief Postfix decrement operator. + INLINE const ap_private operator--(int) { + ap_private API(*this); + --(*this); + return API; + } + + /// @returns *this decremented by one. + /// @brief Prefix decrement operator. + INLINE ap_private& operator--() { + sub_1(pVal, _AP_N, 1); + clearUnusedBits(); + return *this; + } + + /// Performs a bitwise complement operation on this ap_private. + /// @returns an ap_private that is the bitwise complement of *this + /// @brief Unary bitwise complement operator. + INLINE ap_private operator~() const { + ap_private Result(*this); + Result.flip(); + return Result; + } + + /// Negates *this using two's complement logic. + /// @returns An ap_private value representing the negation of *this. + /// @brief Unary negation operator + INLINE typename RType<1,false>::minus operator-() const { + return ap_private<1,false>(0) - (*this); + } + + /// Performs logical negation operation on this ap_private. + /// @returns true if *this is zero, false otherwise. + /// @brief Logical negation operator. 
+ INLINE bool operator !() const { + for (uint32_t i = 0; i < _AP_N; ++i) + if (pVal[i]) + return false; + return true; + } + + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> And(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + return this->operator&(RHS); + } + template + INLINE ap_private Or(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + return this->operator|(RHS); + } + template + ap_private Xor(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + return this->operator^(RHS); + } + + ap_private Mul(const ap_private& RHS) const { + ap_private Result(*this); + Result *= RHS; + return Result; + } + + ap_private Add(const ap_private& RHS) const { + ap_private Result(0); + bool carry = add(Result.pVal, this->pVal, RHS.pVal, _AP_N, _AP_N, _AP_N, _AP_S, _AP_S); + Result.clearUnusedBits(); + return Result; + } + + ap_private Sub(const ap_private& RHS) const { + ap_private Result(0); + sub(Result.pVal, this->pVal, RHS.pVal, _AP_N, _AP_N, _AP_N, _AP_S, _AP_S); + Result.clearUnusedBits(); + return Result; + } + + /// Arithmetic right-shift this ap_private by shiftAmt. + /// @brief Arithmetic right-shift function. + ap_private ashr(uint32_t shiftAmt) const { + assert(shiftAmt <= BitWidth && "Invalid shift amount, too big"); + // Handle a degenerate case + if (shiftAmt == 0) + return *this; + + // Handle single word shifts with built-in ashr + if (isSingleWord()) { + if (shiftAmt == BitWidth) + return ap_private(/*BitWidth, 0*/); // undefined + else { + uint32_t SignBit = APINT_BITS_PER_WORD - BitWidth; + return ap_private(/*BitWidth,*/ + (((int64_t(VAL) << (SignBit)) >> (SignBit)) >> (shiftAmt))); + } + } + + // If all the bits were shifted out, the result is, technically, undefined. + // We return -1 if it was negative, 0 otherwise. We check this early to avoid + // issues in the algorithm below. + if (shiftAmt == BitWidth) { + if (isNegative()) + return ap_private(-1); + else + return ap_private(0); + } + + // Create some space for the result. 
+ ap_private Retval(0); + uint64_t * val = Retval.pVal; + + // Compute some values needed by the following shift algorithms + uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word + uint32_t offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift + uint32_t breakWord = _AP_N - 1 - offset; // last word affected + uint32_t bitsInWord = whichBit(BitWidth); // how many bits in last word? + if (bitsInWord == 0) + bitsInWord = APINT_BITS_PER_WORD; + + // If we are shifting whole words, just move whole words + if (wordShift == 0) { + // Move the words containing significant bits + for (uint32_t i = 0; i <= breakWord; ++i) + val[i] = pVal[i+offset]; // move whole word + + // Adjust the top significant word for sign bit fill, if negative + if (isNegative()) + if (bitsInWord < APINT_BITS_PER_WORD) + val[breakWord] |= ~0ULL << (bitsInWord); // set high bits + } else { + // Shift the low order words + for (uint32_t i = 0; i < breakWord; ++i) { + // This combines the shifted corresponding word with the low bits from + // the next word (shifted into this word's high bits). + val[i] = ((pVal[i+offset]) >> (wordShift)); + val[i] |= ((pVal[i+offset+1]) << (APINT_BITS_PER_WORD - wordShift)); + } + + // Shift the break word. In this case there are no bits from the next word + // to include in this word. + val[breakWord] = (pVal[breakWord+offset]) >> (wordShift); + + // Deal with sign extenstion in the break word, and possibly the word before + // it. + if (isNegative()) { + if (wordShift > bitsInWord) { + if (breakWord > 0) + val[breakWord-1] |= + ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord)); + val[breakWord] |= ~0ULL; + } else + val[breakWord] |= (~0ULL << (bitsInWord - wordShift)); + } + } + + // Remaining words are 0 or -1, just assign them. + uint64_t fillValue = (isNegative() ? 
~0ULL : 0); + for (uint32_t i = breakWord+1; i < _AP_N; ++i) + val[i] = fillValue; + Retval.clearUnusedBits(); + return Retval; + } + + /// Logical right-shift this ap_private by shiftAmt. + /// @brief Logical right-shift function. + ap_private lshr(uint32_t shiftAmt) const { + if (isSingleWord()) { + if (shiftAmt == BitWidth) + return ap_private(0); + else + return ap_private((this->VAL) >> (shiftAmt)); + } + + // If all the bits were shifted out, the result is 0. This avoids issues + // with shifting by the size of the integer type, which produces undefined + // results. We define these "undefined results" to always be 0. + if (shiftAmt == BitWidth) + return ap_private(0); + + // If none of the bits are shifted out, the result is *this. This avoids + // issues with shifting byt he size of the integer type, which produces + // undefined results in the code below. This is also an optimization. + if (shiftAmt == 0) + return *this; + + // Create some space for the result. + ap_private Retval(0); + uint64_t * val = Retval.pVal; + + // If we are shifting less than a word, compute the shift with a simple carry + if (shiftAmt < APINT_BITS_PER_WORD) { + uint64_t carry = 0; + for (int i = _AP_N-1; i >= 0; --i) { + val[i] = ((pVal[i]) >> (shiftAmt)) | carry; + carry = (pVal[i]) << (APINT_BITS_PER_WORD - shiftAmt); + } + Retval.clearUnusedBits(); + return Retval; + } + + // Compute some values needed by the remaining shift algorithms + uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD; + uint32_t offset = shiftAmt / APINT_BITS_PER_WORD; + + // If we are shifting whole words, just move whole words + if (wordShift == 0) { + for (uint32_t i = 0; i < _AP_N - offset; ++i) + val[i] = pVal[i+offset]; + for (uint32_t i = _AP_N-offset; i < _AP_N; i++) + val[i] = 0; + Retval.clearUnusedBits(); + return Retval; + } + + // Shift the low order words + uint32_t breakWord = _AP_N - offset -1; + for (uint32_t i = 0; i < breakWord; ++i) + val[i] = ((pVal[i+offset]) >> (wordShift)) | + 
((pVal[i+offset+1]) << (APINT_BITS_PER_WORD - wordShift)); + // Shift the break word. + val[breakWord] = (pVal[breakWord+offset]) >> (wordShift); + + // Remaining words are 0 + for (uint32_t i = breakWord+1; i < _AP_N; ++i) + val[i] = 0; + Retval.clearUnusedBits(); + return Retval; + } + + /// Left-shift this ap_private by shiftAmt. + /// @brief Left-shift function. + ap_private shl(uint32_t shiftAmt) const { + assert(shiftAmt <= BitWidth && "Invalid shift amount, too big"); + if (isSingleWord()) { + if (shiftAmt == BitWidth) + return ap_private(0); // avoid undefined shift results + return ap_private((VAL) << (shiftAmt)); + } + + // If all the bits were shifted out, the result is 0. This avoids issues + // with shifting by the size of the integer type, which produces undefined + // results. We define these "undefined results" to always be 0. + if (shiftAmt == BitWidth) + return ap_private(0); + + // If none of the bits are shifted out, the result is *this. This avoids a + // lshr by the words size in the loop below which can produce incorrect + // results. It also avoids the expensive computation below for a common case. + if (shiftAmt == 0) + return *this; + + // Create some space for the result. 
+ ap_private Retval(0); + uint64_t* val = Retval.pVal; + // If we are shifting less than a word, do it the easy way + if (shiftAmt < APINT_BITS_PER_WORD) { + uint64_t carry = 0; + for (uint32_t i = 0; i < _AP_N; i++) { + val[i] = ((pVal[i]) << (shiftAmt)) | carry; + carry = (pVal[i]) >> (APINT_BITS_PER_WORD - shiftAmt); + } + Retval.clearUnusedBits(); + return Retval; + } + + // Compute some values needed by the remaining shift algorithms + uint32_t wordShift = shiftAmt % APINT_BITS_PER_WORD; + uint32_t offset = shiftAmt / APINT_BITS_PER_WORD; + + // If we are shifting whole words, just move whole words + if (wordShift == 0) { + for (uint32_t i = 0; i < offset; i++) + val[i] = 0; + for (uint32_t i = offset; i < _AP_N; i++) + val[i] = pVal[i-offset]; + Retval.clearUnusedBits(); + return Retval; + } + + // Copy whole words from this to Result. + uint32_t i = _AP_N - 1; + for (; i > offset; --i) + val[i] = (pVal[i-offset]) << (wordShift) | + (pVal[i-offset-1]) >> (APINT_BITS_PER_WORD - wordShift); + val[offset] = (pVal[0]) << (wordShift); + for (i = 0; i < offset; ++i) + val[i] = 0; + Retval.clearUnusedBits(); + return Retval; + } + + INLINE ap_private rotl(uint32_t rotateAmt) const { + if (rotateAmt == 0) + return *this; + // Don't get too fancy, just use existing shift/or facilities + ap_private hi(*this); + ap_private lo(*this); + hi.shl(rotateAmt); + lo.lshr(BitWidth - rotateAmt); + return hi | lo; + } + + INLINE ap_private rotr(uint32_t rotateAmt) const { + if (rotateAmt == 0) + return *this; + // Don't get too fancy, just use existing shift/or facilities + ap_private hi(*this); + ap_private lo(*this); + lo.lshr(rotateAmt); + hi.shl(BitWidth - rotateAmt); + return hi | lo; + } + + /// Perform an unsigned divide operation on this ap_private by RHS. Both this and + /// RHS are treated as unsigned quantities for purposes of this division. + /// @returns a new ap_private value containing the division result + /// @brief Unsigned division operation. 
+ ap_private udiv(const ap_private& RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + + // First, deal with the easy case + if (isSingleWord()) { + assert(RHS.VAL != 0 && "Divide by zero?"); + return ap_private(VAL / RHS.VAL); + } + + // Get some facts about the LHS and RHS number of bits and words + uint32_t rhsBits = RHS.getActiveBits(); + uint32_t rhsWords = !rhsBits ? 0 : (whichWord(rhsBits - 1) + 1); + assert(rhsWords && "Divided by zero???"); + uint32_t lhsBits = this->getActiveBits(); + uint32_t lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1); + + // Deal with some degenerate cases + if (!lhsWords) + // 0 / X ===> 0 + return ap_private(0); + else if (lhsWords < rhsWords || this->ult(RHS)) { + // X / Y ===> 0, iff X < Y + return ap_private(0); + } else if (*this == RHS) { + // X / X ===> 1 + return ap_private(1); + } else if (lhsWords == 1 && rhsWords == 1) { + // All high words are zero, just use native divide + return ap_private(this->pVal[0] / RHS.pVal[0]); + } + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + ap_private Quotient(0); // to hold result. + divide(*this, lhsWords, RHS, rhsWords, &Quotient, (ap_private*)0); + return Quotient; + } + + /// Signed divide this ap_private by ap_private RHS. + /// @brief Signed division function for ap_private. + INLINE ap_private sdiv(const ap_private& RHS) const { + if (isNegative()) + if (RHS.isNegative()) + return (-(*this)).udiv(-RHS); + else + return -((-(*this)).udiv(RHS)); + else if (RHS.isNegative()) + return -(this->udiv(-RHS)); + return this->udiv(RHS); + } + + /// Perform an unsigned remainder operation on this ap_private with RHS being the + /// divisor. Both this and RHS are treated as unsigned quantities for purposes + /// of this operation. Note that this is a true remainder operation and not + /// a modulo operation because the sign follows the sign of the dividend + /// which is *this. 
+ /// @returns a new ap_private value containing the remainder result + /// @brief Unsigned remainder operation. + ap_private urem(const ap_private& RHS) const { + if (isSingleWord()) { + assert(RHS.VAL != 0 && "Remainder by zero?"); + return ap_private(VAL % RHS.VAL); + } + + // Get some facts about the LHS + uint32_t lhsBits = getActiveBits(); + uint32_t lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1); + + // Get some facts about the RHS + uint32_t rhsBits = RHS.getActiveBits(); + uint32_t rhsWords = !rhsBits ? 0 : (whichWord(rhsBits - 1) + 1); + assert(rhsWords && "Performing remainder operation by zero ???"); + + // Check the degenerate cases + if (lhsWords == 0) { + // 0 % Y ===> 0 + return ap_private(0); + } else if (lhsWords < rhsWords || this->ult(RHS)) { + // X % Y ===> X, iff X < Y + return *this; + } else if (*this == RHS) { + // X % X == 0; + return ap_private(0); + } else if (lhsWords == 1) { + // All high words are zero, just use native remainder + return ap_private(pVal[0] % RHS.pVal[0]); + } + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + ap_private Remainder(0); + divide(*this, lhsWords, RHS, rhsWords, (ap_private*)(0), &Remainder); + return Remainder; + } + + ap_private urem(uint64_t RHS) const { + // Get some facts about the LHS + uint32_t lhsBits = getActiveBits(); + uint32_t lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1); + // Get some facts about the RHS + uint32_t rhsBits = 64 - CountLeadingZeros_64(RHS); // RHS.getActiveBits(); + uint32_t rhsWords = 1;//!rhsBits ? 
0 : (ap_private<_AP_W, _AP_S, _AP_N>::whichWord(rhsBits - 1) + 1); + assert(rhsWords && "Performing remainder operation by zero ???"); + // Check the degenerate cases + if (lhsWords == 0) { + // 0 % Y ===> 0 + return ap_private(0); + } else if (lhsWords < rhsWords || this->ult(RHS)) { + // X % Y ===> X, iff X < Y + return *this; + } else if (*this == RHS) { + // X % X == 0; + return ap_private(0); + } else if (lhsWords == 1) { + // All high words are zero, just use native remainder + return ap_private(pVal[0] % RHS); + } + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + ap_private Remainder(0); + divide(*this, lhsWords, RHS, (ap_private*)(0), &Remainder); + return Remainder; + } + + /// Signed remainder operation on ap_private. + /// @brief Function for signed remainder operation. + INLINE ap_private srem(const ap_private& RHS) const { + if (isNegative()) { + ap_private lhs = -(*this); + if (RHS.isNegative()) { + ap_private rhs = -RHS; + return -(lhs.urem(rhs)); + } else + return -(lhs.urem(RHS)); + } else if (RHS.isNegative()) { + ap_private rhs = -RHS; + return this->urem(rhs); + } + return this->urem(RHS); + } + + /// Signed remainder operation on ap_private. + /// @brief Function for signed remainder operation. + INLINE ap_private srem(int64_t RHS) const { + if (isNegative()) + if (RHS<0) + return -((-(*this)).urem(-RHS)); + else + return -((-(*this)).urem(RHS)); + else if (RHS<0) + return this->urem(-RHS); + return this->urem(RHS); + } + + /// Sometimes it is convenient to divide two ap_private values and obtain both + /// the quotient and remainder. This function does both operations in the + /// same computation making it a little more efficient. + /// @brief Dual division/remainder interface. 
+ static void udivrem(const ap_private& LHS, const ap_private& RHS, ap_private &Quotient, ap_private& Remainder) { + // Get some size facts about the dividend and divisor + uint32_t lhsBits = LHS.getActiveBits(); + uint32_t lhsWords = !lhsBits ? 0 : (ap_private::whichWord(lhsBits - 1) + 1); + uint32_t rhsBits = RHS.getActiveBits(); + uint32_t rhsWords = !rhsBits ? 0 : (ap_private::whichWord(rhsBits - 1) + 1); + + // Check the degenerate cases + if (lhsWords == 0) { + Quotient = 0; // 0 / Y ===> 0 + Remainder = 0; // 0 % Y ===> 0 + return; + } + + if (lhsWords < rhsWords || LHS.ult(RHS)) { + Quotient = 0; // X / Y ===> 0, iff X < Y + Remainder = LHS; // X % Y ===> X, iff X < Y + return; + } + + if (LHS == RHS) { + Quotient = 1; // X / X ===> 1 + Remainder = 0; // X % X ===> 0; + return; + } + + if (lhsWords == 1 && rhsWords == 1) { + // There is only one word to consider so use the native versions. + if (LHS.isSingleWord()) { + Quotient = ap_private(LHS.VAL / RHS.VAL); + Remainder = ap_private(LHS.VAL % RHS.VAL); + } else { + Quotient = ap_private(LHS.pVal[0] / RHS.pVal[0]); + Remainder = ap_private(LHS.pVal[0] % RHS.pVal[0]); + } + return; + } + + // Okay, lets do it the long way + divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); + } + + static void sdivrem(const ap_private &LHS, const ap_private &RHS, + ap_private &Quotient, ap_private &Remainder) { + if (LHS.isNegative()) { + if (RHS.isNegative()) + ap_private::udivrem(-LHS, -RHS, Quotient, Remainder); + else + ap_private::udivrem(-LHS, RHS, Quotient, Remainder); + Quotient = -Quotient; + Remainder = -Remainder; + } else if (RHS.isNegative()) { + ap_private::udivrem(LHS, -RHS, Quotient, Remainder); + Quotient = -Quotient; + } else { + ap_private::udivrem(LHS, RHS, Quotient, Remainder); + } + } + + /// Compares this ap_private with RHS for the validity of the equality + /// relationship. + /// @returns true if *this == Val + /// @brief Equality comparison. 
+ template + INLINE bool eq(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + return (*this) == RHS; + } + + /// Compares this ap_private with RHS for the validity of the inequality + /// relationship. + /// @returns true if *this != Val + /// @brief Inequality comparison + template + INLINE bool ne(const ap_private<_AP_W, _AP_S1, _AP_N> &RHS) const { + return !((*this) == RHS); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// the validity of the less-than relationship. + /// @returns true if *this < RHS when both are considered unsigned. + /// @brief Unsigned less than comparison + template + INLINE bool ult(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + // Get active bit length of both operands + uint32_t n1 = getActiveBits(); + uint32_t n2 = RHS.getActiveBits(); + + // If magnitude of LHS is less than RHS, return true. + if (n1 < n2) + return true; + + // If magnitude of RHS is greather than LHS, return false. + if (n2 < n1) + return false; + + // If they bot fit in a word, just compare the low order word + if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD) + return pVal[0] < RHS.pVal[0]; + + // Otherwise, compare all words + uint32_t topWord = whichWord(AESL_std::max(n1,n2)-1); + for (int i = topWord; i >= 0; --i) { + if (pVal[i] > RHS.pVal[i]) + return false; + if (pVal[i] < RHS.pVal[i]) + return true; + } + return false; + } + + INLINE bool ult(uint64_t RHS) const { + // Get active bit length of both operands + uint32_t n1 = getActiveBits(); + uint32_t n2 = 64 - CountLeadingZeros_64(RHS); //RHS.getActiveBits(); + + // If magnitude of LHS is less than RHS, return true. + if (n1 < n2) + return true; + + // If magnitude of RHS is greather than LHS, return false. 
+ if (n2 < n1) + return false; + + // If they bot fit in a word, just compare the low order word + if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD) + return pVal[0] < RHS; + assert(0); + } + + template + INLINE bool slt(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + ap_private lhs(*this); + ap_private<_AP_W, _AP_S1, _AP_N> rhs(RHS); + bool lhsNeg = isNegative(); + bool rhsNeg = rhs.isNegative(); + if (lhsNeg) { + // Sign bit is set so perform two's complement to make it positive + lhs.flip(); + lhs++; + } + if (rhsNeg) { + // Sign bit is set so perform two's complement to make it positive + rhs.flip(); + rhs++; + } + + // Now we have unsigned values to compare so do the comparison if necessary + // based on the negativeness of the values. + if (lhsNeg) + if (rhsNeg) + return lhs.ugt(rhs); + else + return true; + else if (rhsNeg) + return false; + else + return lhs.ult(rhs); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// validity of the less-or-equal relationship. + /// @returns true if *this <= RHS when both are considered unsigned. + /// @brief Unsigned less or equal comparison + template + INLINE bool ule(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + return ult(RHS) || eq(RHS); + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// validity of the less-or-equal relationship. + /// @returns true if *this <= RHS when both are considered signed. + /// @brief Signed less or equal comparison + template + INLINE bool sle(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + return slt(RHS) || eq(RHS); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// the validity of the greater-than relationship. + /// @returns true if *this > RHS when both are considered unsigned. 
+ /// @brief Unsigned greather than comparison + template + INLINE bool ugt(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + return !ult(RHS) && !eq(RHS); + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// the validity of the greater-than relationship. + /// @returns true if *this > RHS when both are considered signed. + /// @brief Signed greather than comparison + template + INLINE bool sgt(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + return !slt(RHS) && !eq(RHS); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// validity of the greater-or-equal relationship. + /// @returns true if *this >= RHS when both are considered unsigned. + /// @brief Unsigned greater or equal comparison + template + INLINE bool uge(const ap_private<_AP_W, _AP_S, _AP_N>& RHS) const { + return !ult(RHS); + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// validity of the greater-or-equal relationship. + /// @returns true if *this >= RHS when both are considered signed. + /// @brief Signed greather or equal comparison + template + INLINE bool sge(const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) const { + return !slt(RHS); + } + + template + void cpTrunc(const ap_private<_AP_W1, _AP_S1, _AP_N1>& that) { + assert(_AP_W1 > BitWidth && "Invalid ap_private Truncate request"); + assert(_AP_W1 >= MIN_INT_BITS && "Can't truncate to 0 bits"); + memcpy(pVal, that.pVal, _AP_N*APINT_WORD_SIZE); + } + + // Sign extend to a new width. + template + void cpSext(const ap_private<_AP_W1, _AP_S1, _AP_N1>& that) { + assert(_AP_W1 < BitWidth && "Invalid ap_private SignExtend request"); + assert(_AP_W1 <= MAX_INT_BITS && "Too many bits"); + // If the sign bit isn't set, this is the same as zext. + if (!that.isNegative()) { + cpZext(that); + return; + } + + // The sign bit is set. 
First, get some facts + enum { wordBits = _AP_W1 % APINT_BITS_PER_WORD}; + + // Mask the high order word appropriately + if (_AP_N1 == _AP_N) { + enum { newWordBits = _AP_W % APINT_BITS_PER_WORD}; + // The extension is contained to the wordsBefore-1th word. + static const uint64_t mask = wordBits?(~0ULL<<(wordBits)):0ULL; + if (_AP_N1 == 1) { + assert(0); + } else { + for (uint32_t i = 0; i < _AP_N1; ++i) + pVal[i] = that.pVal[i]; + pVal[_AP_N1-1] |= mask; + return; + } + } + + if (_AP_N1 == 1) { + assert(0);// newVal[0] = VAL | mask; + } else { + enum { newWordBits = _AP_W % APINT_BITS_PER_WORD}; + // The extension is contained to the wordsBefore-1th word. + static const uint64_t mask = wordBits?(~0ULL<<(wordBits)):0ULL; + for (uint32_t i = 0; i < _AP_N1; ++i) + pVal[i] = that.pVal[i]; + pVal[_AP_N1-1] |= mask; + } + for (uint32_t i=_AP_N1; i < _AP_N-1; i++) + pVal[i] = ~0ULL; + pVal[_AP_N-1] = ~0ULL; + clearUnusedBits(); + return; + } + + // Zero extend to a new width. + template + void cpZext(const ap_private<_AP_W1, _AP_S1, _AP_N1>& that) { + assert(_AP_W1 < BitWidth && "Invalid ap_private ZeroExtend request"); + assert(_AP_W1 <= MAX_INT_BITS && "Too many bits"); + uint32_t wordsAfter = _AP_N; + if (wordsAfter==1) { + assert(0); // return ap_private<_AP_W1, _AP_S, _AP_N1> (_AP_W1, VAL, _AP_S); + } else { + if (_AP_N1 == 1) { + assert(0); + // newVal[0] = VAL; + } else { + uint32_t i = 0; + for (; i < _AP_N1; ++i) + pVal[i] = that.pVal[i]; + for (; i < _AP_N; ++i) + pVal[i] = 0; + } + } + clearUnusedBits(); + } + + template + void cpZextOrTrunc(const ap_private<_AP_W1, _AP_S1, _AP_N1>& that) { + if (BitWidth > _AP_W1) + cpZext(that); + else if (BitWidth < _AP_W1) + cpTrunc(that); + else { + for (int i=0; i<_AP_N1; ++i) + pVal[i]=that.pVal[i]; + clearUnusedBits(); + } + } + + template + void cpSextOrTrunc(const ap_private<_AP_W1, _AP_S1, _AP_N1>& that) { + if (BitWidth > _AP_W1) + cpSext(that); + else if (BitWidth < _AP_W1) + cpTrunc(that); + else { + for (int 
i=0; i<_AP_N1; ++i) + pVal[i] = that.pVal[i]; + clearUnusedBits(); + } + } + + /// @name Value Characterization Functions + /// @{ + + /// @returns the total number of bits. + INLINE uint32_t getBitWidth() const { + return BitWidth; + } + + /// Here one word's bitwidth equals to that of uint64_t. + /// @returns the number of words to hold the integer value of this ap_private. + /// @brief Get the number of words. + INLINE uint32_t getNumWords() const { + return (BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; + } + + /// This function returns the number of active bits which is defined as the + /// bit width minus the number of leading zeros. This is used in several + /// computations to see how "wide" the value is. + /// @brief Compute the number of active bits in the value + INLINE uint32_t getActiveBits() const { + uint32_t bits=BitWidth - countLeadingZeros(); + return bits?bits:1; + } + + + /// This method attempts to return the value of this ap_private as a zero extended + /// uint64_t. The bitwidth must be <= 64 or the value must fit within a + /// uint64_t. Otherwise an assertion will result. + /// @brief Get zero extended value + INLINE uint64_t getZExtValue() const { + assert(getActiveBits() <= 64 && "Too many bits for uint64_t"); + return *pVal; + } + + /// This method attempts to return the value of this ap_private as a sign extended + /// int64_t. The bit width must be <= 64 or the value must fit within an + /// int64_t. Otherwise an assertion will result. + /// @brief Get sign extended value + INLINE int64_t getSExtValue() const { + assert(getActiveBits() <= 64 && "Too many bits for int64_t"); + return int64_t(pVal[0]); + } + + /// This method determines how many bits are required to hold the ap_private + /// equivalent of the string given by \p str of length \p slen. + /// @brief Get bits required for string value. 
+ static uint32_t getBitsNeeded(const char* str, uint32_t slen, uint8_t radix); + + /// countLeadingZeros - This function is an ap_private version of the + /// countLeadingZeros_{32,64} functions in MathExtras.h. It counts the number + /// of zeros from the most significant bit to the first one bit. + /// @returns BitWidth if the value is zero. + /// @returns the number of zeros from the most significant bit to the first + /// one bits. + INLINE uint32_t countLeadingZeros() const ; + + /// countLeadingOnes - This function counts the number of contiguous 1 bits + /// in the high order bits. The count stops when the first 0 bit is reached. + /// @returns 0 if the high order bit is not set + /// @returns the number of 1 bits from the most significant to the least + /// @brief Count the number of leading one bits. + INLINE uint32_t countLeadingOnes() const ; + + /// countTrailingZeros - This function is an ap_private version of the + /// countTrailingZoers_{32,64} functions in MathExtras.h. It counts + /// the number of zeros from the least significant bit to the first set bit. + /// @returns BitWidth if the value is zero. + /// @returns the number of zeros from the least significant bit to the first + /// one bit. + /// @brief Count the number of trailing zero bits. + INLINE uint32_t countTrailingZeros() const ; + + /// countPopulation - This function is an ap_private version of the + /// countPopulation_{32,64} functions in MathExtras.h. It counts the number + /// of 1 bits in the ap_private value. + /// @returns 0 if the value is zero. + /// @returns the number of set bits. + /// @brief Count the number of bits set. + INLINE uint32_t countPopulation() const { + uint32_t Count = 0; + for (uint32_t i = 0; i<_AP_N-1 ; ++i) + Count += CountPopulation_64(pVal[i]); + Count += CountPopulation_64(pVal[_AP_N-1]&mask); + return Count; + } + + /// @} + /// @name Conversion Functions + /// @{ + + /// This is used internally to convert an ap_private to a string. 
+ /// @brief Converts an ap_private to a std::string + INLINE std::string toString(uint8_t radix, bool wantSigned) const + ; + + /// Considers the ap_private to be unsigned and converts it into a string in the + /// radix given. The radix can be 2, 8, 10 or 16. + /// @returns a character interpretation of the ap_private + /// @brief Convert unsigned ap_private to string representation. + INLINE std::string toStringUnsigned(uint8_t radix = 10) const { + return toString(radix, false); + } + + /// Considers the ap_private to be unsigned and converts it into a string in the + /// radix given. The radix can be 2, 8, 10 or 16. + /// @returns a character interpretation of the ap_private + /// @brief Convert unsigned ap_private to string representation. + INLINE std::string toStringSigned(uint8_t radix = 10) const { + return toString(radix, true); + } + + /// @returns a byte-swapped representation of this ap_private Value. + INLINE ap_private byteSwap() const ; + + /// @brief Converts this ap_private to a double value. + INLINE double roundToDouble(bool isSigned) const ; + + /// @brief Converts this unsigned ap_private to a double value. + INLINE double roundToDouble() const { + return roundToDouble(false); + } + + /// @brief Converts this signed ap_private to a double value. + INLINE double signedRoundToDouble() const { + return roundToDouble(true); + } + + /// The conversion does not do a translation from integer to double, it just + /// re-interprets the bits as a double. Note that it is valid to do this on + /// any bit width. Exactly 64 bits will be translated. + /// @brief Converts ap_private bits to a double + INLINE double bitsToDouble() const { + union { + uint64_t __I; + double __D; + } __T; + __T.__I = pVal[0]; + return __T.__D; + } + + /// The conversion does not do a translation from integer to float, it just + /// re-interprets the bits as a float. Note that it is valid to do this on + /// any bit width. Exactly 32 bits will be translated. 
+ /// @brief Converts ap_private bits to a double + INLINE float bitsToFloat() const { + union { + uint32_t __I; + float __F; + } __T; + __T.__I = uint32_t(pVal[0]); + return __T.__F; + } + + /// The conversion does not do a translation from double to integer, it just + /// re-interprets the bits of the double. Note that it is valid to do this on + /// any bit width but bits from V may get truncated. + /// @brief Converts a double to ap_private bits. + INLINE ap_private& doubleToBits(double __V) { + union { + uint64_t __I; + double __D; + } __T; + __T.__D = __V; + pVal[0] = __T.__I; + return *this; + } + + /// The conversion does not do a translation from float to integer, it just + /// re-interprets the bits of the float. Note that it is valid to do this on + /// any bit width but bits from V may get truncated. + /// @brief Converts a float to ap_private bits. + INLINE ap_private& floatToBits(float __V) { + union { + uint32_t __I; + float __F; + } __T; + __T.__F = __V; + pVal[0] = __T.__I; + } + + //Reduce operation + //----------------------------------------------------------- + INLINE bool and_reduce() const { + return isMaxValue(); + } + + INLINE bool nand_reduce() const { + return isMinValue(); + } + + INLINE bool or_reduce() const { + return (bool)countPopulation(); + } + + INLINE bool nor_reduce() const { + return countPopulation()==0; + } + + INLINE bool xor_reduce() const { + unsigned int i=countPopulation(); + return (i%2)?true:false; + } + + INLINE bool xnor_reduce() const { + unsigned int i=countPopulation(); + return (i%2)?false:true; + } + INLINE std::string to_string(uint8_t radix=16, bool sign=false) const { + return toString(radix, radix==10?_AP_S:sign); + } +}; + +template +INLINE bool operator==(uint64_t V1, const ap_private<_AP_W, _AP_S, _AP_N>& V2) { + return V2 == V1; +} + +template +INLINE bool operator!=(uint64_t V1, const ap_private<_AP_W, _AP_S, _AP_N>& V2) { + return V2 != V1; +} + +namespace ap_private_ops { + enum 
{APINT_BITS_PER_WORD=64}; + /// @brief Determine the smaller of two ap_privates considered to be signed. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> smin(const ap_private<_AP_W, _AP_S, _AP_N> &LHS, const ap_private<_AP_W, _AP_S1, _AP_N> &RHS) { + return LHS.slt(RHS) ? LHS : RHS; + } + + /// @brief Determine the larger of two ap_privates considered to be signed. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> smax(const ap_private<_AP_W, _AP_S, _AP_N> &LHS, const ap_private<_AP_W, _AP_S1, _AP_N> &RHS) { + return LHS.sgt(RHS) ? LHS : RHS; + } + + /// @brief Determine the smaller of two ap_privates considered to be signed. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> umin(const ap_private<_AP_W, _AP_S, _AP_N> &LHS, const ap_private<_AP_W, _AP_S1, _AP_N> &RHS) { + return LHS.ult(RHS) ? LHS : RHS; + } + + /// @brief Determine the larger of two ap_privates considered to be unsigned. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> umax(const ap_private<_AP_W, _AP_S, _AP_N> &LHS, const ap_private<_AP_W, _AP_S1, _AP_N> &RHS) { + return LHS.ugt(RHS) ? LHS : RHS; + } + + /// @brief Check if the specified ap_private has a N-bits integer value. + template + INLINE bool isIntN(uint32_t __N, const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return APIVal.isIntN(__N); + } + + /// @returns true if the argument ap_private value is a sequence of ones + /// starting at the least significant bit with the remainder zero. + template + INLINE bool isMask(uint32_t numBits, const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return APIVal.getBoolValue() && ((APIVal + ap_private<_AP_W, _AP_S, _AP_N>(numBits,1)) & APIVal) == 0; + } + + /// @returns true if the argument ap_private value contains a sequence of ones + /// with the remainder zero. 
+ template + INLINE bool isShiftedMask(uint32_t numBits, const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return isMask(numBits, (APIVal - ap_private<_AP_W, _AP_S, _AP_N>(numBits,1)) | APIVal); + } + + /// @returns a byte-swapped representation of the specified ap_private Value. + template INLINE ap_private<_AP_W, _AP_S, _AP_N> byteSwap(const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return APIVal.byteSwap(); + } + + /// @returns the floor log base 2 of the specified ap_private value. + template INLINE uint32_t logBase2(const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return APIVal.logBase2(); + } + + /// GreatestCommonDivisor - This function returns the greatest common + /// divisor of the two ap_private values using Enclid's algorithm. + /// @returns the greatest common divisor of Val1 and Val2 + /// @brief Compute GCD of two ap_private values. + template INLINE ap_private<_AP_W, _AP_S, _AP_N> GreatestCommonDivisor(const ap_private<_AP_W, _AP_S, _AP_N>& Val1, const ap_private<_AP_W, _AP_S, _AP_N>& Val2) + ; + + /// Treats the ap_private as an unsigned value for conversion purposes. + /// @brief Converts the given ap_private to a double value. + template INLINE double Roundap_privateToDouble(const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return APIVal.roundToDouble(); + } + + /// Treats the ap_private as a signed value for conversion purposes. + /// @brief Converts the given ap_private to a double value. + template INLINE double RoundSignedap_privateToDouble(const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return APIVal.signedRoundToDouble(); + } + + /// @brief Converts the given ap_private to a float vlalue. + template INLINE float Roundap_privateToFloat(const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return float(Roundap_privateToDouble(APIVal)); + } + + /// Treast the ap_private as a signed value for conversion purposes. + /// @brief Converts the given ap_private to a float value. 
+ template INLINE float RoundSignedap_privateToFloat(const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return float(APIVal.signedRoundToDouble()); + } + + /// RoundDoubleToap_private - This function convert a double value to an ap_private value. + /// @brief Converts the given double value into a ap_private. + template INLINE ap_private<_AP_W, _AP_S, _AP_N> RoundDoubleToap_private(double Double, uint32_t width) ; + + /// RoundFloatToap_private - Converts a float value into an ap_private value. + /// @brief Converts a float value into a ap_private. + template INLINE ap_private<_AP_W, _AP_S, _AP_N> RoundFloatToap_private(float Float, uint32_t width) { + return RoundDoubleToap_private<_AP_W, _AP_S, _AP_N>(double(Float), width); + } + + /// Arithmetic right-shift the ap_private by shiftAmt. + /// @brief Arithmetic right-shift function. + template INLINE ap_private<_AP_W, _AP_S, _AP_N> ashr(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, uint32_t shiftAmt) { + return LHS.ashr(shiftAmt); + } + + /// Logical right-shift the ap_private by shiftAmt. + /// @brief Logical right-shift function. + template INLINE ap_private<_AP_W, _AP_S, _AP_N> lshr(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, uint32_t shiftAmt) { + return LHS.lshr(shiftAmt); + } + + /// Left-shift the ap_private by shiftAmt. + /// @brief Left-shift function. + template INLINE ap_private<_AP_W, _AP_S, _AP_N> shl(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, uint32_t shiftAmt) { + return LHS.shl(shiftAmt); + } + + /// Signed divide ap_private LHS by ap_private RHS. + /// @brief Signed division function for ap_private. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> sdiv(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) { + return LHS.sdiv(RHS); + } + + /// Unsigned divide ap_private LHS by ap_private RHS. + /// @brief Unsigned division function for ap_private. 
+ template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> udiv(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) { + return LHS.udiv(RHS); + } + + /// Signed remainder operation on ap_private. + /// @brief Function for signed remainder operation. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> srem(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) { + return LHS.srem(RHS); + } + + /// Unsigned remainder operation on ap_private. + /// @brief Function for unsigned remainder operation. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> urem(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) { + return LHS.urem(RHS); + } + + /// Performs multiplication on ap_private values. + /// @brief Function for multiplication operation. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> mul(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) { + return LHS * RHS; + } + + /// Performs addition on ap_private values. + /// @brief Function for addition operation. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> add(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) { + return LHS + RHS; + } + + /// Performs subtraction on ap_private values. + /// @brief Function for subtraction operation. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> sub(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) { + return LHS - RHS; + } + + /// Performs bitwise AND operation on ap_private LHS and + /// ap_private RHS. + /// @brief Bitwise AND function for ap_private. + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> And(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) { + return LHS & RHS; + } + + /// Performs bitwise OR operation on ap_private LHS and ap_private RHS. 
+  /// @brief Bitwise OR function for ap_private.
+  template <int _AP_W, bool _AP_S, bool _AP_S1, int _AP_N>
+  INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> Or(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) {
+    return LHS | RHS;
+  }
+
+  /// Performs bitwise XOR operation on ap_private.
+  /// @brief Bitwise XOR function for ap_private.
+  template <int _AP_W, bool _AP_S, bool _AP_S1, int _AP_N>
+  INLINE ap_private<_AP_W, _AP_S||_AP_S1, _AP_N> Xor(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, const ap_private<_AP_W, _AP_S1, _AP_N>& RHS) {
+    return LHS ^ RHS;
+  }
+
+  /// Performs a bitwise complement operation on ap_private.
+  /// @brief Bitwise complement function.
+  template <int _AP_W, bool _AP_S, int _AP_N>
+  INLINE ap_private<_AP_W, _AP_S, _AP_N> Not(const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) {
+    return ~APIVal;
+  }
+
+  /// Masks off any bits of msw above bit position wordBits.
+  /// wordBits is the number of valid bits in this most-significant 64-bit
+  /// word; 0 means all 64 bits are in use and the word is left untouched.
+  template <int wordBits> void clearUnusedBits(uint64_t& msw) {
+    // Compute how many bits are used in the final word
+    // uint32_t wordBits = APIVal.getBitWidth() & 0x3f;
+    if (wordBits == 0)
+      // If all bits are used, we want to leave the value alone. This also
+      // avoids the undefined behavior of >> when the shift is the same size as
+      // the word size (64).
+      return;
+
+    // Mask out the high bits.
+    uint64_t mask = ~uint64_t(0ULL) >> (64 /*ap_private::APINT_BITS_PER_WORD */- wordBits);
+    msw &= mask;
+  }
+  /// Specialization for 1-bit values: keep only bit 0.
+  template <> INLINE void clearUnusedBits<1>(uint64_t& msw) {
+    uint64_t mask = ~uint64_t(0ULL) >> (64 /*ap_private::APINT_BITS_PER_WORD */- 1);
+    msw &= mask;
+  }
+  /// Signed overload: identical masking applied to an int64_t word.
+  template <int wordBits> void clearUnusedBits(int64_t& msw) {
+    // Compute how many bits are used in the final word
+    // uint32_t wordBits = APIVal.getBitWidth() & 0x3f;
+    if (wordBits == 0)
+      // If all bits are used, we want to leave the value alone. This also
+      // avoids the undefined behavior of >> when the shift is the same size as
+      // the word size (64).
+      return;
+
+    // Mask out the high bits.
+ uint64_t mask = ~uint64_t(0ULL) >> (64 /*ap_private::APINT_BITS_PER_WORD */- wordBits); + msw &= mask; + } + template <> INLINE void clearUnusedBits<1>(int64_t& msw) { + uint64_t mask = ~uint64_t(0ULL) >> (64 /*ap_private::APINT_BITS_PER_WORD */- 1); + msw &= mask; + } + // template + template + INLINE ap_private<_AP_W, _AP_S> ashr(const ap_private<_AP_W, _AP_S>& a) { + return ashr(a, shiftAmt); + } + + template + INLINE ap_private<_AP_W, _AP_S> lshr(const ap_private<_AP_W, _AP_S>& a) { + return lshr(a, shiftAmt); + } + + template + INLINE ap_private<_AP_W, true> ashr(const ap_private<_AP_W, true, 1>& a) { + enum {APINT_BITS_PER_WORD=64, excess_bits=APINT_BITS_PER_WORD-_AP_W}; + static const uint64_t sign_bit = (1ULL<<(_AP_W-1)); + static const uint64_t sign_ext_mask = (_AP_W-shiftAmt>0)?~0ULL<<(APINT_BITS_PER_WORD-_AP_W+shiftAmt):~0ULL; + return ap_private<_AP_W, true>((((int64_t)a.VAL) >> (shiftAmt)) | (a.VAL & sign_bit? sign_ext_mask : 0ULL)); + } + + template + INLINE ap_private<_AP_W, false> ashr(const ap_private<_AP_W, false, 1>& a) { + return ap_private<_AP_W, false>((a.VAL) >> (shiftAmt)); + } + + template + INLINE ap_private<_AP_W, _AP_S> lshr(const ap_private<_AP_W, _AP_S, 1>& a) { + static const uint64_t mask = ~0ULL<<_AP_W; + return ap_private<_AP_W, _AP_S>((a.VAL&mask) >> (shiftAmt)); + } + + template + INLINE ap_private<_AP_W-shiftAmt, _AP_S> shr(const ap_private<_AP_W, _AP_S>& a) { + return ap_private<_AP_W-shiftAmt, _AP_S>((a.VAL) >> (shiftAmt)); + } + + template + INLINE ap_private<_AP_W+shiftAmt, _AP_S> shl(const ap_private<_AP_W, _AP_S>& a) { + return ap_private<_AP_W+shiftAmt, _AP_S>((a.VAL) << (shiftAmt)); + } + + template + INLINE bool get(const ap_private<_AP_W, _AP_S, 1>& a) { + unsigned shift = (index%APINT_BITS_PER_WORD); + static const uint64_t mask=1ULL << (shift); + return ((mask & a.VAL) != 0); + } + + template + INLINE bool get(const ap_private<_AP_W, _AP_S>& a) { + static const uint64_t mask=1ULL << (index&0x3f); + return ((mask & 
a.pVal[(index)>>6]) != 0); + } + + template + INLINE void set(ap_private<_AP_W, _AP_S, 1>& a) { + const uint64_t mask = ~0ULL >> (lsb) << (APINT_BITS_PER_WORD-msb+lsb-1)>>(APINT_BITS_PER_WORD-msb-1); + a.VAL |= mask; + } + + template + INLINE void clear(ap_private<_AP_W, _AP_S, 1>& a) { + static const uint64_t mask = ~(~0ULL >> (lsb) <<(APINT_BITS_PER_WORD-msb+lsb-1) >> (APINT_BITS_PER_WORD-msb-1)); + a.VAL &= mask; + } + + template + INLINE void set(ap_private<_AP_W, _AP_S>& a) { + enum { APINT_BITS_PER_WORD=64, + lsb_word = lsb_index /APINT_BITS_PER_WORD, + msb_word = msb_index / APINT_BITS_PER_WORD, + msb = msb_index % APINT_BITS_PER_WORD, + lsb=lsb_index % APINT_BITS_PER_WORD}; + if (msb_word==lsb_word) { + const uint64_t mask = ~0ULL >> (lsb) << (APINT_BITS_PER_WORD-msb+lsb-1)>>(APINT_BITS_PER_WORD-msb-1); + a.pVal[msb_word] |= mask; + } else { + const uint64_t lsb_mask = ~0ULL >> (lsb) << (lsb); + const uint64_t msb_mask = ~0ULL << (APINT_BITS_PER_WORD-msb-1)>>(APINT_BITS_PER_WORD-msb-1); + a.pVal[lsb_word] |=lsb_mask; + for (int i=lsb_word+1; i + INLINE void clear(ap_private<_AP_W, _AP_S>& a) { + enum { APINT_BITS_PER_WORD=64, + lsb_word = lsb_index /APINT_BITS_PER_WORD, + msb_word = msb_index / APINT_BITS_PER_WORD, + msb = msb_index % APINT_BITS_PER_WORD, + lsb=lsb_index % APINT_BITS_PER_WORD}; + if (msb_word == lsb_word) { + const uint64_t mask = ~(~0ULL >> (lsb) << (APINT_BITS_PER_WORD-msb+lsb-1)>>(APINT_BITS_PER_WORD-msb-1)); + a.pVal[msb_word] &= mask; + } else { + const uint64_t lsb_mask = ~(~0ULL >> (lsb) << (lsb)); + const uint64_t msb_mask = ~(~0ULL << (APINT_BITS_PER_WORD-msb-1)>>(APINT_BITS_PER_WORD-msb-1)); + a.pVal[lsb_word] &=lsb_mask; + for (int i=lsb_word+1; i + INLINE void set(ap_private<_AP_W, _AP_S, 1>& a) { + static const uint64_t mask=1ULL << (index); + a.VAL |= mask; + a.clearUnusedBits(); + } + + template + INLINE void clear(ap_private<_AP_W, _AP_S, 1>& a) { + static const uint64_t mask=~(1ULL << (index)); + a.VAL &= mask; + 
a.clearUnusedBits(); + } + + template + INLINE void set(ap_private<_AP_W, _AP_S>& a) { + enum { APINT_BITS_PER_WORD=64, word = index/APINT_BITS_PER_WORD}; + static const uint64_t mask=1ULL << (index%APINT_BITS_PER_WORD); + a.pVal[word] |= mask; + a.clearUnusedBits(); + } + + template + INLINE void clear(ap_private<_AP_W, _AP_S>& a) { + enum { APINT_BITS_PER_WORD=64, word = index/APINT_BITS_PER_WORD}; + static const uint64_t mask=~(1ULL << (index%APINT_BITS_PER_WORD)); + a.pVal[word] &= mask; + a.clearUnusedBits(); + } + + template + INLINE bool isNegative(const ap_private<_AP_W, false>& a) { + return false; + } + + template + INLINE bool isNegative(const ap_private<_AP_W, true, 1>& a) { + static const uint64_t sign_mask = (1ULL << (_AP_W-1)); + return ((sign_mask & a.VAL) != 0); + } + + template + INLINE bool isNegative(const ap_private<_AP_W, true>& a) { + enum {APINT_BITS_PER_WORD=64,_AP_N=(_AP_W+APINT_BITS_PER_WORD-1)/APINT_BITS_PER_WORD}; + static const uint64_t sign_mask = (1ULL << (_AP_W%APINT_BITS_PER_WORD-1)); + return sign_mask & a.pVal[_AP_N-1]; + } +} // End of ap_private_ops namespace + +/// @brief Check if the specified ap_private has a N-bits integer value. +template +INLINE bool isIntN(uint32_t __N, const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return APIVal.isIntN(__N); +} + +/// @returns true if the argument ap_private value is a sequence of ones +/// starting at the least significant bit with the remainder zero. +template +INLINE bool isMask(uint32_t numBits, const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return APIVal.getBoolValue() && ((APIVal + ap_private<_AP_W, _AP_S, _AP_N>(numBits,1)) & APIVal) == 0; +} + +/// @returns true if the argument ap_private value contains a sequence of ones +/// with the remainder zero. 
+template +INLINE bool isShiftedMask(uint32_t numBits, const ap_private<_AP_W, _AP_S, _AP_N>& APIVal) { + return isMask(numBits, (APIVal - ap_private<_AP_W, _AP_S, _AP_N>(numBits,1)) | APIVal); +} + +#if 0 +/// add_1 - This function adds a single "digit" integer, y, to the multiple +/// "digit" integer array, x[]. x[] is modified to reflect the addition and +/// 1 is returned if there is a carry out, otherwise 0 is returned. +/// @returns the carry of the addition. +static bool add_1(uint64_t dest[], uint64_t x[], uint32_t len, uint64_t y) { + for (uint32_t i = 0; i < len; ++i) { + dest[i] = y + x[i]; + if (dest[i] < y) + y = 1; // Carry one to next digit. + else { + y = 0; // No need to carry so exit early + break; + } + } + return (y != 0); +} +#endif + +#if 0 +/// add - This function adds the integer array x to the integer array Y and +/// places the result in dest. +/// @returns the carry out from the addition +/// @brief General addition of 64-bit integer arrays +static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, + uint32_t destlen, uint32_t xlen, uint32_t ylen, bool xsigned, bool ysigned) { + bool carry = false; + uint32_t len = AESL_std::min(xlen, ylen); + uint32_t i; + for (i = 0; i< len && i < destlen; ++i) { + uint64_t limit = AESL_std::min(x[i],y[i]); // must come first in case dest == x + dest[i] = x[i] + y[i] + carry; + carry = dest[i] < limit || (carry && dest[i] == limit); + } + if (xlen > ylen) { + const uint64_t yext = xsigned && int64_t(y[ylen-1])<0 ? -1 : 0; + for (i=ylen; i< xlen && i < destlen; i++) { + uint64_t limit = AESL_std::min(x[i], yext); + dest[i] = x[i] + yext + carry; + carry = (dest[i] < limit)||(carry && dest[i] == x[i]); + } + } else if (ylen> xlen) { + const uint64_t xext = ysigned && int64_t(x[xlen-1])<0 ? 
-1 : 0; + for (i=xlen; i< ylen && i < destlen; i++) { + uint64_t limit = AESL_std::min(xext, y[i]); + dest[i] = xext + y[i] + carry; + carry = (dest[i] < limit)||(carry && dest[i] == y[i]); + } + } + return carry; +} +#endif + +#if 0 +/// @returns returns the borrow out. +/// @brief Generalized subtraction of 64-bit integer arrays. +static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y, + uint32_t destlen, uint32_t xlen, uint32_t ylen, bool xsigned, bool ysigned) { + bool borrow = false; + uint32_t i; + uint32_t len = AESL_std::min(xlen, ylen); + for (i = 0; i < len && i < destlen; ++i) { + uint64_t x_tmp = borrow ? x[i] - 1 : x[i]; + borrow = y[i] > x_tmp || (borrow && x[i] == 0); + dest[i] = x_tmp - y[i]; + } + if (xlen > ylen) { + const uint64_t yext = ysigned && int64_t(y[ylen-1])<0 ? -1 : 0; + for (i=ylen; i< xlen && i < destlen; i++) { + uint64_t x_tmp = borrow ? x[i] - 1 : x[i]; + borrow = yext > x_tmp || (borrow && x[i] == 0); + dest[i] = x_tmp - yext; + } + } else if (ylen> xlen) { + const uint64_t xext = xsigned && int64_t(x[xlen-1])<0 ? -1 : 0; + for (i=xlen; i< ylen && i < destlen; i++) { + uint64_t x_tmp = borrow ? xext - 1 : xext; + borrow = y[i] > x_tmp || (borrow && xext==0); + dest[i] = x_tmp - y[i]; + } + } + return borrow; +} +#endif + +/// Subtracts the RHS ap_private from this ap_private +/// @returns this, after subtraction +/// @brief Subtraction assignment operator. + +#if 0 +/// Multiplies an integer array, x by a a uint64_t integer and places the result +/// into dest. +/// @returns the carry out of the multiplication. +/// @brief Multiply a multi-digit ap_private by a single digit (64-bit) integer. +static uint64_t mul_1(uint64_t dest[], const uint64_t x[], uint32_t len, uint64_t y) { + // Split y into high 32-bit part (hy) and low 32-bit part (ly) + uint64_t ly = y & 0xffffffffULL, hy = (y) >> 32; + uint64_t carry = 0; + static const uint64_t two_power_32 = 1ULL << 32; + // For each digit of x. 
+ for (uint32_t i = 0; i < len; ++i) { + // Split x into high and low words + uint64_t lx = x[i] & 0xffffffffULL; + uint64_t hx = (x[i]) >> 32; + // hasCarry - A flag to indicate if there is a carry to the next digit. + // hasCarry == 0, no carry + // hasCarry == 1, has carry + // hasCarry == 2, no carry and the calculation result == 0. + uint8_t hasCarry = 0; + dest[i] = carry + lx * ly; + // Determine if the add above introduces carry. + hasCarry = (dest[i] < carry) ? 1 : 0; + carry = hx * ly + ((dest[i]) >> 32) + (hasCarry ? two_power_32 : 0); + // The upper limit of carry can be (2^32 - 1)(2^32 - 1) + + // (2^32 - 1) + 2^32 = 2^64. + hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0); + + carry += (lx * hy) & 0xffffffffULL; + dest[i] = ((carry) << 32) | (dest[i] & 0xffffffffULL); + carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? two_power_32 : 0) + + ((carry) >> 32) + ((lx * hy) >> 32) + hx * hy; + } + return carry; +} +#endif + +#if 0 +/// Multiplies integer array x by integer array y and stores the result into +/// the integer array dest. Note that dest's size must be >= xlen + ylen. +/// @brief Generalized multiplicate of integer arrays. +static void mul(uint64_t dest[], const uint64_t x[], uint32_t xlen, const uint64_t y[], + uint32_t ylen, uint32_t destlen) { + dest[xlen] = mul_1(dest, x, xlen, y[0]); + for (uint32_t i = 1; i < ylen; ++i) { + uint64_t ly = y[i] & 0xffffffffULL, hy = (y[i]) >> 32; + uint64_t carry = 0, lx = 0, hx = 0; + for (uint32_t j = 0; j < xlen; ++j) { + lx = x[j] & 0xffffffffULL; + hx = (x[j]) >> 32; + // hasCarry - A flag to indicate if has carry. + // hasCarry == 0, no carry + // hasCarry == 1, has carry + // hasCarry == 2, no carry and the calculation result == 0. + uint8_t hasCarry = 0; + uint64_t resul = carry + lx * ly; + hasCarry = (resul < carry) ? 1 : 0; + carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + ((resul) >> 32); + hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 
2 : 0); + carry += (lx * hy) & 0xffffffffULL; + resul = ((carry) << 32) | (resul & 0xffffffffULL); + dest[i+j] += resul; + carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+ + ((carry) >> 32) + (dest[i+j] < resul ? 1 : 0) + + ((lx * hy) >> 32) + hx * hy; + } + dest[i+xlen] = carry; + } +} +#endif + + + +template +uint32_t ap_private<_AP_W, _AP_S, _AP_N>::getBitsNeeded(const char* str, uint32_t slen, uint8_t radix) { + assert(str != 0 && "Invalid value string"); + assert(slen > 0 && "Invalid string length"); + + // Each computation below needs to know if its negative + uint32_t isNegative = str[0] == '-'; + if (isNegative) { + slen--; + str++; + } + // For radixes of power-of-two values, the bits required is accurately and + // easily computed + if (radix == 2) + return slen + isNegative; + if (radix == 8) + return slen * 3 + isNegative; + if (radix == 16) + return slen * 4 + isNegative; + + // Otherwise it must be radix == 10, the hard case + assert(radix == 10 && "Invalid radix"); + + // Convert to the actual binary value. + //ap_private<_AP_W, _AP_S, _AP_N> tmp(sufficient, str, slen, radix); + + // Compute how many bits are required. 
+ //return isNegative + tmp.logBase2() + 1; + return isNegative + slen * 4; +} + +template +uint32_t ap_private<_AP_W, _AP_S, _AP_N>::countLeadingZeros() const { + enum { msw_bits = (BitWidth % APINT_BITS_PER_WORD)?(BitWidth % APINT_BITS_PER_WORD):APINT_BITS_PER_WORD, + excessBits = APINT_BITS_PER_WORD - msw_bits }; + uint32_t Count = CountLeadingZeros_64(pVal[_AP_N-1]); + if (Count>=excessBits) + Count -= excessBits; + if (!pVal[_AP_N-1]) { + for (uint32_t i = _AP_N-1 ; i ; --i) { + if (!pVal[i-1]) + Count += APINT_BITS_PER_WORD; + else { + Count += CountLeadingZeros_64(pVal[i-1]); + break; + } + } + } + return Count; +} + +static uint32_t countLeadingOnes_64(uint64_t __V, uint32_t skip) { + uint32_t Count = 0; + if (skip) + (__V) <<= (skip); + while (__V && (__V & (1ULL << 63))) { + Count++; + (__V) <<= 1; + } + return Count; +} + +template +uint32_t ap_private<_AP_W, _AP_S, _AP_N>::countLeadingOnes() const { + if (isSingleWord()) + return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth); + + uint32_t highWordBits = BitWidth % APINT_BITS_PER_WORD; + uint32_t shift = (highWordBits == 0 ? 
0 : APINT_BITS_PER_WORD - highWordBits); + int i = _AP_N - 1; + uint32_t Count = countLeadingOnes_64(pVal[i], shift); + if (Count == highWordBits) { + for (i--; i >= 0; --i) { + if (pVal[i] == ~0ULL) + Count += APINT_BITS_PER_WORD; + else { + Count += countLeadingOnes_64(pVal[i], 0); + break; + } + } + } + return Count; +} + +template +INLINE uint32_t ap_private<_AP_W, _AP_S, _AP_N>::countTrailingZeros() const { + if (isSingleWord()) + return AESL_std::min(uint32_t(CountTrailingZeros_64(VAL)), BitWidth); + uint32_t Count = 0; + uint32_t i = 0; + for (; i < _AP_N && pVal[i] == 0; ++i) + Count += APINT_BITS_PER_WORD; + if (i < _AP_N) + Count += CountTrailingZeros_64(pVal[i]); + return AESL_std::min(Count, BitWidth); +} + +template +ap_private<_AP_W, _AP_S, _AP_N> ap_private<_AP_W, _AP_S, _AP_N>::byteSwap() const { + assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); + if (BitWidth == 16) + return ap_private<_AP_W, _AP_S, _AP_N>(/*BitWidth,*/ ByteSwap_16(uint16_t(VAL))); + else if (BitWidth == 32) + return ap_private<_AP_W, _AP_S, _AP_N>(/*BitWidth,*/ ByteSwap_32(uint32_t(VAL))); + else if (BitWidth == 48) { + uint32_t Tmp1 = uint32_t((VAL) >> 16); + Tmp1 = ByteSwap_32(Tmp1); + uint16_t Tmp2 = uint16_t(VAL); + Tmp2 = ByteSwap_16(Tmp2); + return ap_private<_AP_W, _AP_S, _AP_N>(/*BitWidth,*/ ((uint64_t(Tmp2)) << 32) | Tmp1); + } else if (BitWidth == 64) + return ap_private<_AP_W, _AP_S, _AP_N>(/*BitWidth,*/ ByteSwap_64(VAL)); + else { + ap_private<_AP_W, _AP_S, _AP_N> Result(0); + char *pByte = (char*)Result.pVal; + for (uint32_t i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) { + char Tmp = pByte[i]; + pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i]; + pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp; + } + return Result; + } +} + +template +ap_private<_AP_W, _AP_S, _AP_N> ap_private_ops::GreatestCommonDivisor(const ap_private<_AP_W, _AP_S, _AP_N>& API1, const ap_private<_AP_W, _AP_S, _AP_N>& API2) { + ap_private<_AP_W, _AP_S, _AP_N> __A = API1, 
__B = API2; + while (!!__B) { + ap_private<_AP_W, _AP_S, _AP_N> __T = __B; + __B = ap_private_ops::urem(__A, __B); + __A = __T; + } + return __A; +} + +template +ap_private<_AP_W, _AP_S, _AP_N> ap_private_ops::RoundDoubleToap_private(double Double, uint32_t width) { + union { + double __D; + uint64_t __I; + } __T; + __T.__D = Double; + + // Get the sign bit from the highest order bit + bool isNeg = (__T.__I) >> 63; + + // Get the 11-bit exponent and adjust for the 1023 bit bias + int64_t exp = (((__T.__I) >> 52) & 0x7ffULL) - 1023; + + // If the exponent is negative, the value is < 0 so just return 0. + if (exp < 0) + return ap_private<_AP_W, _AP_S, _AP_N>(width, 0u); + + // Extract the mantissa by clearing the top 12 bits (sign + exponent). + uint64_t mantissa = (__T.__I & (~0ULL >> 12)) | 1ULL << 52; + + // If the exponent doesn't shift all bits out of the mantissa + if (exp < 52) + return isNeg ? -ap_private<_AP_W, _AP_S, _AP_N>(width, (mantissa) >> (52 - exp)) : + ap_private<_AP_W, _AP_S, _AP_N>((mantissa) >> (52 - exp)); + + // If the client didn't provide enough bits for us to shift the mantissa into + // then the result is undefined, just return 0 + if (width <= exp - 52) + return ap_private<_AP_W, _AP_S, _AP_N>(width, 0); + + // Otherwise, we have to shift the mantissa bits up to the right location + ap_private<_AP_W, _AP_S, _AP_N> Tmp(width, mantissa); + Tmp = Tmp.shl(exp - 52); + return isNeg ? -Tmp : Tmp; +} + +/// RoundToDouble - This function convert this ap_private to a double. +/// The layout for double is as following (IEEE Standard 754): +/// -------------------------------------- +/// | Sign Exponent Fraction Bias | +/// |-------------------------------------- | +/// | 1[63] 11[62-52] 52[51-00] 1023 | +/// -------------------------------------- +template +double ap_private<_AP_W, _AP_S, _AP_N>::roundToDouble(bool isSigned) const { + + // Handle the simple case where the value is contained in one uint64_t. 
+ if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { + uint64_t val; + if (isSingleWord()) val = VAL; + else val = pVal[0]; + if (isSigned) { + int64_t sext = ((int64_t(val)) << (64-BitWidth)) >> (64-BitWidth); + return double(sext); + } else + return double(val); + } + + // Determine if the value is negative. + bool isNeg = isSigned ? (*this)[BitWidth-1] : false; + + // Construct the absolute value if we're negative. + ap_private<_AP_W, _AP_S, _AP_N> Tmp(isNeg ? -(*this) : (*this)); + + // Figure out how many bits we're using. + uint32_t n = Tmp.getActiveBits(); + + // The exponent (without bias normalization) is just the number of bits + // we are using. Note that the sign bit is gone since we constructed the + // absolute value. + uint64_t exp = n; + + // Return infinity for exponent overflow + if (exp > 1023) { + if (!isSigned || !isNeg) + return std::numeric_limits::infinity(); + else + return -std::numeric_limits::infinity(); + } + exp += 1023; // Increment for 1023 bias + + // Number of bits in mantissa is 52. To obtain the mantissa value, we must + // extract the high 52 bits from the correct words in pVal. + uint64_t mantissa; + unsigned hiWord = whichWord(n-1); + if (hiWord == 0) { + mantissa = Tmp.pVal[0]; + if (n > 52) + (mantissa) >>= (n - 52); // shift down, we want the top 52 bits. + } else { + assert(hiWord > 0 && "High word is negative?"); + uint64_t hibits = (Tmp.pVal[hiWord]) << (52 - n % APINT_BITS_PER_WORD); + uint64_t lobits = (Tmp.pVal[hiWord-1]) >> (11 + n % APINT_BITS_PER_WORD); + mantissa = hibits | lobits; + } + + // The leading bit of mantissa is implicit, so get rid of it. + uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; + union { + double __D; + uint64_t __I; + } __T; + __T.__I = sign | ((exp) << 52) | mantissa; + return __T.__D; +} + +// Square Root - this method computes and returns the square root of "this". +// Three mechanisms are used for computation. 
For small values (<= 5 bits), +// a table lookup is done. This gets some performance for common cases. For +// values using less than 52 bits, the value is converted to double and then +// the libc sqrt function is called. The result is rounded and then converted +// back to a uint64_t which is then used to construct the result. Finally, +// the Babylonian method for computing square roots is used. +template +ap_private<_AP_W, _AP_S, _AP_N> ap_private<_AP_W, _AP_S, _AP_N>::sqrt() const { + + // Determine the magnitude of the value. + uint32_t magnitude = getActiveBits(); + + // Use a fast table for some small values. This also gets rid of some + // rounding errors in libc sqrt for small values. + if (magnitude <= 5) { + static const uint8_t results[32] = { + /* 0 */ 0, + /* 1- 2 */ 1, 1, + /* 3- 6 */ 2, 2, 2, 2, + /* 7-12 */ 3, 3, 3, 3, 3, 3, + /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, + /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + /* 31 */ 6 + }; + return ap_private<_AP_W, _AP_S, _AP_N>(/*BitWidth,*/ results[ (isSingleWord() ? VAL : pVal[0]) ]); + } + + // If the magnitude of the value fits in less than 52 bits (the precision of + // an IEEE double precision floating point value), then we can use the + // libc sqrt function which will probably use a hardware sqrt computation. + // This should be faster than the algorithm below. + if (magnitude < 52) { +#ifdef _MSC_VER + // Amazingly, VC++ doesn't have round(). + return ap_private<_AP_W, _AP_S, _AP_N>(/*BitWidth,*/ + uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5); +#else + return ap_private<_AP_W, _AP_S, _AP_N>(/*BitWidth,*/ + uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); +#endif + } + + // Okay, all the short cuts are exhausted. We must compute it. The following + // is a classical Babylonian method for computing the square root. This code + // was adapted to APINt from a wikipedia article on such computations. 
+ // See http://www.wikipedia.org/ and go to the page named + // Calculate_an_integer_square_root. + uint32_t nbits = BitWidth, i = 4; + ap_private<_AP_W, _AP_S, _AP_N> testy(16); + ap_private<_AP_W, _AP_S, _AP_N> x_old(/*BitWidth,*/ 1); + ap_private<_AP_W, _AP_S, _AP_N> x_new(0); + ap_private<_AP_W, _AP_S, _AP_N> two(/*BitWidth,*/ 2); + + // Select a good starting value using binary logarithms. + for (;; i += 2, testy = testy.shl(2)) + if (i >= nbits || this->ule(testy)) { + x_old = x_old.shl(i / 2); + break; + } + + // Use the Babylonian method to arrive at the integer square root: + for (;;) { + x_new = (this->udiv(x_old) + x_old).udiv(two); + if (x_old.ule(x_new)) + break; + x_old = x_new; + } + + // Make sure we return the closest approximation + // NOTE: The rounding calculation below is correct. It will produce an + // off-by-one discrepancy with results from pari/gp. That discrepancy has been + // determined to be a rounding issue with pari/gp as it begins to use a + // floating point representation after 192 bits. There are no discrepancies + // between this algorithm and pari/gp for bit widths < 192 bits. + ap_private<_AP_W, _AP_S, _AP_N> square(x_old * x_old); + ap_private<_AP_W, _AP_S, _AP_N> nextSquare((x_old + 1) * (x_old +1)); + if (this->ult(square)) + return x_old; + else if (this->ule(nextSquare)) { + ap_private<_AP_W, _AP_S, _AP_N> midpoint((nextSquare - square).udiv(two)); + ap_private<_AP_W, _AP_S, _AP_N> offset(*this - square); + if (offset.ult(midpoint)) + return x_old; + else + return x_old + 1; + } else + assert(0 && "Error in ap_private<_AP_W, _AP_S, _AP_N>::sqrt computation"); + return x_old + 1; +} + +/// Implementation of Knuth's Algorithm D (Division of nonnegative integers) +/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The +/// variables here have the same names as in the algorithm. Comments explain +/// the algorithm and any deviation from it. 
+static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, + uint32_t m, uint32_t n) { + assert(u && "Must provide dividend"); + assert(v && "Must provide divisor"); + assert(q && "Must provide quotient"); + assert(u != v && u != q && v != q && "Must us different memory"); + assert(n>1 && "n must be > 1"); + + // Knuth uses the value b as the base of the number system. In our case b + // is 2^31 so we just set it to -1u. + uint64_t b = uint64_t(1) << 32; + + //DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n'); + //DEBUG(cerr << "KnuthDiv: original:"); + //DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]); + //DEBUG(cerr << " by"); + //DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]); + //DEBUG(cerr << '\n'); + // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of + // u and v by d. Note that we have taken Knuth's advice here to use a power + // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of + // 2 allows us to shift instead of multiply and it is easy to determine the + // shift amount from the leading zeros. We are basically normalizing the u + // and v so that its high bits are shifted to the top of v's range without + // overflow. Note that this can require an extra word in u so that u must + // be of length m+n+1. 
+ uint32_t shift = CountLeadingZeros_32(v[n-1]); + uint32_t v_carry = 0; + uint32_t u_carry = 0; + if (shift) { + for (uint32_t i = 0; i < m+n; ++i) { + uint32_t u_tmp = (u[i]) >> (32 - shift); + u[i] = ((u[i]) << (shift)) | u_carry; + u_carry = u_tmp; + } + for (uint32_t i = 0; i < n; ++i) { + uint32_t v_tmp = (v[i]) >> (32 - shift); + v[i] = ((v[i]) << (shift)) | v_carry; + v_carry = v_tmp; + } + } + u[m+n] = u_carry; + //DEBUG(cerr << "KnuthDiv: normal:"); + //DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]); + //DEBUG(cerr << " by"); + //DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]); + //DEBUG(cerr << '\n'); + + // D2. [Initialize j.] Set j to m. This is the loop counter over the places. + int j = m; + do { + //DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n'); + // D3. [Calculate q'.]. + // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') + // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') + // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease + // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test + // on v[n-2] determines at high speed most of the cases in which the trial + // value qp is one too large, and it eliminates all cases where qp is two + // too large. + uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]); + //DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n'); + uint64_t qp = dividend / v[n-1]; + uint64_t rp = dividend % v[n-1]; + if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { + qp--; + rp += v[n-1]; + if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) + qp--; + } + //DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); + + // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with + // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation + // consists of a simple multiplication by a one-place number, combined with + // a subtraction. 
+ bool isNeg = false; + for (uint32_t i = 0; i < n; ++i) { + uint64_t u_tmp = uint64_t(u[j+i]) | ((uint64_t(u[j+i+1])) << 32); + uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]); + bool borrow = subtrahend > u_tmp; + /*DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp + << ", subtrahend == " << subtrahend + << ", borrow = " << borrow << '\n');*/ + + uint64_t result = u_tmp - subtrahend; + uint32_t k = j + i; + u[k++] = (uint32_t)(result & (b-1)); // subtract low word + u[k++] = (uint32_t)((result) >> 32); // subtract high word + while (borrow && k <= m+n) { // deal with borrow to the left + borrow = u[k] == 0; + u[k]--; + k++; + } + isNeg |= borrow; + /*DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << + u[j+i+1] << '\n');*/ + } + /*DEBUG(cerr << "KnuthDiv: after subtraction:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]); + DEBUG(cerr << '\n');*/ + // The digits (u[j+n]...u[j]) should be kept positive; if the result of + // this step is actually negative, (u[j+n]...u[j]) should be left as the + // true value plus b**(n+1), namely as the b's complement of + // the true value, and a "borrow" to the left should be remembered. + // + if (isNeg) { + bool carry = true; // true because b's complement is "complement + 1" + for (uint32_t i = 0; i <= m+n; ++i) { + u[i] = ~u[i] + carry; // b's complement + carry = carry && u[i] == 0; + } + } + /*DEBUG(cerr << "KnuthDiv: after complement:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]); + DEBUG(cerr << '\n');*/ + + // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was + // negative, go to step D6; otherwise go on to step D7. + q[j] = (uint32_t)qp; + if (isNeg) { + // D6. [Add back]. The probability that this step is necessary is very + // small, on the order of only 2/b. Make sure that test data accounts for + // this possibility. Decrease q[j] by 1 + q[j]--; + // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 
+ // A carry will occur to the left of u[j+n], and it should be ignored + // since it cancels with the borrow that occurred in D4. + bool carry = false; + for (uint32_t i = 0; i < n; i++) { + uint32_t limit = AESL_std::min(u[j+i],v[i]); + u[j+i] += v[i] + carry; + carry = u[j+i] < limit || (carry && u[j+i] == limit); + } + u[j+n] += carry; + } + /*DEBUG(cerr << "KnuthDiv: after correction:"); + DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]); + DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n');*/ + + // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. + } while (--j >= 0); + + /*DEBUG(cerr << "KnuthDiv: quotient:"); + DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]); + DEBUG(cerr << '\n');*/ + + // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired + // remainder may be obtained by dividing u[...] by d. If r is non-null we + // compute the remainder (urem uses this). + if (r) { + // The value d is expressed by the "shift" value above since we avoided + // multiplication by d by using a shift left. So, all we have to do is + // shift right here. In order to mak + if (shift) { + uint32_t carry = 0; + //DEBUG(cerr << "KnuthDiv: remainder:"); + for (int i = n-1; i >= 0; i--) { + r[i] = ((u[i]) >> (shift)) | carry; + carry = (u[i]) << (32 - shift); + //DEBUG(cerr << " " << r[i]); + } + } else { + for (int i = n-1; i >= 0; i--) { + r[i] = u[i]; + //DEBUG(cerr << " " << r[i]); + } + } + //DEBUG(cerr << '\n'); + } + //DEBUG(cerr << std::setbase(10) << '\n'); +} + +template +void divide(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, uint32_t lhsWords, + const ap_private<_AP_W, _AP_S, _AP_N>& RHS, uint32_t rhsWords, + ap_private<_AP_W, _AP_S, _AP_N> *Quotient, ap_private<_AP_W, _AP_S, _AP_N> *Remainder) { + assert(lhsWords >= rhsWords && "Fractional result"); + enum {APINT_BITS_PER_WORD=64}; + // First, compose the values into an array of 32-bit words instead of + // 64-bit words. 
// This is a necessity of both the "short division" algorithm
  // and the Knuth "classical algorithm" which requires there to be native
  // operations for +, -, and * on an m bit value with an m*2 bit result. We
  // can't use 64-bit operands here because we don't have native results of
  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
  // work on big-endian machines.
  uint64_t mask = ~0ull >> (sizeof(uint32_t)*8);   // low 32-bit mask
  uint32_t n = rhsWords * 2;        // divisor length in 32-bit digits
  uint32_t m = (lhsWords * 2) - n;  // dividend length minus divisor length

  // Allocate space for the temporary values we need either on the stack, if
  // it will fit, or on the heap if it won't.
  uint32_t SPACE[128];
  uint32_t *__U = 0;
  uint32_t *__V = 0;
  uint32_t *__Q = 0;
  uint32_t *__R = 0;
  if ((Remainder?4:3)*n+2*m+1 <= 128) {
    __U = &SPACE[0];
    __V = &SPACE[m+n+1];
    __Q = &SPACE[(m+n+1) + n];
    if (Remainder)
      __R = &SPACE[(m+n+1) + n + (m+n)];
  } else {
    __U = new uint32_t[m + n + 1];
    __V = new uint32_t[n];
    __Q = new uint32_t[m+n];
    if (Remainder)
      __R = new uint32_t[n];
  }

  // Initialize the dividend: split each 64-bit word of LHS into two
  // little-endian 32-bit digits.
  memset(__U, 0, (m+n+1)*sizeof(uint32_t));
  for (unsigned i = 0; i < lhsWords; ++i) {
    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
    __U[i * 2] = (uint32_t)(tmp & mask);
    __U[i * 2 + 1] = (tmp) >> (sizeof(uint32_t)*8);
  }
  __U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.

  // Initialize the divisor the same way.
  memset(__V, 0, (n)*sizeof(uint32_t));
  for (unsigned i = 0; i < rhsWords; ++i) {
    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
    __V[i * 2] = (uint32_t)(tmp & mask);
    __V[i * 2 + 1] = (tmp) >> (sizeof(uint32_t)*8);
  }

  // Initialize the quotient and remainder.
  memset(__Q, 0, (m+n) * sizeof(uint32_t));
  if (Remainder)
    memset(__R, 0, n * sizeof(uint32_t));

  // Now, adjust m and n for the Knuth division. n is the number of words in
  // the divisor.
m is the number of words by which the dividend exceeds the + // divisor (i.e. m+n is the length of the dividend). These sizes must not + // contain any zero words or the Knuth algorithm fails. + for (unsigned i = n; i > 0 && __V[i-1] == 0; i--) { + n--; + m++; + } + for (unsigned i = m+n; i > 0 && __U[i-1] == 0; i--) + m--; + + // If we're left with only a single word for the divisor, Knuth doesn't work + // so we implement the short division algorithm here. This is much simpler + // and faster because we are certain that we can divide a 64-bit quantity + // by a 32-bit quantity at hardware speed and short division is simply a + // series of such operations. This is just like doing short division but we + // are using base 2^32 instead of base 10. + assert(n != 0 && "Divide by zero?"); + if (n == 1) { + uint32_t divisor = __V[0]; + uint32_t remainder = 0; + for (int i = m+n-1; i >= 0; i--) { + uint64_t partial_dividend = (uint64_t(remainder)) << 32 | __U[i]; + if (partial_dividend == 0) { + __Q[i] = 0; + remainder = 0; + } else if (partial_dividend < divisor) { + __Q[i] = 0; + remainder = (uint32_t)partial_dividend; + } else if (partial_dividend == divisor) { + __Q[i] = 1; + remainder = 0; + } else { + __Q[i] = (uint32_t)(partial_dividend / divisor); + remainder = (uint32_t)(partial_dividend - (__Q[i] * divisor)); + } + } + if (__R) + __R[0] = remainder; + } else { + // Now we're ready to invoke the Knuth classical divide algorithm. In this + // case n > 1. + KnuthDiv(__U, __V, __Q, __R, m, n); + } + + // If the caller wants the quotient + if (Quotient) { + // Set up the Quotient value's memory. + if (Quotient->BitWidth != LHS.BitWidth) { + if (Quotient->isSingleWord()) + Quotient->VAL = 0; + } else + Quotient->clear(); + + // The quotient is in Q. Reconstitute the quotient into Quotient's low + // order words. 
+ if (lhsWords == 1) { + uint64_t tmp = + uint64_t(__Q[0]) | ((uint64_t(__Q[1])) << (APINT_BITS_PER_WORD / 2)); + if (Quotient->isSingleWord()) + Quotient->VAL = tmp; + else + Quotient->pVal[0] = tmp; + } else { + assert(!Quotient->isSingleWord() && "Quotient ap_private not large enough"); + for (unsigned i = 0; i < lhsWords; ++i) + Quotient->pVal[i] = + uint64_t(__Q[i*2]) | ((uint64_t(__Q[i*2+1])) << (APINT_BITS_PER_WORD / 2)); + } + Quotient->clearUnusedBits(); + } + + // If the caller wants the remainder + if (Remainder) { + // Set up the Remainder value's memory. + if (Remainder->BitWidth != RHS.BitWidth) { + if (Remainder->isSingleWord()) + Remainder->VAL = 0; + } else + Remainder->clear(); + + // The remainder is in R. Reconstitute the remainder into Remainder's low + // order words. + if (rhsWords == 1) { + uint64_t tmp = + uint64_t(__R[0]) | ((uint64_t(__R[1])) << (APINT_BITS_PER_WORD / 2)); + if (Remainder->isSingleWord()) + Remainder->VAL = tmp; + else + Remainder->pVal[0] = tmp; + } else { + assert(!Remainder->isSingleWord() && "Remainder ap_private not large enough"); + for (unsigned i = 0; i < rhsWords; ++i) + Remainder->pVal[i] = + uint64_t(__R[i*2]) | ((uint64_t(__R[i*2+1])) << (APINT_BITS_PER_WORD / 2)); + } + Remainder->clearUnusedBits(); + } + + // Clean up the memory we allocated. 
+ if (__U != &SPACE[0]) { + delete [] __U; + delete [] __V; + delete [] __Q; + delete [] __R; + } +} + + +template +void ap_private<_AP_W, _AP_S, _AP_N>::fromString(const char *str, uint32_t slen, uint8_t radix) { + enum { numbits=_AP_W}; + // Check our assumptions here + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + assert(str && "String is null?"); + bool isNeg = str[0] == '-'; + if (isNeg) + str++, slen--; + + //skip any leading zero + while(*str == '0' && *(str+1) != '\0') {str++; slen--;} + assert((slen <= numbits || radix != 2) && "Insufficient bit width"); + assert(((slen - 1)*3 <= numbits || radix != 8) && "Insufficient bit width"); + assert(((slen - 1)*4 <= numbits || radix != 16) && "Insufficient bit width"); + assert((((slen -1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width"); + + memset(pVal, 0, _AP_N * sizeof(uint64_t)); + + // Figure out if we can shift instead of multiply + uint32_t shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); + + // Set up an ap_private for the digit to add outside the loop so we don't + // constantly construct/destruct it. + uint64_t bigVal[_AP_N]; + memset(bigVal, 0, _AP_N * sizeof(uint64_t)); + ap_private<_AP_W, _AP_S, _AP_N> apdigit(getBitWidth(), bigVal); + ap_private<_AP_W, _AP_S, _AP_N> apradix(radix); + + // Enter digit traversal loop + for (unsigned i = 0; i < slen; i++) { + // Get a digit + uint32_t digit = 0; + char cdigit = str[i]; + if (radix == 16) { +#define isxdigit(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F')) +#define isdigit(c) ((c) >= '0' && (c) <= '9') + if (!isxdigit(cdigit)) + assert(0 && "Invalid hex digit in string"); + if (isdigit(cdigit)) + digit = cdigit - '0'; + else if (cdigit >= 'a') + digit = cdigit - 'a' + 10; + else if (cdigit >= 'A') + digit = cdigit - 'A' + 10; + else + assert(0 && "huh? 
we shouldn't get here"); + } else if (isdigit(cdigit)) { + digit = cdigit - '0'; + } else { + assert(0 && "Invalid character in digit string"); + } +#undef isxdigit +#undef isdigit + // Shift or multiply the value by the radix + if (shift) + *this <<= shift; + else + *this *= apradix; + + // Add in the digit we just interpreted + if (apdigit.isSingleWord()) + apdigit.VAL = digit; + else + apdigit.pVal[0] = digit; + *this += apdigit; + } + // If its negative, put it in two's complement form + if (isNeg) { + (*this)--; + this->flip(); + } + clearUnusedBits(); +} + +template +std::string ap_private<_AP_W, _AP_S, _AP_N>::toString(uint8_t radix, bool wantSigned) const { + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + static const char *digits[] = { + "0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F" + }; + std::string result; + uint32_t bits_used = getActiveBits(); + + if (radix != 10) { + // For the 2, 8 and 16 bit cases, we can just shift instead of divide + // because the number of bits per digit (1,3 and 4 respectively) divides + // equaly. We just shift until there value is zero. + + // First, check for a zero value and just short circuit the logic below. + if (*this == (uint64_t)(0)) + result = "0"; + else { + ap_private<_AP_W, false, _AP_N> tmp(*this); + size_t insert_at = 0; + if (wantSigned && isNegative()) { + // They want to print the signed version and it is a negative value + // Flip the bits and add one to turn it into the equivalent positive + // value and put a '-' in the result. + tmp.flip(); + tmp++; + tmp.clearUnusedBitsToZero(); + result = "-"; + insert_at = 1; + } + // Just shift tmp right for each digit width until it becomes zero + uint32_t shift = (radix == 16 ? 4 : (radix == 8 ? 3 : 1)); + uint64_t mask = radix - 1; + ap_private<_AP_W, false, _AP_N> zero(0); + while (tmp.ne(zero)) { + unsigned digit = (tmp.isSingleWord() ? 
tmp.VAL : tmp.pVal[0]) & mask; + result.insert(insert_at, digits[digit]); + tmp = tmp.lshr(shift); + } + } + return result; + } + + ap_private<_AP_W, false, _AP_N> tmp(*this); + ap_private<_AP_W, false, _AP_N> divisor(radix); + ap_private<_AP_W, false, _AP_N> zero(0); + size_t insert_at = 0; + if (wantSigned && isNegative()) { + // They want to print the signed version and it is a negative value + // Flip the bits and add one to turn it into the equivalent positive + // value and put a '-' in the result. + tmp.flip(); + tmp++; + tmp.clearUnusedBitsToZero(); + result = "-"; + insert_at = 1; + } + if (tmp == ap_private<_AP_W, false, _AP_N>(0)) + result = "0"; + else while (tmp.ne(zero)) { + ap_private<_AP_W, false, _AP_N> APdigit(0); + ap_private<_AP_W, false, _AP_N> tmp2(0); + divide(tmp, tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2, + &APdigit); + uint32_t digit = APdigit.getZExtValue(); + assert(digit < radix && "divide failed"); + result.insert(insert_at,digits[digit]); + tmp = tmp2; + } + + return result; +} + +// This implements a variety of operations on a representation of +// arbitrary precision, two's-complement, bignum integer values. + +/* Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe + and unrestricting assumption. */ + +/* Some handy functions local to this file. */ + +template +void divide(const ap_private<_AP_W, _AP_S, _AP_N>& LHS, uint32_t lhsWords, + uint64_t RHS, + ap_private<_AP_W, _AP_S, _AP_N> *Quotient, ap_private<_AP_W, _AP_S, _AP_N> *Remainder) { + uint32_t rhsWords=1; + assert(lhsWords >= rhsWords && "Fractional result"); + enum {APINT_BITS_PER_WORD=64}; + // First, compose the values into an array of 32-bit words instead of + // 64-bit words. This is a necessity of both the "short division" algorithm + // and the the Knuth "classical algorithm" which requires there to be native + // operations for +, -, and * on an m bit value with an m*2 bit result. 
// We
  // can't use 64-bit operands here because we don't have native results of
  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
  // work on big-endian machines.
  uint64_t mask = ~0ull >> (sizeof(uint32_t)*8);   // low 32-bit mask
  uint32_t n = 2;                   // single 64-bit RHS word == two 32-bit digits
  uint32_t m = (lhsWords * 2) - n;  // dividend length minus divisor length

  // Allocate space for the temporary values we need either on the stack, if
  // it will fit, or on the heap if it won't.
  uint32_t SPACE[128];
  uint32_t *__U = 0;
  uint32_t *__V = 0;
  uint32_t *__Q = 0;
  uint32_t *__R = 0;
  if ((Remainder?4:3)*n+2*m+1 <= 128) {
    __U = &SPACE[0];
    __V = &SPACE[m+n+1];
    __Q = &SPACE[(m+n+1) + n];
    if (Remainder)
      __R = &SPACE[(m+n+1) + n + (m+n)];
  } else {
    __U = new uint32_t[m + n + 1];
    __V = new uint32_t[n];
    __Q = new uint32_t[m+n];
    if (Remainder)
      __R = new uint32_t[n];
  }

  // Initialize the dividend: split each 64-bit word of LHS into two
  // little-endian 32-bit digits.
  memset(__U, 0, (m+n+1)*sizeof(uint32_t));
  for (unsigned i = 0; i < lhsWords; ++i) {
    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
    __U[i * 2] = tmp & mask;
    __U[i * 2 + 1] = (tmp) >> (sizeof(uint32_t)*8);
  }
  __U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.

  // Initialize the divisor directly from the single 64-bit RHS value.
  memset(__V, 0, (n)*sizeof(uint32_t));
  __V[0] = RHS & mask;
  __V[1] = (RHS) >> (sizeof(uint32_t)*8);

  // Initialize the quotient and remainder.
  memset(__Q, 0, (m+n) * sizeof(uint32_t));
  if (Remainder)
    memset(__R, 0, n * sizeof(uint32_t));

  // Now, adjust m and n for the Knuth division. n is the number of words in
  // the divisor. m is the number of words by which the dividend exceeds the
  // divisor (i.e. m+n is the length of the dividend). These sizes must not
  // contain any zero words or the Knuth algorithm fails.
+ for (unsigned i = n; i > 0 && __V[i-1] == 0; i--) { + n--; + m++; + } + for (unsigned i = m+n; i > 0 && __U[i-1] == 0; i--) + m--; + + // If we're left with only a single word for the divisor, Knuth doesn't work + // so we implement the short division algorithm here. This is much simpler + // and faster because we are certain that we can divide a 64-bit quantity + // by a 32-bit quantity at hardware speed and short division is simply a + // series of such operations. This is just like doing short division but we + // are using base 2^32 instead of base 10. + assert(n != 0 && "Divide by zero?"); + if (n == 1) { + uint32_t divisor = __V[0]; + uint32_t remainder = 0; + for (int i = m+n-1; i >= 0; i--) { + uint64_t partial_dividend = (uint64_t(remainder)) << 32 | __U[i]; + if (partial_dividend == 0) { + __Q[i] = 0; + remainder = 0; + } else if (partial_dividend < divisor) { + __Q[i] = 0; + remainder = partial_dividend; + } else if (partial_dividend == divisor) { + __Q[i] = 1; + remainder = 0; + } else { + __Q[i] = partial_dividend / divisor; + remainder = partial_dividend - (__Q[i] * divisor); + } + } + if (__R) + __R[0] = remainder; + } else { + // Now we're ready to invoke the Knuth classical divide algorithm. In this + // case n > 1. + KnuthDiv(__U, __V, __Q, __R, m, n); + } + + // If the caller wants the quotient + if (Quotient) { + // Set up the Quotient value's memory. + if (Quotient->BitWidth != LHS.BitWidth) { + if (Quotient->isSingleWord()) + Quotient->VAL = 0; + else + delete [] Quotient->pVal; + } else + Quotient->clear(); + + // The quotient is in Q. Reconstitute the quotient into Quotient's low + // order words. 
+ if (lhsWords == 1) { + uint64_t tmp = + uint64_t(__Q[0]) | ((uint64_t(__Q[1])) << (APINT_BITS_PER_WORD / 2)); + if (Quotient->isSingleWord()) + Quotient->VAL = tmp; + else + Quotient->pVal[0] = tmp; + } else { + assert(!Quotient->isSingleWord() && "Quotient ap_private not large enough"); + for (unsigned i = 0; i < lhsWords; ++i) + Quotient->pVal[i] = + uint64_t(__Q[i*2]) | ((uint64_t(__Q[i*2+1])) << (APINT_BITS_PER_WORD / 2)); + } + Quotient->clearUnusedBits(); + } + + // If the caller wants the remainder + if (Remainder) { + // Set up the Remainder value's memory. + if (Remainder->BitWidth != 64 /* RHS.BitWidth */) { + if (Remainder->isSingleWord()) + Remainder->VAL = 0; + } else + Remainder->clear(); + + // The remainder is in __R. Reconstitute the remainder into Remainder's low + // order words. + if (rhsWords == 1) { + uint64_t tmp = + uint64_t(__R[0]) | ((uint64_t(__R[1])) << (APINT_BITS_PER_WORD / 2)); + if (Remainder->isSingleWord()) + Remainder->VAL = tmp; + else + Remainder->pVal[0] = tmp; + } else { + assert(!Remainder->isSingleWord() && "Remainder ap_private not large enough"); + for (unsigned i = 0; i < rhsWords; ++i) + Remainder->pVal[i] = + uint64_t(__R[i*2]) | ((uint64_t(__R[i*2+1])) << (APINT_BITS_PER_WORD / 2)); + } + Remainder->clearUnusedBits(); + } + + // Clean up the memory we allocated. + if (__U != &SPACE[0]) { + delete [] __U; + delete [] __V; + delete [] __Q; + delete [] __R; + } +} + +//When bitwidth < 64 +template class ap_private <_AP_W, _AP_S, 1> { +#ifdef _MSC_VER +#pragma warning( disable : 4521 4522 ) +#endif +public: + typedef typename retval<_AP_S>::Type ValType; + template + struct RType { + enum { + _AP_N =1, + mult_w = _AP_W+_AP_W2, + mult_s = _AP_S||_AP_S2, //?? why + plus_w = AP_MAX(_AP_W+(_AP_S2&&!_AP_S),_AP_W2+(_AP_S&&!_AP_S2))+1, //shouldn't it be AP_MAX(_AP_W,_AP_W2)+!(_AP_S^_AP_S2)+1 ???? 
+ plus_s = _AP_S||_AP_S2, + minus_w = AP_MAX(_AP_W+(_AP_S2&&!_AP_S),_AP_W2+(_AP_S&&!_AP_S2))+1, + minus_s = true, + div_w = _AP_W+_AP_S2, + div_s = _AP_S||_AP_S2, + mod_w = AP_MIN(_AP_W,_AP_W2+(!_AP_S2&&_AP_S)), + mod_s = _AP_S, + logic_w = AP_MAX(_AP_W+(_AP_S2&&!_AP_S),_AP_W2+(_AP_S&&!_AP_S2)), + logic_s = _AP_S||_AP_S2 + }; + typedef ap_private mult; + typedef ap_private plus; + typedef ap_private minus; + typedef ap_private logic; + typedef ap_private div; + typedef ap_private mod; + typedef ap_private<_AP_W, _AP_S> arg1; + typedef bool reduce; + }; + enum { APINT_BITS_PER_WORD = 64}; + enum { excess_bits = (_AP_W%APINT_BITS_PER_WORD) ? APINT_BITS_PER_WORD -(_AP_W%APINT_BITS_PER_WORD) : 0}; + static const uint64_t mask = ((uint64_t)~0ULL >> (excess_bits)); + static const uint64_t not_mask = ~mask; + static const uint64_t sign_bit_mask = 1ULL << (APINT_BITS_PER_WORD-1); + template struct sign_ext_mask { static const uint64_t mask=~0ULL<<_AP_W1;}; + + enum { BitWidth=_AP_W}; + uint64_t VAL; ///< Used to store the <= 64 bits integer value. + const uint64_t *const pVal; + + INLINE uint32_t getBitWidth() const { + return BitWidth; + } + + template + ap_private<_AP_W, _AP_S, 1>& operator=(const ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) { + VAL = RHS.pVal[0]; + clearUnusedBits(); + return *this; + } + + template + ap_private<_AP_W, _AP_S, 1>& operator=(const volatile ap_private<_AP_W1, _AP_S1, _AP_N1>& RHS) { + VAL = RHS.pVal[0]; + clearUnusedBits(); + return *this; + } + + template + ap_private<_AP_W, _AP_S, 1>& operator=(const ap_private<_AP_W1, _AP_S1, 1>& RHS) { + VAL = RHS.VAL; + clearUnusedBits(); + return *this; + } + + template + ap_private<_AP_W, _AP_S, 1>& operator=(const volatile ap_private<_AP_W1, _AP_S1, 1>& RHS) { + VAL = RHS.VAL; + clearUnusedBits(); + return *this; + } + + volatile ap_private& operator=(const ap_private& RHS) volatile { + // Don't do anything for X = X + VAL = RHS.VAL; // No need to check because no harm done by copying. 
+ return *this; + } + ap_private& operator=(const ap_private& RHS) { + // Don't do anything for X = X + VAL = RHS.VAL; // No need to check because no harm done by copying. + return *this; + } + + volatile ap_private& operator=(const volatile ap_private& RHS) volatile { + // Don't do anything for X = X + VAL = RHS.VAL; // No need to check because no harm done by copying. + return *this; + } + ap_private& operator=(const volatile ap_private& RHS) { + // Don't do anything for X = X + VAL = RHS.VAL; // No need to check because no harm done by copying. + return *this; + } + + template + INLINE ap_private& operator = (const ap_range_ref<_AP_W2, _AP_S2>& op2) { + *this = ap_private<_AP_W2, false>(op2); + return *this; + } + + explicit INLINE ap_private(uint64_t* val) : VAL(val[0]), pVal(&VAL){ + clearUnusedBits(); + } + + INLINE bool isSingleWord() const { return true; } + + INLINE void fromString(const char *strStart, uint32_t slen, + uint8_t radix, int offset=0) { + // Check our assumptions here + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + assert(strStart && "String is null?"); + strStart+=offset; + switch(radix) { + case 2: + // sscanf(strStart,"%b",&VAL); + VAL = *strStart =='1' ? 
~0ULL : 0; + for (;*strStart; ++strStart) { + assert((*strStart=='0'|| *strStart=='1')&&("Wrong binary number") ); + VAL <<=1; + VAL |= (*strStart-'0'); + } + break; + case 8: +#if __WIN32__ + sscanf(strStart,"%I64o",&VAL); +#else + +#if defined __x86_64__ + sscanf(strStart,"%lo",&VAL); +#else + sscanf(strStart,"%llo",&VAL); +#endif + +#endif + break; + case 10: +#if __WIN32__ + sscanf(strStart,"%I64u",&VAL); +#else + +#if defined __x86_64__ + sscanf(strStart,"%lu",&VAL); +#else + sscanf(strStart,"%llu",&VAL); +#endif + +#endif + break; + case 16: +#if __WIN32__ + sscanf(strStart,"%I64x",&VAL); +#else + +#if defined __x86_64__ + sscanf(strStart,"%lx",&VAL); +#else + sscanf(strStart,"%llx",&VAL); +#endif + +#endif + break; + default: + assert(true && "Unknown radix"); + // error + } + clearUnusedBits(); + } + + INLINE ap_private() : pVal(&VAL){VAL = 0ULL;} + +#define CTOR(TYPE) \ + INLINE ap_private(TYPE v) : VAL((uint64_t)v), pVal(&VAL) { \ + clearUnusedBits(); \ + } + CTOR(int) + CTOR(bool) + CTOR(signed char) + CTOR(unsigned char) + CTOR(short) + CTOR(unsigned short) + CTOR(unsigned int) + CTOR(long) + CTOR(unsigned long) + CTOR(unsigned long long) + CTOR(long long) + CTOR(float) + CTOR(double) +#undef CTOR + ap_private(uint32_t numWords, const uint64_t bigVal[]): VAL(bigVal[0]), pVal(&VAL) {clearUnusedBits();} + + ap_private(const std::string& val, uint8_t radix=2, int base=0, int offset=0): VAL(0), pVal(&VAL) { + assert(!val.empty() && "String empty?"); + fromString(val.c_str()+base, val.size()-base, radix); + } + + ap_private(const char strStart[], uint32_t slen, uint8_t radix, int base=0, int offset=0) : VAL(0), pVal(&VAL) { + fromString(strStart+base, slen-base, radix, offset); + } + + ap_private(const ap_private& that) : VAL(that.VAL), pVal(&VAL) { + clearUnusedBits(); + } + + template + ap_private(const ap_private<_AP_W1, _AP_S1, 1>& that) : VAL(that.VAL), pVal(&VAL) { + clearUnusedBits(); + } + + template + ap_private(const ap_private<_AP_W1, _AP_S1, 
_AP_N1>& that) : VAL(that.pVal[0]), pVal(&VAL) { + clearUnusedBits(); + } + + template + ap_private(const volatile ap_private<_AP_W1, _AP_S1, _AP_N1>& that) : VAL(that.pVal[0]), pVal(&VAL) { + clearUnusedBits(); + } + +#if 0 +template + explicit ap_private(const ap_private<_AP_W1, true, 1+_AP_W1/64>& that) + : VAL((_AP_W1>_AP_W) ? that.VAL & mask : ((1ULL<<(_AP_W1-1)&that.pVal[0]) ? sign_ext_mask<_AP_W1>::mask | that.VAL : that.pVal[0])), pVal(&VAL) {} + +template + explicit ap_private(const ap_private<_AP_W1, false, (_AP_W1+63)/64>& that) + : VAL(that.VAL & mask), pVal(&VAL) {} +#endif + + explicit ap_private(const char* val) : pVal(&VAL) { + std::string str(val); + uint32_t strLen = str.length(); + const char *strp = str.c_str(); + uint32_t offset = 0; + uint32_t base = 0; + bool neg = false; + uint32_t radix = 10; + ap_parse_sign(strp, base, neg); + ap_parse_prefix(strp + base, offset, radix); + + if ((radix != 10 && neg) || + (strLen - base - offset <= 0) || + InvalidDigit(strp, strLen, base + offset, radix)) { + fprintf(stderr, "invalid character string %s !\n", val); + assert(0); + } + + ap_private<_AP_W, _AP_S> ap_private_val(str.c_str(), strLen, radix, base, offset); + if (neg) + ap_private_val = -ap_private_val; + operator = (ap_private_val); + } + + ap_private(const char* val, signed char rd): pVal(&VAL) { + std::string str(val); + uint32_t strLen = str.length(); + const char *strp = str.c_str(); + uint32_t offset = 0; + uint32_t base = 0; + uint32_t radix = rd; + bool neg = false; + ap_parse_sign(strp, base, neg); + ap_parse_prefix(strp + base, offset, radix); + + if ((radix != 10 && neg) || + (strLen - base - offset <= 0) || + InvalidDigit(strp, strLen, base + offset, radix)) { + fprintf(stderr, "invalid character string %s !\n", val); + assert(0); + } + + uint32_t bitsNeeded = ap_private<_AP_W, _AP_S>::getBitsNeeded(strp, strLen, radix); + ap_private<_AP_W, _AP_S> ap_private_val(strp , strLen, radix, base, offset); + //ap_private<_AP_W, _AP_S> 
ap_private_val(bitsNeeded, strp , strLen, radix, base, offset); + if (strp[0] == '-') + ap_private_val = -ap_private_val; + operator = (ap_private_val); + } + + INLINE bool isNegative() const { + static const uint64_t sign_mask = 1ULL << (_AP_W-1); + return _AP_S && (sign_mask & VAL); + } + + INLINE bool isPositive() const { + return !isNegative(); + } + + INLINE bool isStrictlyPositive() const { + return !isNegative() && VAL!=0; + } + + INLINE bool isAllOnesValue() const { + return (mask & VAL) == mask; + } + + template + INLINE bool operator==(const ap_private<_AP_W1, _AP_S1, 1>& RHS) const { + return (VAL == RHS.VAL); + } + + INLINE bool operator==(const ap_private<_AP_W, _AP_S>& RHS) const { return VAL == RHS.VAL; } + INLINE bool operator==(const ap_private<_AP_W, !_AP_S>& RHS) const { return getVal() == RHS.getVal(); } + INLINE bool operator==(uint64_t Val) const { return (VAL == Val); } + INLINE bool operator!=(uint64_t Val) const { return (VAL != Val); } + INLINE bool operator!=(const ap_private<_AP_W, _AP_S>& RHS) const { return VAL != RHS.VAL; } + INLINE bool operator!=(const ap_private<_AP_W, !_AP_S>& RHS) const { return getVal() != RHS.getVal(); } + const ap_private operator++() { ++VAL; clearUnusedBits(); return *this; } + const ap_private operator--(int) { + ap_private orig(*this); + --VAL; clearUnusedBits(); + return orig; + } + const ap_private operator--() { --VAL; clearUnusedBits(); return *this;} + INLINE bool operator !() const { return !VAL;} + + const ap_private operator++(int) { + ap_private orig(*this); + VAL++; clearUnusedBits(); + return orig; + } + + const ap_private operator~() {return ap_private(~VAL);} + INLINE typename RType<1,false>::minus operator-() const { + return ap_private<1,false>(0) - (*this); + } + + INLINE std::string toString(uint8_t radix, bool wantSigned) const ; + INLINE std::string toStringUnsigned(uint8_t radix = 10) const { + return toString(radix, false); + } + INLINE std::string toStringSigned(uint8_t radix = 10) 
const { + return toString(radix, true); + } + INLINE void clear() { + VAL=0; + } + INLINE ap_private& clear(uint32_t bitPosition) { VAL &= ~(1ULL<<(bitPosition)); clearUnusedBits(); return *this;} + + INLINE ap_private ashr(uint32_t shiftAmt) const { + enum {excess_bits = APINT_BITS_PER_WORD - BitWidth}; + if (_AP_S) + return ap_private((shiftAmt == BitWidth) ? 0 : ((int64_t)VAL) >> (shiftAmt)); + else + return ap_private((shiftAmt == BitWidth) ? 0 : (VAL) >> (shiftAmt)); + } + + INLINE ap_private lshr(uint32_t shiftAmt) const { + return ap_private((shiftAmt == BitWidth) ? ap_private(0) : ap_private((VAL&mask) >> (shiftAmt))); + } + + INLINE ap_private shl(uint32_t shiftAmt) const { + if (shiftAmt > BitWidth) { + if (!isNegative()) + return ap_private(0); + else return ap_private(-1); + } + if (shiftAmt == BitWidth) return ap_private(0); + else return ap_private((VAL) << (shiftAmt)); + //return ap_private((shiftAmt == BitWidth) ? ap_private(0ULL) : ap_private(VAL << shiftAmt)); + } + + INLINE int64_t getSExtValue() const { + return VAL; + } + + INLINE uint64_t getZExtValue() const { + return VAL & mask; + } + + template + INLINE ap_private(const ap_range_ref<_AP_W2,_AP_S2>& ref) : pVal(&VAL) { + *this=ref.get(); + } + + template + INLINE ap_private(const ap_bit_ref<_AP_W2,_AP_S2>& ref) : pVal(&VAL) { + *this = ((uint64_t)(bool)ref); + } + + template + INLINE ap_private(const ap_concat_ref<_AP_W2, _AP_T2,_AP_W3, _AP_T3>& ref) : pVal(&VAL) { + *this=ref.get(); + } + + template + INLINE ap_private(const af_range_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2> &val) : pVal(&VAL) { + *this = ((val.operator ap_private<_AP_W2, false> ())); + } + + template + INLINE ap_private(const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, + _AP_Q2, _AP_O2, _AP_N2> &val) : pVal(&VAL) { + *this = (uint64_t)(bool)val; + } + + INLINE void write(const ap_private<_AP_W, _AP_S>& op2) volatile { + *this = (op2); + } + + //Explicit conversions to C interger types + 
//----------------------------------------------------------- + ValType getVal() const { + return VAL; + } + operator ValType () const { + return getVal(); + } + INLINE int to_int() const { + // ap_private<64 /* _AP_W */, _AP_S> res(V); + return (int) getVal(); + } + + INLINE unsigned to_uint() const { + return (unsigned) getVal(); + } + + INLINE long to_long() const { + return (long) getVal(); + } + + INLINE unsigned long to_ulong() const { + return (unsigned long) getVal(); + } + + INLINE ap_slong to_int64() const { + return (ap_slong) getVal(); + } + + INLINE ap_ulong to_uint64() const { + return (ap_ulong) getVal(); + } + + INLINE double to_double() const { + if (isNegative()) + return roundToDouble(true); + else + return roundToDouble(false); + } + + INLINE bool isMinValue() const { return VAL == 0;} + template INLINE ap_private& operator&=(const ap_private<_AP_W1, _AP_S1>& RHS) { + VAL = VAL&RHS.pVal[0]; + clearUnusedBits(); + return *this; + } + + template INLINE ap_private& operator|=(const ap_private<_AP_W1, _AP_S1>& RHS) { + VAL = VAL|RHS.pVal[0]; + clearUnusedBits(); + return *this; + } + + template INLINE ap_private& operator^=(const ap_private<_AP_W1, _AP_S1>& RHS){ + VAL = VAL^RHS.pVal[0]; + clearUnusedBits(); + return *this; + } + + template INLINE ap_private& operator*=(const ap_private<_AP_W1, _AP_S1>& RHS){ + VAL = VAL*RHS.pVal[0]; + clearUnusedBits(); + return *this; + } + + template INLINE ap_private& operator+=(const ap_private<_AP_W1, _AP_S1>& RHS){ + VAL = VAL+RHS.pVal[0]; + clearUnusedBits(); + return *this; + } + + template INLINE ap_private& operator-=(const ap_private<_AP_W1, _AP_S1>& RHS){ + VAL = VAL-RHS.pVal[0]; + clearUnusedBits(); + return *this; + } + INLINE const ap_private& operator<<=(uint32_t shiftAmt) { VAL<<=shiftAmt; clearUnusedBits(); return *this; } + + template INLINE typename RType<_AP_W1, _AP_S1>::logic operator&(const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1, _AP_S1>::logic_w <= 64) { + typename 
RType<_AP_W1, _AP_S1>::logic Ret(VAL & RHS.VAL); + return Ret; + } else { + typename RType<_AP_W1, _AP_S1>::logic Ret = *this; + return Ret & RHS; + } + } + + template INLINE typename RType<_AP_W1, _AP_S1>::logic operator^(const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1, _AP_S1>::logic_w <= 64) { + typename RType<_AP_W1, _AP_S1>::logic Ret(VAL ^ RHS.VAL); + return Ret; + } else { + typename RType<_AP_W1, _AP_S1>::logic Ret = *this; + return Ret ^ RHS; + } + } + + template INLINE typename RType<_AP_W1, _AP_S1>::logic operator|(const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1, _AP_S1>::logic_w <= 64) { + typename RType<_AP_W1, _AP_S1>::logic Ret(VAL | RHS.VAL); + return Ret; + } else { + typename RType<_AP_W1, _AP_S1>::logic Ret = *this; + return Ret | RHS; + } + } + + INLINE ap_private<_AP_W, _AP_S> And(const ap_private<_AP_W, _AP_S>& RHS) const { + return ap_private<_AP_W, _AP_S>(VAL & RHS.VAL); + } + + INLINE ap_private<_AP_W, _AP_S> Or(const ap_private<_AP_W, _AP_S>& RHS) const { + return ap_private<_AP_W, _AP_S>(VAL | RHS.VAL); + } + + INLINE ap_private<_AP_W, _AP_S> Xor(const ap_private<_AP_W, _AP_S>& RHS) const { + return ap_private<_AP_W, _AP_S>(VAL ^ RHS.VAL); + } +#if 1 + template + INLINE typename RType<_AP_W1, _AP_S1>::mult operator*(const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1, _AP_S1>::mult_w <= 64) { + typename RType<_AP_W1, _AP_S1>::mult Result(VAL * RHS.VAL); + return Result; + } else { + typename RType<_AP_W1, _AP_S1>::mult Result = typename RType<_AP_W1, _AP_S1>::mult(*this); + Result *= RHS; + return Result; + } + } +#endif + INLINE ap_private<_AP_W, _AP_S> Mul(const ap_private<_AP_W, _AP_S>& RHS) const { + return ap_private<_AP_W, _AP_S>(VAL * RHS.VAL); + } + + INLINE ap_private<_AP_W, _AP_S> Add(const ap_private<_AP_W, _AP_S>& RHS) const { + return ap_private<_AP_W, _AP_S>(VAL + RHS.VAL); + } + + INLINE ap_private<_AP_W, _AP_S> Sub(const ap_private<_AP_W, _AP_S>& RHS) const { + return 
ap_private<_AP_W, _AP_S>(VAL - RHS.VAL); + } + +#if 1 + INLINE ap_private& operator&=(uint64_t RHS) { VAL &= RHS; clearUnusedBits(); return *this;} + INLINE ap_private& operator|=(uint64_t RHS) { VAL |= RHS; clearUnusedBits(); return *this;} + INLINE ap_private& operator^=(uint64_t RHS){ VAL ^= RHS; clearUnusedBits(); return *this;} + INLINE ap_private& operator*=(uint64_t RHS){ VAL *= RHS; clearUnusedBits(); return *this; } + INLINE ap_private& operator+=(uint64_t RHS){ VAL += RHS; clearUnusedBits(); return *this;} + INLINE ap_private& operator-=(uint64_t RHS){ VAL -= RHS; clearUnusedBits(); return *this; } + INLINE ap_private operator&(uint64_t RHS) const { return ap_private(VAL & RHS); } + INLINE ap_private operator|(uint64_t RHS) const { return ap_private(VAL | RHS); } + INLINE ap_private operator^(uint64_t RHS) const { return ap_private(VAL ^ RHS); } + INLINE ap_private operator*(uint64_t RHS) const { return ap_private(VAL * RHS); } + INLINE ap_private operator/(uint64_t RHS) const { return ap_private(VAL / RHS); } + INLINE ap_private operator+(uint64_t RHS) const { return ap_private(VAL + RHS); } + INLINE ap_private operator-(uint64_t RHS) const { return ap_private(VAL - RHS); } +#endif + INLINE bool isMinSignedValue() const { + static const uint64_t min_mask = ~(~0ULL << (_AP_W-1)); + return BitWidth == 1 ? VAL == 1 : + (ap_private_ops::isNegative<_AP_W>(*this) && ((min_mask & VAL)==0)); + } + +#if 1 + + template INLINE + typename RType<_AP_W1,_AP_S1>::plus operator+(const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1,_AP_S1>::plus_w <=64) + return typename RType<_AP_W1,_AP_S1>::plus(RType<_AP_W1,_AP_S1>::plus_s ? 
int64_t(VAL+RHS.VAL):uint64_t(VAL+RHS.VAL)); + typename RType<_AP_W1,_AP_S1>::plus Result=RHS; + Result += VAL; + return Result; + } + + template INLINE + typename RType<_AP_W1,_AP_S1>::minus operator-(const ap_private<_AP_W1, _AP_S1>& RHS) const { + if (RType<_AP_W1,_AP_S1>::minus_w <=64) + return typename RType<_AP_W1,_AP_S1>::minus(int64_t(VAL-RHS.VAL)); + typename RType<_AP_W1,_AP_S1>::minus Result=*this; + Result -= RHS; + return Result; + } +#endif // #if 1 + + INLINE ap_private& flip() { + VAL = (~0ULL^VAL)&mask; + clearUnusedBits(); + return *this; + } + + uint32_t countPopulation() const { return CountPopulation_64(VAL);} + uint32_t countLeadingZeros() const { + int remainder = BitWidth % APINT_BITS_PER_WORD; + int excessBits = (APINT_BITS_PER_WORD - remainder) % APINT_BITS_PER_WORD; + //enum { remainder = BitWidth % APINT_BITS_PER_WORD, excessBits = APINT_BITS_PER_WORD - remainder}; + uint32_t Count = CountLeadingZeros_64(VAL); + if (Count) + Count-=excessBits; + return AESL_std::min(Count, (uint32_t)_AP_W); + } + + /// HiBits - This function returns the high "numBits" bits of this ap_private. + ap_private<_AP_W, _AP_S, 1> getHiBits(uint32_t numBits) const { + ap_private<_AP_W, _AP_S, 1> ret(*this); + ret = (ret)>>(BitWidth - numBits); + return ret; + } + + /// LoBits - This function returns the low "numBits" bits of this ap_private. 
+ ap_private<_AP_W, _AP_S, 1> getLoBits(uint32_t numBits) const { + ap_private<_AP_W, _AP_S, 1> ret((VAL) << (BitWidth - numBits)); + ret = (ret)>>(BitWidth - numBits); + return ret; + //return ap_private(numBits, (VAL << (BitWidth - numBits))>> (BitWidth - numBits)); + } + + ap_private<_AP_W, _AP_S,1>& set(uint32_t bitPosition) { + VAL |= (1ULL << (bitPosition)); + clearUnusedBits(); + return *this; // clearUnusedBits(); + } + + void set() { + VAL = ~0ULL; + clearUnusedBits(); + } + + template + INLINE void set(const ap_private<_AP_W3, false> & val) { + operator = (ap_private<_AP_W3, _AP_S>(val)); + } + + INLINE void set(const ap_private & val) { + operator = (val); + } + + bool operator[](uint32_t bitPosition) const { + return (((1ULL << (bitPosition)) & VAL) != 0); + } + + INLINE void clearUnusedBits(void) { + enum { excess_bits = (_AP_W%APINT_BITS_PER_WORD) ? APINT_BITS_PER_WORD -_AP_W%APINT_BITS_PER_WORD : 0}; + VAL = _AP_S ? ((((int64_t)VAL)<<(excess_bits))>> (excess_bits)) : (excess_bits ? ((VAL)<<(excess_bits))>>(excess_bits) : VAL); + } + + INLINE void clearUnusedBitsToZero(void) { + enum { excess_bits = (_AP_W%APINT_BITS_PER_WORD) ? APINT_BITS_PER_WORD -_AP_W%APINT_BITS_PER_WORD : 0}; + static uint64_t mask = ~0ULL >> (excess_bits); + VAL &= mask; + } + + template + INLINE ap_private<_AP_W, _AP_S||_AP_S1> udiv(const ap_private<_AP_W, _AP_S1>& RHS) const { + return ap_private<_AP_W, _AP_S||_AP_S1>(VAL / RHS.VAL); + } + + INLINE ap_private udiv(uint64_t RHS) const { + return ap_private(VAL / RHS); + } + + /// Signed divide this ap_private by ap_private RHS. + /// @brief Signed division function for ap_private. 
+ template + INLINE ap_private<_AP_W, _AP_S||_AP_S1> sdiv(const ap_private<_AP_W, _AP_S1> & RHS) const { + if (isNegative()) + if (RHS.isNegative()) + return (-(*this)).udiv(-RHS); + else + return -((-(*this)).udiv(RHS)); + else if (RHS.isNegative()) + return -(this->udiv(-RHS)); + return this->udiv(RHS); + } + + /// Signed divide this ap_private by ap_private RHS. + /// @brief Signed division function for ap_private. + INLINE ap_private sdiv(int64_t RHS) const { + if (isNegative()) + if (RHS<0) + return (-(*this)).udiv(-RHS); + else + return -((-(*this)).udiv(RHS)); + else if (RHS<0) + return -(this->udiv(-RHS)); + return this->udiv(RHS); + } + + template + INLINE ap_private urem(const ap_private<_AP_W, _AP_S2>& RHS) const { + assert(RHS.VAL != 0 && "Divide by 0"); + return ap_private(VAL%RHS.VAL); + } + + INLINE ap_private urem(uint64_t RHS) const { + assert(RHS != 0 && "Divide by 0"); + return ap_private(VAL%RHS); + } + + /// Signed remainder operation on ap_private. + /// @brief Function for signed remainder operation. + template + INLINE ap_private srem(const ap_private<_AP_W, _AP_S2>& RHS) const { + if (isNegative()) { + ap_private lhs = -(*this); + if (RHS.isNegative()) { + ap_private rhs = -RHS; + return -(lhs.urem(rhs)); + } else + return -(lhs.urem(RHS)); + } else if (RHS.isNegative()) { + ap_private rhs = -RHS; + return this->urem(rhs); + } + return this->urem(RHS); + } + + /// Signed remainder operation on ap_private. + /// @brief Function for signed remainder operation. 
+ INLINE ap_private srem(int64_t RHS) const { + if (isNegative()) + if (RHS<0) + return -((-(*this)).urem(-RHS)); + else + return -((-(*this)).urem(RHS)); + else if (RHS<0) + return this->urem(-RHS); + return this->urem(RHS); + } + + INLINE static void udivrem(const ap_private &LHS, const ap_private &RHS, + ap_private &Quotient, ap_private &Remainder){ + assert(RHS!=0 && "Divide by 0"); + Quotient = LHS.VAl/RHS.VAl; + Remainder = LHS.VAL % RHS.VAL; + } + + INLINE static void udivrem(const ap_private &LHS, uint64_t RHS, + ap_private &Quotient, ap_private &Remainder){ + assert(RHS!=0 && "Divide by 0"); + Quotient = LHS.VAl/RHS; + Remainder = LHS.VAL % RHS; + } + + INLINE static void sdivrem(const ap_private &LHS, const ap_private &RHS, + ap_private &Quotient, ap_private &Remainder) { + if (LHS.isNegative()) { + if (RHS.isNegative()) + ap_private::udivrem(-LHS, -RHS, Quotient, Remainder); + else + ap_private::udivrem(-LHS, RHS, Quotient, Remainder); + Quotient = -Quotient; + Remainder = -Remainder; + } else if (RHS.isNegative()) { + ap_private::udivrem(LHS, -RHS, Quotient, Remainder); + Quotient = -Quotient; + } else { + ap_private::udivrem(LHS, RHS, Quotient, Remainder); + } + } + + INLINE static void sdivrem(const ap_private &LHS, int64_t RHS, + ap_private &Quotient, ap_private &Remainder) { + if (LHS.isNegative()) { + if (RHS<0) + ap_private::udivrem(-LHS, -RHS, Quotient, Remainder); + else + ap_private::udivrem(-LHS, RHS, Quotient, Remainder); + Quotient = -Quotient; + Remainder = -Remainder; + } else if (RHS<0) { + ap_private::udivrem(LHS, -RHS, Quotient, Remainder); + Quotient = -Quotient; + } else { + ap_private::udivrem(LHS, RHS, Quotient, Remainder); + } + } + + template INLINE bool eq(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return (*this) == RHS; + } + + template INLINE bool ne(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return !((*this) == RHS); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// the 
validity of the less-than relationship. + /// @returns true if *this < RHS when both are considered unsigned. + /// @brief Unsigned less than comparison + template INLINE bool ult(const ap_private<_AP_W1, _AP_S1, 1>& RHS) const { + uint64_t lhsZext = ((uint64_t(VAL)) << (64-_AP_W)) >> (64-_AP_W); + uint64_t rhsZext = ((uint64_t(RHS.VAL)) << (64-_AP_W1)) >> (64-_AP_W1); + return lhsZext < rhsZext; + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// validity of the less-than relationship. + /// @returns true if *this < RHS when both are considered signed. + /// @brief Signed less than comparison + template INLINE bool slt(const ap_private<_AP_W1, _AP_S1, 1>& RHS) const { + int64_t lhsSext = ((int64_t(VAL)) << (64-_AP_W)) >> (64-_AP_W); + int64_t rhsSext = ((int64_t(RHS.VAL)) << (64-_AP_W1)) >> (64-_AP_W1); + return lhsSext < rhsSext; + } + + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// validity of the less-or-equal relationship. + /// @returns true if *this <= RHS when both are considered unsigned. + /// @brief Unsigned less or equal comparison + template INLINE bool ule(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return ult(RHS) || eq(RHS); + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// validity of the less-or-equal relationship. + /// @returns true if *this <= RHS when both are considered signed. + /// @brief Signed less or equal comparison + template INLINE bool sle(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return slt(RHS) || eq(RHS); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// the validity of the greater-than relationship. + /// @returns true if *this > RHS when both are considered unsigned. 
+ /// @brief Unsigned greather than comparison + template INLINE bool ugt(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return !ult(RHS) && !eq(RHS); + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// the validity of the greater-than relationship. + /// @returns true if *this > RHS when both are considered signed. + /// @brief Signed greather than comparison + template INLINE bool sgt(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return !slt(RHS) && !eq(RHS); + } + + /// Regards both *this and RHS as unsigned quantities and compares them for + /// validity of the greater-or-equal relationship. + /// @returns true if *this >= RHS when both are considered unsigned. + /// @brief Unsigned greater or equal comparison + template INLINE bool uge(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return !ult(RHS); + } + + /// Regards both *this and RHS as signed quantities and compares them for + /// validity of the greater-or-equal relationship. + /// @returns true if *this >= RHS when both are considered signed. + /// @brief Signed greather or equal comparison + template INLINE bool sge(const ap_private<_AP_W1, _AP_S1>& RHS) const { + return !slt(RHS); + } + + INLINE ap_private abs() const { + if (isNegative()) + return -(*this); + return *this; + } + + ap_private<_AP_W, false> get() const { + ap_private<_AP_W,false> ret(*this); + return ret; + } + + INLINE static uint32_t getBitsNeeded(const char* str, uint32_t slen, uint8_t radix) { + return _AP_W; + } + + INLINE uint32_t getActiveBits() const { + uint32_t bits=_AP_W - countLeadingZeros(); + return bits?bits:1; + } + + INLINE double roundToDouble(bool isSigned=false) const { + const static uint64_t mask = ~0ULL << (APINT_BITS_PER_WORD - _AP_W); + return double(VAL); + } + + INLINE unsigned length() const { return _AP_W; } + + /*Reverse the contents of ap_private instance. I.e. 
LSB becomes MSB and vise versa*/ + INLINE ap_private& reverse () { + for (int i = 0; i < _AP_W/2; ++i) { + bool tmp = operator[](i); + if (operator[](_AP_W - 1 - i)) + set(i); + else + clear(i); + if (tmp) + set(_AP_W - 1 - i); + else + clear(_AP_W - 1 - i); + } + clearUnusedBits(); + return *this; + } + + /*Return true if the value of ap_private instance is zero*/ + INLINE bool iszero () const { + return isMinValue(); + } + + /* x < 0 */ + INLINE bool sign () const { + if (isNegative()) + return true; + return false; + } + + /* x[i] = !x[i] */ + INLINE void invert (int i) { + assert( i >= 0 && "Attempting to read bit with negative index"); + assert( i < _AP_W && "Attempting to read bit beyond MSB"); + flip(i); + } + + /* x[i] */ + INLINE bool test (int i) const { + assert( i >= 0 && "Attempting to read bit with negative index"); + assert( i < _AP_W && "Attempting to read bit beyond MSB"); + return operator[](i); + } + + //This is used for sc_lv and sc_bv, which is implemented by sc_uint + //Rotate an ap_private object n places to the left + INLINE void lrotate(int n) { + assert( n >= 0 && "Attempting to shift negative index"); + assert( n < _AP_W && "Shift value larger than bit width"); + operator = (shl(n) | lshr(_AP_W - n)); + } + + //This is used for sc_lv and sc_bv, which is implemented by sc_uint + //Rotate an ap_private object n places to the right + INLINE void rrotate(int n) { + assert( n >= 0 && "Attempting to shift negative index"); + assert( n < _AP_W && "Shift value larger than bit width"); + operator = (lshr(n) | shl(_AP_W - n)); + } + + //Set the ith bit into v + INLINE void set (int i, bool v) { + assert( i >= 0 && "Attempting to write bit with negative index"); + assert( i < _AP_W && "Attempting to write bit beyond MSB"); + v ? 
set(i) : clear(i); + } + + //Set the ith bit into v + INLINE void set_bit (int i, bool v) { + assert( i >= 0 && "Attempting to write bit with negative index"); + assert( i < _AP_W && "Attempting to write bit beyond MSB"); + v ? set(i) : clear(i); + } + + //Get the value of ith bit + INLINE bool get_bit (int i) const { + assert( i >= 0 && "Attempting to read bit with negative index"); + assert( i < _AP_W && "Attempting to read bit beyond MSB"); + return operator [](i); + } + + //complements every bit + INLINE void b_not() { + flip(); + } + + //Binary Arithmetic + //----------------------------------------------------------- +#define OP_BIN_AP(Sym,Rty, Fun) \ + template \ + INLINE \ + typename RType<_AP_W2,_AP_S2>::Rty \ + operator Sym (const ap_private<_AP_W2,_AP_S2>& op) const { \ + typename RType<_AP_W2,_AP_S2>::Rty lhs(*this); \ + typename RType<_AP_W2,_AP_S2>::Rty rhs(op); \ + return lhs.Fun(rhs); \ + } \ + + ///Bitwise and, or, xor + //OP_BIN_AP(&,logic, And) + //OP_BIN_AP(|,logic, Or) + //OP_BIN_AP(^,logic, Xor) + +#undef OP_BIN_AP + template + INLINE typename RType<_AP_W2,_AP_S2>::div + operator / (const ap_private<_AP_W2,_AP_S2>&op) const { + ap_private lhs=ap_private(*this); + ap_private rhs=ap_private(op); + return typename RType<_AP_W2,_AP_S2>::div((_AP_S||_AP_S2)?lhs.sdiv(rhs):lhs.udiv(rhs)); + } + + + template + INLINE typename RType<_AP_W2,_AP_S2>::mod + operator % (const ap_private<_AP_W2,_AP_S2>&op) const { + ap_private lhs=*this; + ap_private rhs=op; + typename RType<_AP_W2,_AP_S2>::mod res = typename RType<_AP_W2,_AP_S2>::mod (_AP_S?lhs.srem(rhs):lhs.urem(rhs)); + return res; + } + + +#define OP_ASSIGN_AP_2(Sym) \ + template \ + INLINE ap_private<_AP_W, _AP_S>& operator Sym##=(const ap_private<_AP_W2,_AP_S2>& op) \ + { \ + *this=operator Sym (op); \ + return *this; \ + } \ + + OP_ASSIGN_AP_2(/) + OP_ASSIGN_AP_2(%) +#undef OP_ASSIGN_AP_2 + + ///Bitwise assign: and, or, xor + //------------------------------------------------------------- + // 
OP_ASSIGN_AP(&) + // OP_ASSIGN_AP(^) + // OP_ASSIGN_AP(|) +#undef OP_ASSIGN_AP +#if 1 + + template + INLINE ap_private<_AP_W, _AP_S> + operator << (const ap_private<_AP_W2, _AP_S2>& op2) const { + uint32_t sh=op2.to_uint(); + return *this << sh; + } + + INLINE ap_private<_AP_W, _AP_S> + operator << (uint32_t sh) const { + return shl(sh); + } + +#endif + + template + INLINE ap_private<_AP_W, _AP_S> + operator >> (const ap_private<_AP_W2, _AP_S2>& op2) const { + uint32_t sh = op2.to_uint(); + return *this >> sh; + } + + INLINE ap_private<_AP_W, _AP_S> + operator >>(uint32_t sh) const { + ap_private<_AP_W, _AP_S> r(*this); + bool overflow=(sh>=_AP_W); + bool neg_v=r.isNegative(); + if(_AP_S) { + if(overflow) + neg_v?r.set():r.clear(); + else + return r.ashr(sh); + } else { + if(overflow) + r.clear(); + else + return r.lshr(sh); + } + return r; + } + + ///Shift assign + //------------------------------------------------------------------ +#define OP_ASSIGN_AP_3_SINGLE(Sym) \ + template \ + INLINE ap_private<_AP_W, _AP_S>& operator Sym##=(const ap_private<_AP_W2,_AP_S2>& op) \ + { \ + *this=operator Sym (op.getVal()); \ + return *this; \ + } + OP_ASSIGN_AP_3_SINGLE(>>) +#undef OP_ASSIGN_AP_3_SINGLE + + ///Comparisons + //----------------------------------------------------------------- + template + INLINE bool operator != (const ap_private<_AP_W2, _AP_S2, 1>& op) const { + return !(*this==op); + } + + template + INLINE bool operator > (const ap_private<_AP_W2, _AP_S2, 1>& op) const { + return op < *this; + } + + template + INLINE bool operator <= (const ap_private<_AP_W2, _AP_S2, 1>& op) const { + return !(*this>op); + } + + template + INLINE bool operator < (const ap_private<_AP_W2, _AP_S2, 1>& op) const { + enum { _AP_MAX_W = AP_MAX(_AP_W+(_AP_S||_AP_S2),_AP_W2+(_AP_S||_AP_S2))}; + ap_private<_AP_MAX_W, _AP_S> lhs(*this); + ap_private<_AP_MAX_W, _AP_S2> rhs(op); + if (_AP_S == _AP_S2) + return _AP_S?lhs.slt(rhs):lhs.ult(rhs); + else if (_AP_W < 32 && _AP_W2 < 32) + 
return lhs.slt(rhs); + else + if (_AP_S) + if (_AP_W2 >= _AP_W) + return lhs.ult(rhs); + else + return lhs.slt(rhs); + else + if (_AP_W >= _AP_W2) + return lhs.ult(rhs); + else + return lhs.slt(rhs); + } + + template + INLINE bool operator >=(const ap_private<_AP_W2, _AP_S2, 1>& op) const { + return !(*this + INLINE bool operator == (const ap_private<_AP_W2, _AP_S2, _AP_N2>& op) const { + return op == *this; + } + + template + INLINE bool operator != (const ap_private<_AP_W2, _AP_S2, _AP_N2>& op) const { + return !(op==*this); + } + + template + INLINE bool operator > (const ap_private<_AP_W2, _AP_S2, _AP_N2>& op) const { + return op < (*this); + } + + template + INLINE bool operator <= (const ap_private<_AP_W2, _AP_S2, _AP_N2>& op) const { + return op >= *this; + } + + template + INLINE bool operator <(const ap_private<_AP_W2, _AP_S2, _AP_N2>& op) const { + return op > *this; + } + + template + INLINE bool operator >=(const ap_private<_AP_W2,_AP_S2,_AP_N2>& op) const { + return op <= *this; + } + ///Bit and Part Select + //-------------------------------------------------------------- + INLINE ap_range_ref<_AP_W,_AP_S> + operator () (int Hi, int Lo) { + assert((Hi < _AP_W) && (Lo < _AP_W)&&"Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(this, Hi, Lo); + } + + INLINE ap_range_ref<_AP_W,_AP_S> + operator () (int Hi, int Lo) const { + assert((Hi < _AP_W) && (Lo < _AP_W)&&"Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(const_cast*>(this), Hi, Lo); + } + + INLINE ap_range_ref<_AP_W,_AP_S> + range (int Hi, int Lo) const { + assert((Hi < _AP_W) && (Lo < _AP_W)&&"Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>((const_cast*> (this)), Hi, Lo); + } + + INLINE ap_range_ref<_AP_W,_AP_S> + range (int Hi, int Lo) { + assert((Hi < _AP_W) && (Lo < _AP_W)&&"Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(this, Hi, Lo); + } + + template + INLINE ap_range_ref<_AP_W,_AP_S> + range (const ap_private<_AP_W2, _AP_S2> &HiIdx, + 
const ap_private<_AP_W3, _AP_S3> &LoIdx) { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(this, Hi, Lo); + } + + template + INLINE ap_range_ref<_AP_W,_AP_S> + operator () (const ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(this, Hi, Lo); + } + + template + INLINE ap_range_ref<_AP_W,_AP_S> + range (const ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) const { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + assert((Hi < _AP_W) && (Lo < _AP_W) && "Out of bounds in range()"); + return ap_range_ref<_AP_W,_AP_S>(const_cast(this), Hi, Lo); + } + + template + INLINE ap_range_ref<_AP_W,_AP_S> + operator () (const ap_private<_AP_W2, _AP_S2> &HiIdx, + const ap_private<_AP_W3, _AP_S3> &LoIdx) const { + int Hi = HiIdx.to_int(); + int Lo = LoIdx.to_int(); + return this->range(Hi, Lo); + } + + + INLINE ap_bit_ref<_AP_W,_AP_S> operator [] (uint32_t index) { + assert(index >= 0&&"Attempting to read bit with negative index"); + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return ap_bit_ref<_AP_W,_AP_S> (*this, (int)index); + } + + template + INLINE ap_bit_ref<_AP_W,_AP_S> operator [] (const ap_private<_AP_W2,_AP_S2> &index) { + assert(index >= 0 && "Attempting to read bit with negative index"); + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return ap_bit_ref<_AP_W,_AP_S>( *this, index.to_int() ); + } + + template + INLINE bool operator [] (const ap_private<_AP_W2,_AP_S2>& index) const { + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + ap_bit_ref<_AP_W,_AP_S> br =operator [] (index); + return br.to_bool(); + } + + INLINE ap_bit_ref<_AP_W,_AP_S> bit (int index) { + assert(index >= 0 && 
"Attempting to read bit with negative index"); + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + return ap_bit_ref<_AP_W,_AP_S>( *this, index ); + } + + template + INLINE ap_bit_ref<_AP_W,_AP_S> bit (const ap_private<_AP_W2,_AP_S2> &index) { + assert(index >= 0 && "Attempting to read bit with negative index"); + assert(index < _AP_W &&"Attempting to read bit beyond MSB"); + return ap_bit_ref<_AP_W,_AP_S>( *this, index.to_int() ); + } + + INLINE bool bit (int index) const { + assert(index >= 0 && "Attempting to read bit with negative index"); + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + ap_bit_ref<_AP_W,_AP_S> br(const_cast*>(this), index); + return br.to_bool(); + } + + template + INLINE bool bit (const ap_private<_AP_W2,_AP_S2>& index) const { + assert(index < _AP_W && "Attempting to read bit beyond MSB"); + ap_bit_ref<_AP_W,_AP_S> br = bit(index); + return br.to_bool(); + } + + template + INLINE ap_concat_ref<_AP_W,ap_private<_AP_W, _AP_S>,_AP_W2,ap_private<_AP_W2,_AP_S2> > concat(const ap_private<_AP_W2,_AP_S2>& a2) const { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, ap_private<_AP_W2,_AP_S2> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W,ap_private<_AP_W, _AP_S>,_AP_W2,ap_private<_AP_W2,_AP_S2> > concat(ap_private<_AP_W2,_AP_S2>& a2) { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, ap_private<_AP_W2,_AP_S2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > + operator, (const ap_private<_AP_W2, _AP_S2>& a2) const { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, + _AP_S2> >(const_cast& >(*this), const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > + operator, (const ap_private<_AP_W2, _AP_S2>& a2) { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, + _AP_S2> >(*this, const_cast& 
>(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > + operator, (ap_private<_AP_W2, _AP_S2>& a2) const { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, + _AP_S2> >(const_cast& >(*this), a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, _AP_S2> > + operator, (ap_private<_AP_W2, _AP_S2>& a2) { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, ap_private<_AP_W2, + _AP_S2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator, (const ap_range_ref<_AP_W2, _AP_S2> &a2) const { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, + ap_range_ref<_AP_W2, _AP_S2> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, ap_range_ref<_AP_W2, _AP_S2> > + operator, (ap_range_ref<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2, + ap_range_ref<_AP_W2, _AP_S2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, ap_bit_ref<_AP_W2, _AP_S2> > + operator, (const ap_bit_ref<_AP_W2, _AP_S2> &a2) const { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, + ap_bit_ref<_AP_W2, _AP_S2> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, ap_bit_ref<_AP_W2, _AP_S2> > + operator, (ap_bit_ref<_AP_W2, _AP_S2> &a2) { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, 1, + ap_bit_ref<_AP_W2, _AP_S2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2+_AP_W3, ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator, (const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) const { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2+_AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(const_cast& >(*this), + 
const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2+_AP_W3, ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> > + operator, (ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> &a2) { + return ap_concat_ref<_AP_W, ap_private<_AP_W, _AP_S>, _AP_W2+_AP_W3, + ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) const { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, af_range_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, _AP_W2, af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (af_range_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) { + return ap_concat_ref<_AP_W, ap_private, _AP_W2, af_range_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, a2); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, 1, af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (const af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) const { + return ap_concat_ref<_AP_W, ap_private, 1, af_bit_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(const_cast& >(*this), + const_cast& >(a2)); + } + + template + INLINE ap_concat_ref<_AP_W, ap_private, 1, af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> > + operator, (af_bit_ref<_AP_W2, _AP_I2, _AP_S2, _AP_Q2, + _AP_O2, _AP_N2> &a2) { + return ap_concat_ref<_AP_W, ap_private, 1, af_bit_ref<_AP_W2, + _AP_I2, _AP_S2, _AP_Q2, _AP_O2, _AP_N2> >(*this, a2); + } + + template + INLINE ap_private + operator & (const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) { + return *this & a2.get(); + } + + template + INLINE ap_private + operator | (const ap_concat_ref<_AP_W2, _AP_T2, 
_AP_W3, _AP_T3>& a2) { + return *this | a2.get(); + } + + template + INLINE ap_private + operator ^ (const ap_concat_ref<_AP_W2, _AP_T2, _AP_W3, _AP_T3>& a2) { + return *this ^ a2.get(); + } + + + //Reduce operation + //----------------------------------------------------------- + INLINE bool and_reduce() const { + return (VAL & mask) == mask; + } + + INLINE bool nand_reduce() const { + return (VAL & mask) != mask; + } + + INLINE bool or_reduce() const { + return (bool)VAL; + } + + INLINE bool nor_reduce() const { + return VAL==0; + } + + INLINE bool xor_reduce() const { + unsigned int i=countPopulation(); + return (i%2)?true:false; + } + + INLINE bool xnor_reduce() const { + unsigned int i=countPopulation(); + return (i%2)?false:true; + } + + INLINE std::string to_string(uint8_t radix=2, bool sign=false) const { + return toString(radix, radix==10?_AP_S:sign); + } +}; +template +std::string ap_private<_AP_W, _AP_S, 1>::toString(uint8_t radix, bool wantSigned) const { + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + static const char *digits[] = { + "0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f" + }; + std::string result; + if (radix != 10) { + // For the 2, 8 and 16 bit cases, we can just shift instead of divide + // because the number of bits per digit (1,3 and 4 respectively) divides + // equaly. We just shift until there value is zero. + + // First, check for a zero value and just short circuit the logic below. + if (*this == (uint64_t)(0)) + result = "0"; + else { + ap_private<_AP_W, false, 1> tmp(*this); + size_t insert_at = 0; + if (wantSigned && isNegative()) { + // They want to print the signed version and it is a negative value + // Flip the bits and add one to turn it into the equivalent positive + // value and put a '-' in the result. 
+ tmp.flip(); + tmp++; + result = "-"; + insert_at = 1; + } + // Just shift tmp right for each digit width until it becomes zero + uint32_t shift = (radix == 16 ? 4 : (radix == 8 ? 3 : 1)); + uint64_t mask = radix - 1; + ap_private<_AP_W, false, 1> zero(0); + while (tmp.ne(zero)) { + unsigned digit = (unsigned)(tmp.VAL & mask); + result.insert(insert_at, digits[digit]); + tmp = tmp.lshr(shift); + } + } + return result; + } + + ap_private<_AP_W, false, 1> tmp(*this); + ap_private<6, false, 1> divisor(radix); + ap_private<_AP_W, _AP_S, 1> zero(0); + size_t insert_at = 0; + if (wantSigned && isNegative()) { + // They want to print the signed version and it is a negative value + // Flip the bits and add one to turn it into the equivalent positive + // value and put a '-' in the result. + tmp.flip(); + tmp++; + result = "-"; + insert_at = 1; + } + if (tmp == ap_private<_AP_W, false, 1>(0ULL)) + result = "0"; + else while (tmp.ne(zero)) { + ap_private<_AP_W, false, 1> APdigit = tmp%divisor; + ap_private<_AP_W, false, 1> tmp2 = tmp/divisor; + uint32_t digit = (uint32_t)(APdigit.getZExtValue()); + assert(digit < radix && "divide failed"); + result.insert(insert_at,digits[digit]); + tmp = tmp2; + } + return result; + +} + +#endif /* #ifndef LLVM_SUPPORT_MATHEXTRAS_H */ \ No newline at end of file diff --git a/hls_2018/router_03_boardstr/main.cpp b/hls_2018/router_03_boardstr/main.cpp new file mode 100755 index 0000000..2de79fc --- /dev/null +++ b/hls_2018/router_03_boardstr/main.cpp @@ -0,0 +1,99 @@ +/** + * main.cpp + * + * for Vivado HLS + */ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +#ifdef CALCTIME +#include +#include +#endif + +#include "router.hpp" + +#define PRINT_SOLUTION + + +int main(int argc, char *argv[]) { + using namespace std; + + // Test data // + // NL_Q00.txt + //char boardstr[BOARDSTR_SIZE] = "X10Y05Z3L0000107041L0004107002L0102102021L0900100003"; + // NL_Q06.txt + char boardstr[BOARDSTR_SIZE] = 
"X10Y18Z2L0900109002L0901105012L0902103052L0903103062L0904100102L0905106012L0906109022L0717109102L0808109112L0017209172L0401200072L0912208152L0009201092L0709209092L0901206052L0309204092L0701209072L0101201022L0011202152L0016202162";
+	// NL_Q08.txt
+	//char boardstr[BOARDSTR_SIZE] = "X17Y20Z2L0000103022L1603115052L0916107032L0302108012L1104111042L1002100002L0919116162L1616113182L1001115012L0500201182L1603213152L0600210022";
+	char boardstr_high[BOARDSTR_SIZE] = {};
+
+	// Read boardstr from command line
+	if (1 < argc) {
+		// From stdin
+		if(argv[1][0]!='X')
+		{
+			if (fgets(boardstr, BOARDSTR_SIZE, stdin) == NULL) { return 1; } // fgets returns NULL on EOF/error; strlen(NULL) is UB
+			size_t length = strlen(boardstr);
+			if (length > 0 && boardstr[length-1] == '\n') { boardstr[length-1] = 0; } // strip newline only when present
+		}
+		else
+		{
+			strcpy(boardstr, argv[1]);
+		}
+	}
+
+	// Seed value
+	int seed = 12345;
+	if (2 < argc) {
+		seed = atoi(argv[2]);
+	}
+
+#ifdef PRINT_SOLUTION
+	int size_x = (boardstr[1] - '0') * 10 + (boardstr[2] - '0');
+	int size_y = (boardstr[4] - '0') * 10 + (boardstr[5] - '0');
+	int size_z = (boardstr[7] - '0');
+#endif
+
+	// Solver
+	ap_int<32> status;
+	clock_t clock_start, clock_done;
+	clock_start = clock();
+	bool result = pynqrouter(boardstr, boardstr_high, seed, &status);
+	clock_done = clock();
+	if (result) {
+		cout << endl << "Test Passed!" << endl;
+	} else {
+		cout << endl << "Test Failed!"
<< endl; + } + cout << "status = " << (int)status << endl; + cout << "elapsed = " << ((double)(clock_done - clock_start) / CLOCKS_PER_SEC) << endl << endl; + +#ifdef PRINT_SOLUTION + cout << "SOLUTION" << endl; + cout << "========" << endl; + cout << "SIZE " << size_x << "X" << size_y << "X" << size_z << endl; + for (int z = 0; z < size_z; z++) { + cout << "LAYER " << (z + 1) << endl; + for (int y = 0; y < size_y; y++) { + for (int x = 0; x < size_x; x++) { + if (x != 0) { + cout << ","; + } + int i = ((x * MAX_WIDTH + y) << BITWIDTH_Z) | z; + unsigned int num = (unsigned char)(boardstr[i]) + ((unsigned char)(boardstr_high[i]) << 8); + cout << setfill('0') << setw(3) << right << num; + //cout << num; + } + cout << endl; + } + } +#endif + + return 0; +} + diff --git a/hls_2018/router_03_boardstr/router.cpp b/hls_2018/router_03_boardstr/router.cpp new file mode 100755 index 0000000..93ea273 --- /dev/null +++ b/hls_2018/router_03_boardstr/router.cpp @@ -0,0 +1,518 @@ +/** + * router.cpp + * + * for Vivado HLS + */ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +#include "./router.hpp" + +// Set weight +ap_uint<8> new_weight(ap_uint<16> x) { +#pragma HLS INLINE + // K. 
Terada: y = 1~32 (8bit)
+	ap_uint<8> y;
+	y = ((x & 255) >> 3) + 1;
+	return y;
+}
+
+
+// Global values
+static ap_uint<7> size_x; // X
+static ap_uint<7> size_y; // Y
+static ap_uint<4> size_z; // Z
+
+static ap_uint line_num = 0; // #Lines
+
+#ifdef DEBUG_PRINT
+int max_queue_length; // Max length of priority queue
+int max_search_count; // Max count of queue pop
+int max_buffer_length; // Max length of line buffer
+#endif
+
+
+bool pynqrouter(char boardstr[BOARDSTR_SIZE], char boardstr_high[BOARDSTR_SIZE], ap_uint<32> seed, ap_int<32> *status) {
+#pragma HLS INTERFACE s_axilite port=boardstr bundle=AXI4LS
+#pragma HLS INTERFACE s_axilite port=boardstr_high bundle=AXI4LS
+#pragma HLS INTERFACE s_axilite port=seed bundle=AXI4LS
+#pragma HLS INTERFACE s_axilite port=status bundle=AXI4LS
+#pragma HLS INTERFACE s_axilite port=return bundle=AXI4LS
+
+	// status(0:Solved, 1:Not solved)
+	*status = -1;
+	line_num = 0; // reset static #lines counter: it persists across invocations of the kernel
+	// For all lines
+	ap_uint paths[MAX_BUFFER]; // Line buffer
+
+	// For each line
+	// Note: Should not partition completely
+	bool adjacents[MAX_LINES]; // Line has adjacent terminals?
+ ap_uint starts[MAX_LINES]; // Start list + ap_uint goals[MAX_LINES]; // Goal list + ap_uint s_idx[MAX_LINES]; // Start point on line buffer + + ap_uint<8> weights[MAX_CELLS]; // Weight of each cell + // Note: Should not partition weight array + // since each element will be accessed in "random" order + + + // ================================ + // (Step.0) Initialization (BEGIN) + // ================================ + + // Note: Loop counter -> need an extra bit (for condition determination) + + INIT_WEIGHTS: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + weights[i] = 1; + } + + /// Parse /// + size_x = (boardstr[1] - '0') * 10 + (boardstr[2] - '0'); + size_y = (boardstr[4] - '0') * 10 + (boardstr[5] - '0'); + size_z = (boardstr[7] - '0'); + + INIT_BOARDS: + for (ap_uint idx = 8; idx < (ap_uint)(BOARDSTR_SIZE); idx+=11) { + + // NULL-terminated + if (boardstr[idx] == 0) break; + + // Start & Goal of each line + ap_uint<7> s_x = (boardstr[idx+1] - '0') * 10 + (boardstr[idx+2] - '0'); + ap_uint<7> s_y = (boardstr[idx+3] - '0') * 10 + (boardstr[idx+4] - '0'); + ap_uint<3> s_z = (boardstr[idx+5] - '0') - 1; + ap_uint<7> g_x = (boardstr[idx+6] - '0') * 10 + (boardstr[idx+7] - '0'); + ap_uint<7> g_y = (boardstr[idx+8] - '0') * 10 + (boardstr[idx+9] - '0'); + ap_uint<3> g_z = (boardstr[idx+10] - '0') - 1; + + ap_uint start_id = (((ap_uint)s_x * MAX_WIDTH + (ap_uint)s_y) << BITWIDTH_Z) | (ap_uint)s_z; + ap_uint goal_id = (((ap_uint)g_x * MAX_WIDTH + (ap_uint)g_y) << BITWIDTH_Z) | (ap_uint)g_z; + starts[line_num] = start_id; + goals[line_num] = goal_id; + weights[start_id] = MAX_WEIGHT; + weights[goal_id] = MAX_WEIGHT; + + // Line has adjacent terminals? 
+ adjacents[line_num] = false; + ap_int<8> dx = (ap_int<8>)g_x - (ap_int<8>)s_x; // Min: -71, Max: 71 (Signed 8bit) + ap_int<8> dy = (ap_int<8>)g_y - (ap_int<8>)s_y; // Min: -71, Max: 71 (Signed 8bit) + ap_int<4> dz = (ap_int<4>)g_z - (ap_int<4>)s_z; // Min: -7, Max: 7 (Signed 4bit) + if ((dx == 0 && dy == 0 && (dz == 1 || dz == -1)) || (dx == 0 && (dy == 1 || dy == -1) && dz == 0) || ((dx == 1 || dx == -1) && dy == 0 && dz == 0)) { + adjacents[line_num] = true; + } + + line_num++; + } + + // ================================ + // (Step.0) Initialization (END) + // ================================ + +#ifdef DEBUG_PRINT + max_queue_length = 0; + max_search_count = 0; + max_buffer_length = 0; +#endif + + ap_uint pointer = 0; // Pointer for line buffer + + // ================================ + // (Step.1) Initial Routing (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Initial Routing ..." << endl; +#endif + + FIRST_ROUTING: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + + s_idx[i] = pointer; + + if (adjacents[i] == true) continue; // Skip routing + +#ifdef DEBUG_PRINT + //cout << "LINE #" << (int)(i + 1) << endl; +#endif + // Routing + pointer = search(s_idx[i], paths, starts[i], goals[i], weights); + } + + // ================================ + // (Step.1) Initial Routing (END) + // ================================ + + + // Memories for Overlap Check + ap_uint<1> overlap_checks[MAX_CELLS]; + bool has_overlap = false; + + // ================================ + // (Step.2) Rip-up Routing (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Rip-up Routing ..." 
<< endl; +#endif + + ROUTING: + for (ap_uint<16> round = 0; round < 32768 /* = (2048 * 16) */; round++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=32768 + + // Target line + ap_uint target = round % line_num; + ap_uint next_target = target + 1; + if (next_target == line_num) next_target = 0; + +#ifdef DEBUG_PRINT + //cout << "(round " << round << ") LINE #" << (int)(target + 1); + //cout << " -> " << pointer << endl; +#endif +#ifdef DEBUG_PRINT + int buffer_length = pointer - s_idx[target]; + if (max_buffer_length < buffer_length) { max_buffer_length = buffer_length; } +#endif + + // Skip routing + if (adjacents[target] == true) { + s_idx[target] = pointer; continue; + } + + + // (Step.2-1) Reset weights of target line + WEIGHT_RESET: + for (ap_uint j = s_idx[target]; j != s_idx[next_target]; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + weights[paths[j]] = 1; + } + + // (Step.2-2) Set weights of non-target lines and terminals + ap_uint<8> current_round_weight = new_weight(round); + WEIGHT_PATH: + for (ap_uint j = s_idx[next_target]; j != pointer; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=8192 + weights[paths[j]] = current_round_weight; + } + WEIGHT_TERMINAL: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + weights[starts[i]] = MAX_WEIGHT; + weights[goals[i]] = MAX_WEIGHT; + } + // Reset weight of start terminal of target line (bug avoiding) + // Restore original settings in (*) + weights[starts[target]] = 1; + + // (Step.2-3) Routing + s_idx[target] = pointer; + pointer = search(s_idx[target], paths, starts[target], goals[target], weights); + + // (*) + weights[starts[target]] = MAX_WEIGHT; + +#ifdef DEBUG_PRINT + bool ng = false; + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { + if (weights[starts[i]] != 255 || weights[goals[i]] != 255) { + cout << i << " "; ng = true; + } + } + if(ng) { cout << endl; } +#endif + + // (Step.2-4) Overlap check + has_overlap = false; + OVERLAP_RESET: + for (ap_uint i = 0; 
i < (ap_uint)(MAX_CELLS); i++) { + overlap_checks[i] = 0; + } + OVERLAP_CHECK_LINE: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + overlap_checks[starts[i]] = 1; + overlap_checks[goals[i]] = 1; + } + OVERLAP_CHECK_PATH: + for (ap_uint j = s_idx[next_target]; j != pointer; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=8192 + ap_uint cell_id = paths[j]; + if (overlap_checks[cell_id]) { + has_overlap = true; break; + } + overlap_checks[cell_id] = 1; + } +#ifdef DEBUG_PRINT + if(!has_overlap){ cout << "ROUND: " << round << endl; } +#endif + if (!has_overlap) break; // Finish routing? + } + +#ifdef DEBUG_PRINT + cout << "MAX PQ LENGTH: " << max_queue_length << endl; + cout << "MAX SEARCH COUNT: " << max_search_count << endl; + cout << "MAX BUFFER: " << max_buffer_length << endl; +#endif + + // Not solved + if (has_overlap) { + *status = 1; return false; + } + + // ================================ + // (Step.2) Rip-up Routing (END) + // ================================ + + + // ================================ + // (Step.3) Output (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Output ..." 
<< endl; +#endif + + // Init: Blank = 0 + OUTPUT_INIT: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + boardstr[i] = 0; + boardstr_high[i] = 0; + } + // Line + OUTPUT_LINE: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + boardstr[starts[i]] = (i + 1); + boardstr[goals[i]] = (i + 1); + boardstr_high[starts[i]] = (i + 1) >> 8; + boardstr_high[goals[i]] = (i + 1) >> 8; + + ap_uint p1; // p1: s_idx of target + ap_uint p2; // p2: s_idx of next target + p1 = s_idx[i]; + if (i == (ap_uint)(line_num-1)) { + p2 = s_idx[0]; + } + else { + p2 = s_idx[i+1]; + } + if ((ap_uint)(p2 - p1) > 8192){ + p2 = pointer; + } + OUTPUT_LINE_PATH: + for (ap_uint j = p1; j != p2; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + boardstr[paths[j]] = (i + 1); + boardstr_high[paths[j]] = (i + 1) >> 8; + } + } + + // ================================ + // (Step.3) Output (END) + // ================================ + + *status = 0; return true; +} + + +// ================================ // +// For Routing +// ================================ // + +// Max: 71, Min: 0 (7bit) +ap_uint<7> abs_uint7(ap_uint<7> a, ap_uint<7> b) { +#pragma HLS INLINE + if (a < b) { return b - a; } + else { return a - b; } +} +// Max: 7, Min: 0 (3bit) +ap_uint<3> abs_uint3(ap_uint<3> a, ap_uint<3> b) { +#pragma HLS INLINE + if (a < b) { return b - a; } + else { return a - b; } +} + +// Reference codes: +// http://lethe2211.hatenablog.com/entry/2014/12/30/011030 +// http://www.redblobgames.com/pathfinding/a-star/implementation.html +// Need to modify "array partition factor" +ap_uint search(ap_uint idx, ap_uint paths[MAX_BUFFER], ap_uint start, ap_uint goal, ap_uint<8> w[MAX_CELLS]) { + + ap_uint dist[MAX_CELLS]; + ap_uint prev[MAX_CELLS]; + + SEARCH_INIT_DIST: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + dist[i] = 65535; // = (2^16 - 1) + } + + // Priority queue (Heap) + ap_uint pq_len = 0; + bool is_empty = true; + ap_uint<32> pq_nodes[MAX_PQ]; + 
+#ifdef DEBUG_PRINT + int queue_length = 0; + int search_count = 0; +#endif + + // Point of goal terminal + ap_uint<13> goal_xy = (ap_uint<13>)(goal >> BITWIDTH_Z); + ap_uint<7> goal_x = (ap_uint<7>)(goal_xy / MAX_WIDTH); + ap_uint<7> goal_y = (ap_uint<7>)(goal_xy - goal_x * MAX_WIDTH); + ap_uint<3> goal_z = (ap_uint<3>)(goal & BITMASK_Z); + + dist[start] = 0; + pq_push(pq_nodes, 0, start, &pq_len, &is_empty); // push start terminal + + SEARCH_PQ: + while (!is_empty) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=1000 +#pragma HLS LOOP_FLATTEN off + + ap_uint<16> prev_cost; + ap_uint<16> src; // target cell + pq_pop(pq_nodes, &prev_cost, &src, &pq_len, &is_empty); +#ifdef DEBUG_PRINT + search_count++; +#endif + + + // End routing + if (src == goal) break; + + + // Target cell + ap_uint<16> dist_src = dist[src]; + ap_uint<8> cost = w[src]; + // Point of target cell + ap_uint<13> src_xy = (ap_uint<13>)(src >> BITWIDTH_Z); + ap_uint<7> src_x = (ap_uint<7>)(src_xy / MAX_WIDTH); + ap_uint<7> src_y = (ap_uint<7>)(src_xy - src_x * MAX_WIDTH); + ap_uint<3> src_z = (ap_uint<3>)(src & BITMASK_Z); + + // Search adjacent cells + SEARCH_ADJACENTS: + for (ap_uint<3> a = 0; a < 6; a++) { + ap_int<8> dest_x = (ap_int<8>)src_x; // Min: -1, Max: 72 (Signed 8bit) + ap_int<8> dest_y = (ap_int<8>)src_y; // Min: -1, Max: 72 (Signed 8bit) + ap_int<5> dest_z = (ap_int<5>)src_z; // Min: -1, Max: 8 (Signed 5bit) + if (a == 0) { dest_x -= 1; } + if (a == 1) { dest_x += 1; } + if (a == 2) { dest_y -= 1; } + if (a == 3) { dest_y += 1; } + if (a == 4) { dest_z -= 1; } + if (a == 5) { dest_z += 1; } + + // Inside the board ? 
// + if (0 <= dest_x && dest_x < (ap_int<8>)size_x && 0 <= dest_y && dest_y < (ap_int<8>)size_y && 0 <= dest_z && dest_z < (ap_int<5>)size_z) { + // Adjacent cell + ap_uint<16> dest = (((ap_uint<16>)dest_x * MAX_WIDTH + (ap_uint<16>)dest_y) << BITWIDTH_Z) | (ap_uint<16>)dest_z; + ap_uint<16> dist_new = dist_src + cost; + + if (dist[dest] > dist_new) { + dist[dest] = dist_new; // Update dist + prev[dest] = src; // Recode previous cell + dist_new += abs_uint7(dest_x, goal_x) + abs_uint7(dest_y, goal_y) + abs_uint3(dest_z, goal_z); // A* heuristic + pq_push(pq_nodes, dist_new, dest, &pq_len, &is_empty); // push adjacent cell + } + } + } +#ifdef DEBUG_PRINT + if (queue_length < pq_len) { queue_length = pq_len; } +#endif + } + + // Output target path + // Note: Do not include start & goal terminals + ap_uint<16> t = prev[goal]; + + // Backtracking + ap_uint p = idx; // buffer-idx + SEARCH_BACKTRACK: + while (t != start) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + paths[p] = t; + p++; + t = prev[t]; + } + +#ifdef DEBUG_PRINT + if (max_queue_length < queue_length) { max_queue_length = queue_length; } + if (max_search_count < search_count) { max_search_count = search_count; } +#endif + + return p; +} + +// Queue push (Enqueue) +// Need to modify "trip count" (1) +void pq_push(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> priority, ap_uint<16> data, ap_uint *pq_len, bool *is_empty) { +#pragma HLS INLINE + + (*pq_len)++; + if ((*pq_len) == 0) { (*pq_len)--; } // Queue is full -> Last element is automatically removed + + // Binary search for circular list + ap_uint i = (*pq_len); + ap_uint p = (*pq_len) >> 1; // parent node + PQ_PUSH_LOOP: + while (i > 1 && (ap_uint<16>)(pq_nodes[p] & PQ_PRIORITY_MASK) >= priority) { +#pragma HLS LOOP_TRIPCOUNT min=0 max=15 +/** Set!: min=0 max=PQ_BIT **/ + pq_nodes[i] = pq_nodes[p]; + i = p; + p = p >> 1; // parent node + } + pq_nodes[i] = ((ap_uint<32>)data << PQ_PRIORITY_WIDTH) | (ap_uint<32>)priority; + *is_empty = false; +} + +// Queue pop 
(Dequeue) +// Need to modify "trip count" (1) +void pq_pop(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> *ret_priority, ap_uint<16> *ret_data, ap_uint *pq_len, bool *is_empty) { +#pragma HLS INLINE + + *ret_priority = (ap_uint<16>)(pq_nodes[1] & PQ_PRIORITY_MASK); + *ret_data = (ap_uint<16>)(pq_nodes[1] >> PQ_PRIORITY_WIDTH); + + ap_uint i = 1; // root node + ap_uint last_priority = (ap_uint<16>)(pq_nodes[*pq_len] & PQ_PRIORITY_MASK); // Priority of last element + + PQ_POP_LOOP: + while (!(i >> (PQ_BIT-1))) { // (2018.08.24) Loop condition fixed +#pragma HLS LOOP_TRIPCOUNT min=1 max=15 +/** Set!: min=0 max=PQ_BIT **/ + ap_uint c1 = i << 1; // child node(left) + ap_uint c2 = c1 + 1; // child node(right) + if (c1 < *pq_len && (ap_uint<16>)(pq_nodes[c1] & PQ_PRIORITY_MASK) <= last_priority) { + if (c2 < *pq_len && (ap_uint<16>)(pq_nodes[c2] & PQ_PRIORITY_MASK) <= (ap_uint<16>)(pq_nodes[c1] & PQ_PRIORITY_MASK)) { + pq_nodes[i] = pq_nodes[c2]; + i = c2; + } + else { + pq_nodes[i] = pq_nodes[c1]; + i = c1; + } + } + else { + if (c2 < *pq_len && (ap_uint<16>)(pq_nodes[c2] & PQ_PRIORITY_MASK) <= last_priority) { + pq_nodes[i] = pq_nodes[c2]; + i = c2; + } + else { + break; + } + } + } + pq_nodes[i] = pq_nodes[*pq_len]; + (*pq_len)--; + if ((*pq_len) == 0) { *is_empty = true; } +} + diff --git a/hls_2018/router_03_boardstr/router.hpp b/hls_2018/router_03_boardstr/router.hpp new file mode 100755 index 0000000..cdc6ef0 --- /dev/null +++ b/hls_2018/router_03_boardstr/router.hpp @@ -0,0 +1,56 @@ +/** + * router.hpp + * + * for Vivado HLS + */ + +#ifndef __ROUTER_HPP__ +#define __ROUTER_HPP__ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +//#define DEBUG_PRINT // for debug + +#ifdef DEBUG_PRINT +using namespace std; +#endif + +// Parameters +#define MAX_WIDTH 72 // Max of X, Y +#define BITWIDTH_XY 13 +#define BITMASK_XY 65528 // 1111 1111 1111 1000 +#define MAX_LAYER 8 // Max of Z +#define BITWIDTH_Z 3 +#define BITMASK_Z 7 // 0000 0000 0000 0111 + +#define 
MAX_CELLS 41472 // Max #cells (16bit)
+#define MAX_LINES 1024 // Max #lines (10bit)
+#define MAX_PQ 32768 // Queue size (15bit)
+#define MAX_BUFFER 16384 // Line buffer size (14bit)
+#define CELL_BIT 16
+#define LINE_BIT 10
+#define PQ_BIT 15
+#define BUFF_BIT 14
+
+#define PQ_PRIORITY_WIDTH 16
+#define PQ_PRIORITY_MASK 65535 // 0000 0000 0000 0000 1111 1111 1111 1111
+#define PQ_DATA_WIDTH 16
+#define PQ_DATA_MASK 4294901760 // 1111 1111 1111 1111 0000 0000 0000 0000
+
+#define MAX_WEIGHT 255 // Max weight
+#define BOARDSTR_SIZE 41472 // Size of I/O
+
+ap_uint<8> new_weight(ap_uint<16> x);
+bool pynqrouter(char boardstr[BOARDSTR_SIZE], char boardstr_high[BOARDSTR_SIZE], ap_uint<32> seed, ap_int<32> *status);
+
+ap_uint<7> abs_uint7(ap_uint<7> a, ap_uint<7> b);
+ap_uint<3> abs_uint3(ap_uint<3> a, ap_uint<3> b);
+ap_uint search(ap_uint idx, ap_uint paths[MAX_BUFFER], ap_uint start, ap_uint goal, ap_uint<8> w[MAX_CELLS]); // was w[MAX_WEIGHT]: must match the definition in router.cpp
+void pq_push(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> priority, ap_uint<16> data, ap_uint *pq_len, bool *is_empty);
+void pq_pop(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> *ret_priority, ap_uint<16> *ret_data, ap_uint *pq_len, bool *is_empty);
+
+#endif /* __ROUTER_HPP__ */
-- 
2.22.0