diff --git a/hls_2018/router_01/router.cpp b/hls_2018/router_01/router.cpp new file mode 100755 index 0000000000000000000000000000000000000000..de396f1bfe58fa23ed3acc62d1f78512bf41b0dd --- /dev/null +++ b/hls_2018/router_01/router.cpp @@ -0,0 +1,499 @@ +/** + * router.cpp + * + * for Vivado HLS + */ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +#include "./router.hpp" + +// Set weight +ap_uint<8> new_weight(ap_uint<16> x) { +#pragma HLS INLINE + // K. Terada: y = 1~32 (8bit) + ap_uint<8> y; + y = ((x & 255) >> 3) + 1; + return y; +} + + +// Global values +static ap_uint<7> size_x; // X +static ap_uint<7> size_y; // Y +static ap_uint<4> size_z; // Z + +static ap_uint line_num = 0; // #Lines + +#ifdef DEBUG_PRINT +int max_queue_length; // Max length of priority queue +int max_search_count; // Max count of queue pop +int max_buffer_length; // Max length of line buffer +#endif + + +bool pynqrouter(char boardstr[BOARDSTR_SIZE], ap_uint<32> seed, ap_int<32> *status) { +#pragma HLS INTERFACE s_axilite port=boardstr bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=seed bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=status bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=return bundle=AXI4LS + + // status(0:Solved, 1:Not solved) + *status = -1; + + // For all lines + ap_uint paths[MAX_BUFFER]; // Line buffer + + // For each line + // Note: Should not partition completely + bool adjacents[MAX_LINES]; // Line has adjacent terminals? + ap_uint starts[MAX_LINES]; // Start list + ap_uint goals[MAX_LINES]; // Goal list + ap_uint s_idx[MAX_LINES]; // Start point on line buffer + + ap_uint<8> weights[MAX_CELLS]; // Weight of each cell + // Note: Should not partition weight array + // since each element will be accessed in "random" order + + + // ================================ + // (Step.0) Initialization (BEGIN) + // ================================ + + // Note: Loop counter -> need an extra bit (for condition determination) + + INIT_WEIGHTS: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + weights[i] = 1; + } + + /// Parse /// + size_x = (boardstr[1] - '0') * 10 + (boardstr[2] - '0'); + size_y = (boardstr[4] - '0') * 10 + (boardstr[5] - '0'); + size_z = (boardstr[7] - '0'); + + INIT_BOARDS: + for (ap_uint idx = 8; idx < (ap_uint)(BOARDSTR_SIZE); idx+=11) { + + // NULL-terminated + if (boardstr[idx] == 0) break; + + // Start & Goal of each line + ap_uint<7> s_x = (boardstr[idx+1] - '0') * 10 + (boardstr[idx+2] - '0'); + ap_uint<7> s_y = (boardstr[idx+3] - '0') * 10 + (boardstr[idx+4] - '0'); + ap_uint<3> s_z = (boardstr[idx+5] - '0') - 1; + ap_uint<7> g_x = (boardstr[idx+6] - '0') * 10 + (boardstr[idx+7] - '0'); + ap_uint<7> g_y = (boardstr[idx+8] - '0') * 10 + (boardstr[idx+9] - '0'); + ap_uint<3> g_z = (boardstr[idx+10] - '0') - 1; + + ap_uint start_id = (((ap_uint)s_x * MAX_WIDTH + (ap_uint)s_y) << BITWIDTH_Z) | (ap_uint)s_z; + ap_uint goal_id = (((ap_uint)g_x * MAX_WIDTH + (ap_uint)g_y) << BITWIDTH_Z) | (ap_uint)g_z; + starts[line_num] = start_id; + goals[line_num] = goal_id; + weights[start_id] = MAX_WEIGHT; + weights[goal_id] = MAX_WEIGHT; + + // Line has adjacent terminals? + adjacents[line_num] = false; + ap_int<8> dx = (ap_int<8>)g_x - (ap_int<8>)s_x; // Min: -71, Max: 71 (Signed 8bit) + ap_int<8> dy = (ap_int<8>)g_y - (ap_int<8>)s_y; // Min: -71, Max: 71 (Signed 8bit) + ap_int<4> dz = (ap_int<4>)g_z - (ap_int<4>)s_z; // Min: -7, Max: 7 (Signed 4bit) + if ((dx == 0 && dy == 0 && (dz == 1 || dz == -1)) || (dx == 0 && (dy == 1 || dy == -1) && dz == 0) || ((dx == 1 || dx == -1) && dy == 0 && dz == 0)) { + adjacents[line_num] = true; + } + + line_num++; + } + + // ================================ + // (Step.0) Initialization (END) + // ================================ + +#ifdef DEBUG_PRINT + max_queue_length = 0; + max_search_count = 0; + max_buffer_length = 0; +#endif + + ap_uint pointer = 0; // Pointer for line buffer + + // ================================ + // (Step.1) Initial Routing (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Initial Routing ..." << endl; +#endif + + FIRST_ROUTING: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + + s_idx[i] = pointer; + + if (adjacents[i] == true) continue; // Skip routing + +#ifdef DEBUG_PRINT + //cout << "LINE #" << (int)(i + 1) << endl; +#endif + // Routing + pointer = search(s_idx[i], paths, starts[i], goals[i], weights); + } + + // ================================ + // (Step.1) Initial Routing (END) + // ================================ + + + // Memories for Overlap Check + ap_uint<1> overlap_checks[MAX_CELLS]; + bool has_overlap = false; + + // ================================ + // (Step.2) Rip-up Routing (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Rip-up Routing ..." << endl; +#endif + + ROUTING: + for (ap_uint<16> round = 0; round < 32768 /* = (2048 * 16) */; round++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=32768 + + // Target line + ap_uint target = round % line_num; + ap_uint next_target = target + 1; + if (next_target == line_num) next_target = 0; + +#ifdef DEBUG_PRINT + //cout << "(round " << round << ") LINE #" << (int)(target + 1); + //cout << " -> " << pointer << endl; +#endif +#ifdef DEBUG_PRINT + int buffer_length = pointer - s_idx[target]; + if (max_buffer_length < buffer_length) { max_buffer_length = buffer_length; } +#endif + + // Skip routing + if (adjacents[target] == true) { + s_idx[target] = pointer; continue; + } + + + // (Step.2-1) Reset weights of target line + WEIGHT_RESET: + for (ap_uint j = s_idx[target]; j != s_idx[next_target]; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + weights[paths[j]] = 1; + } + + // (Step.2-2) Set weights of non-target lines and terminals + ap_uint<8> current_round_weight = new_weight(round); + WEIGHT_PATH: + for (ap_uint j = s_idx[next_target]; j != pointer; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=8192 + weights[paths[j]] = current_round_weight; + } + WEIGHT_TERMINAL: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + weights[starts[i]] = MAX_WEIGHT; + weights[goals[i]] = MAX_WEIGHT; + } + // Reset weight of start terminal of target line (bug avoiding) + // Restore original settings in (*) + weights[starts[target]] = 1; + + // (Step.2-3) Routing + s_idx[target] = pointer; + pointer = search(s_idx[target], paths, starts[target], goals[target], weights); + + // (*) + weights[starts[target]] = MAX_WEIGHT; + +#ifdef DEBUG_PRINT + bool ng = false; + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { + if (weights[starts[i]] != 255 || weights[goals[i]] != 255) { + cout << i << " "; ng = true; + } + } + if(ng) { cout << endl; } +#endif + + // (Step.2-4) Overlap check + has_overlap = false; + OVERLAP_RESET: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + overlap_checks[i] = 0; + } + OVERLAP_CHECK_LINE: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + overlap_checks[starts[i]] = 1; + overlap_checks[goals[i]] = 1; + } + OVERLAP_CHECK_PATH: + for (ap_uint j = s_idx[next_target]; j != pointer; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=8192 + ap_uint cell_id = paths[j]; + if (overlap_checks[cell_id]) { + has_overlap = true; break; + } + overlap_checks[cell_id] = 1; + } +#ifdef DEBUG_PRINT + if(!has_overlap){ cout << "ROUND: " << round << endl; } +#endif + if (!has_overlap) break; // Finish routing? + } + +#ifdef DEBUG_PRINT + cout << "MAX PQ LENGTH: " << max_queue_length << endl; + cout << "MAX SEARCH COUNT: " << max_search_count << endl; + cout << "MAX BUFFER: " << max_buffer_length << endl; +#endif + + // Not solved + if (has_overlap) { + *status = 1; return false; + } + + // ================================ + // (Step.2) Rip-up Routing (END) + // ================================ + + + // ================================ + // (Step.3) Output (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Output ..." << endl; +#endif + + // Init: Blank = 0 + OUTPUT_INIT: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + boardstr[i] = 0; + } + // Line + OUTPUT_LINE: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + boardstr[starts[i]] = (i + 1); + boardstr[goals[i]] = (i + 1); + + ap_uint p1; // p1: s_idx of target + ap_uint p2; // p2: s_idx of next target + p1 = s_idx[i]; + if (i == (ap_uint)(line_num-1)) { + p2 = s_idx[0]; + } + else { + p2 = s_idx[i+1]; + } + if ((ap_uint)(p2 - p1) > 8192){ + p2 = pointer; + } + OUTPUT_LINE_PATH: + for (ap_uint j = p1; j != p2; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + boardstr[paths[j]] = (i + 1); + } + } + + // ================================ + // (Step.3) Output (END) + // ================================ + + *status = 0; return true; +} + + +// ================================ // +// For Routing +// ================================ // + +// Max: 71, Min: 0 (7bit) +ap_uint<7> abs_uint7(ap_uint<7> a, ap_uint<7> b) { +#pragma HLS INLINE + if (a < b) { return b - a; } + else { return a - b; } +} +// Max: 7, Min: 0 (3bit) +ap_uint<3> abs_uint3(ap_uint<3> a, ap_uint<3> b) { +#pragma HLS INLINE + if (a < b) { return b - a; } + else { return a - b; } +} + +// Reference codes: +// http://lethe2211.hatenablog.com/entry/2014/12/30/011030 +// http://www.redblobgames.com/pathfinding/a-star/implementation.html +// Need to modify "array partition factor" +ap_uint search(ap_uint idx, ap_uint paths[MAX_BUFFER], ap_uint start, ap_uint goal, ap_uint<8> w[MAX_CELLS]) { + + ap_uint dist[MAX_CELLS]; + ap_uint prev[MAX_CELLS]; + + SEARCH_INIT_DIST: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + dist[i] = 65535; // = (2^16 - 1) + } + + // Priority queue (Circular list) + ap_uint top = 1, bottom = 0; + bool is_empty = true; + ap_uint<32> pq_nodes[MAX_PQ]; + +#ifdef DEBUG_PRINT + int queue_length = 0; + int search_count = 0; +#endif + + // Point of goal terminal + ap_uint<13> goal_xy = (ap_uint<13>)(goal >> BITWIDTH_Z); + ap_uint<7> goal_x = (ap_uint<7>)(goal_xy / MAX_WIDTH); + ap_uint<7> goal_y = (ap_uint<7>)(goal_xy - goal_x * MAX_WIDTH); + ap_uint<3> goal_z = (ap_uint<3>)(goal & BITMASK_Z); + + dist[start] = 0; + pq_push(pq_nodes, 0, start, &top, &bottom, &is_empty); // push start terminal + + SEARCH_PQ: + while (!is_empty) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=1000 +#pragma HLS LOOP_FLATTEN off + + ap_uint<16> prev_cost; + ap_uint<16> src; // target cell + pq_pop(pq_nodes, &prev_cost, &src, &top, &bottom, &is_empty); +#ifdef DEBUG_PRINT + search_count++; +#endif + + + // End routing + if (src == goal) break; + + + // Target cell + ap_uint<16> dist_src = dist[src]; + ap_uint<8> cost = w[src]; + // Point of target cell + ap_uint<13> src_xy = (ap_uint<13>)(src >> BITWIDTH_Z); + ap_uint<7> src_x = (ap_uint<7>)(src_xy / MAX_WIDTH); + ap_uint<7> src_y = (ap_uint<7>)(src_xy - src_x * MAX_WIDTH); + ap_uint<3> src_z = (ap_uint<3>)(src & BITMASK_Z); + + // Search adjacent cells + SEARCH_ADJACENTS: + for (ap_uint<3> a = 0; a < 6; a++) { + ap_int<8> dest_x = (ap_int<8>)src_x; // Min: -1, Max: 72 (Signed 8bit) + ap_int<8> dest_y = (ap_int<8>)src_y; // Min: -1, Max: 72 (Signed 8bit) + ap_int<5> dest_z = (ap_int<5>)src_z; // Min: -1, Max: 8 (Signed 5bit) + if (a == 0) { dest_x -= 1; } + if (a == 1) { dest_x += 1; } + if (a == 2) { dest_y -= 1; } + if (a == 3) { dest_y += 1; } + if (a == 4) { dest_z -= 1; } + if (a == 5) { dest_z += 1; } + + // Inside the board ? // + if (0 <= dest_x && dest_x < (ap_int<8>)size_x && 0 <= dest_y && dest_y < (ap_int<8>)size_y && 0 <= dest_z && dest_z < (ap_int<5>)size_z) { + // Adjacent cell + ap_uint<16> dest = (((ap_uint<16>)dest_x * MAX_WIDTH + (ap_uint<16>)dest_y) << BITWIDTH_Z) | (ap_uint<16>)dest_z; + ap_uint<16> dist_new = dist_src + cost; + + if (dist[dest] > dist_new) { + dist[dest] = dist_new; // Update dist + prev[dest] = src; // Recode previous cell + dist_new += abs_uint7(dest_x, goal_x) + abs_uint7(dest_y, goal_y) + abs_uint3(dest_z, goal_z); // A* heuristic + pq_push(pq_nodes, dist_new, dest, &top, &bottom, &is_empty); // push adjacent cell + } + } + } +#ifdef DEBUG_PRINT + if (queue_length < (bottom-top+1)) { queue_length = (bottom-top+1); } +#endif + } + + // Output target path + // Note: Do not include start & goal terminals + ap_uint<16> t = prev[goal]; + + // Backtracking + ap_uint p = idx; // buffer-idx + SEARCH_BACKTRACK: + while (t != start) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + paths[p] = t; + p++; + t = prev[t]; + } + +#ifdef DEBUG_PRINT + if (max_queue_length < queue_length) { max_queue_length = queue_length; } + if (max_search_count < search_count) { max_search_count = search_count; } +#endif + + return p; +} + +// Queue push (Enqueue) +// Need to modify "trip count" (2) +void pq_push(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> priority, ap_uint<16> data, ap_uint *top, ap_uint *bottom, bool *is_empty) { +#pragma HLS INLINE + + (*bottom)++; + if ((*bottom) == (*top) && !(*is_empty)) { (*bottom)--; } // Queue is full -> Last element is automatically removed + + // Binary search for circular list + ap_uint t = (*top); + ap_uint b = (*bottom); + ap_uint h = ((ap_uint)(b - t) / 2) + t; + // Note: "h = (t + b) / 2" causes a bug! + PQ_PUSH_BINARY: + while (t != b) { +#pragma HLS LOOP_TRIPCOUNT min=0 max=13 +/** Set!: min=0 max=PQ_BIT **/ + if ((ap_uint<16>)(pq_nodes[h] & PQ_PRIORITY_MASK) >= priority) { + b = h; + } + else { + t = h + 1; + } + h = ((ap_uint)(b - t) / 2) + t; + } + + // Shifting + ap_uint shift_count = (*bottom) - t; // # of shifting + + ap_uint p0 = (*bottom), p1; + PQ_PUSH_SHIFT: + for (ap_uint j = 0; j < (ap_uint)(shift_count); j++) { +#pragma HLS LOOP_TRIPCOUNT min=0 max=8191 +/** Set!: min=0 max=MAX_PQ-1 **/ + p0 = p0 - 1; + p1 = p0 + 1; + pq_nodes[p1] = pq_nodes[p0]; + } + pq_nodes[p0] = ((ap_uint<32>)data << PQ_PRIORITY_WIDTH) | (ap_uint<32>)priority; + *is_empty = false; +} + +// Queue pop (Dequeue) +void pq_pop(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> *ret_priority, ap_uint<16> *ret_data, ap_uint *top, ap_uint *bottom, bool *is_empty) { +#pragma HLS INLINE + + *ret_priority = (ap_uint<16>)(pq_nodes[(*top)] & PQ_PRIORITY_MASK); + *ret_data = (ap_uint<16>)(pq_nodes[(*top)] >> PQ_PRIORITY_WIDTH); + (*top)++; + if (((*bottom)-(*top)+1) == 0) { *is_empty = true; } +} diff --git a/hls_2018/router_01/router.hpp b/hls_2018/router_01/router.hpp new file mode 100755 index 0000000000000000000000000000000000000000..3455260033e300eb80bbd03e81146e4ad0996bb0 --- /dev/null +++ b/hls_2018/router_01/router.hpp @@ -0,0 +1,56 @@ +/** + * router.hpp + * + * for Vivado HLS + */ + +#ifndef __ROUTER_HPP__ +#define __ROUTER_HPP__ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +//#define DEBUG_PRINT // for debug + +#ifdef DEBUG_PRINT +using namespace std; +#endif + +// Parameters +#define MAX_WIDTH 72 // Max of X, Y +#define BITWIDTH_XY 13 +#define BITMASK_XY 65528 // 1111 1111 1111 1000 +#define MAX_LAYER 8 // Max of Z +#define BITWIDTH_Z 3 +#define BITMASK_Z 7 // 0000 0000 0000 0111 + +#define MAX_CELLS 41472 // Max #cells (16bit) +#define MAX_LINES 1024 // Max #lines (10bit) +#define MAX_PQ 8192 // Queue size (13bit) +#define MAX_BUFFER 16384 // Line buffer size (14bit) +#define CELL_BIT 16 +#define LINE_BIT 10 +#define PQ_BIT 13 +#define BUFF_BIT 14 + +#define PQ_PRIORITY_WIDTH 16 +#define PQ_PRIORITY_MASK 65535 // 0000 0000 0000 0000 1111 1111 1111 1111 +#define PQ_DATA_WIDTH 16 +#define PQ_DATA_MASK 4294901760 // 1111 1111 1111 1111 0000 0000 0000 0000 + +#define MAX_WEIGHT 255 // Max weight +#define BOARDSTR_SIZE 41472 // Size of I/O + +ap_uint<8> new_weight(ap_uint<16> x); +bool pynqrouter(char boardstr[BOARDSTR_SIZE], ap_uint<32> seed, ap_int<32> *status); + +ap_uint<7> abs_uint7(ap_uint<7> a, ap_uint<7> b); +ap_uint<3> abs_uint3(ap_uint<3> a, ap_uint<3> b); +ap_uint search(ap_uint idx, ap_uint paths[MAX_BUFFER], ap_uint start, ap_uint goal, ap_uint<8> w[MAX_WEIGHT]); +void pq_push(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> priority, ap_uint<16> data, ap_uint *top, ap_uint *bottom, bool *is_empty); +void pq_pop(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> *ret_priority, ap_uint<16> *ret_data, ap_uint *top, ap_uint *bottom, bool *is_empty); + +#endif /* __ROUTER_HPP__ */ diff --git a/hls_2018/router_02/router.cpp b/hls_2018/router_02/router.cpp new file mode 100755 index 0000000000000000000000000000000000000000..cf50624b09b25376142663570acba82f78c3f15a --- /dev/null +++ b/hls_2018/router_02/router.cpp @@ -0,0 +1,557 @@ +/** + * router.cpp + * + * for Vivado HLS + */ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +#include "./router.hpp" + +// Set weight +ap_uint<8> new_weight(ap_uint<16> x) { +#pragma HLS INLINE + // K. Terada: y = 1~32 (8bit) + ap_uint<8> y; + y = ((x & 255) >> 3) + 1; + return y; +} + + +// Global values +static ap_uint<7> size_x; // X +static ap_uint<7> size_y; // Y +static ap_uint<4> size_z; // Z + +static ap_uint line_num = 0; // #Lines + +#ifdef DEBUG_PRINT +int max_queue_length; // Max length of priority queue +int max_search_count; // Max count of queue pop +int max_buffer_length; // Max length of line buffer +#endif + + +bool pynqrouter(char boardstr[BOARDSTR_SIZE], ap_uint<32> seed, ap_int<32> *status) { +#pragma HLS INTERFACE s_axilite port=boardstr bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=seed bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=status bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=return bundle=AXI4LS + + // status(0:Solved, 1:Not solved) + *status = -1; + + // For all lines + ap_uint paths[MAX_BUFFER]; // Line buffer + + // For each line + // Note: Should not partition completely + bool adjacents[MAX_LINES]; // Line has adjacent terminals? + ap_uint starts[MAX_LINES]; // Start list + ap_uint goals[MAX_LINES]; // Goal list + ap_uint s_idx[MAX_LINES]; // Start point on line buffer + + ap_uint<8> weights[MAX_CELLS]; // Weight of each cell + // Note: Should not partition weight array + // since each element will be accessed in "random" order + + + // ================================ + // (Step.0) Initialization (BEGIN) + // ================================ + + // Note: Loop counter -> need an extra bit (for condition determination) + + INIT_WEIGHTS: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + weights[i] = 1; + } + + /// Parse /// + size_x = (boardstr[1] - '0') * 10 + (boardstr[2] - '0'); + size_y = (boardstr[4] - '0') * 10 + (boardstr[5] - '0'); + size_z = (boardstr[7] - '0'); + + INIT_BOARDS: + for (ap_uint idx = 8; idx < (ap_uint)(BOARDSTR_SIZE); idx+=11) { + + // NULL-terminated + if (boardstr[idx] == 0) break; + + // Start & Goal of each line + ap_uint<7> s_x = (boardstr[idx+1] - '0') * 10 + (boardstr[idx+2] - '0'); + ap_uint<7> s_y = (boardstr[idx+3] - '0') * 10 + (boardstr[idx+4] - '0'); + ap_uint<3> s_z = (boardstr[idx+5] - '0') - 1; + ap_uint<7> g_x = (boardstr[idx+6] - '0') * 10 + (boardstr[idx+7] - '0'); + ap_uint<7> g_y = (boardstr[idx+8] - '0') * 10 + (boardstr[idx+9] - '0'); + ap_uint<3> g_z = (boardstr[idx+10] - '0') - 1; + + ap_uint start_id = (((ap_uint)s_x * MAX_WIDTH + (ap_uint)s_y) << BITWIDTH_Z) | (ap_uint)s_z; + ap_uint goal_id = (((ap_uint)g_x * MAX_WIDTH + (ap_uint)g_y) << BITWIDTH_Z) | (ap_uint)g_z; + starts[line_num] = start_id; + goals[line_num] = goal_id; + weights[start_id] = MAX_WEIGHT; + weights[goal_id] = MAX_WEIGHT; + + // Line has adjacent terminals? + adjacents[line_num] = false; + ap_int<8> dx = (ap_int<8>)g_x - (ap_int<8>)s_x; // Min: -71, Max: 71 (Signed 8bit) + ap_int<8> dy = (ap_int<8>)g_y - (ap_int<8>)s_y; // Min: -71, Max: 71 (Signed 8bit) + ap_int<4> dz = (ap_int<4>)g_z - (ap_int<4>)s_z; // Min: -7, Max: 7 (Signed 4bit) + if ((dx == 0 && dy == 0 && (dz == 1 || dz == -1)) || (dx == 0 && (dy == 1 || dy == -1) && dz == 0) || ((dx == 1 || dx == -1) && dy == 0 && dz == 0)) { + adjacents[line_num] = true; + } + + line_num++; + } + + // ================================ + // (Step.0) Initialization (END) + // ================================ + +#ifdef DEBUG_PRINT + max_queue_length = 0; + max_search_count = 0; + max_buffer_length = 0; +#endif + + ap_uint pointer = 0; // Pointer for line buffer + + // ================================ + // (Step.1) Initial Routing (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Initial Routing ..." << endl; +#endif + + FIRST_ROUTING: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + + s_idx[i] = pointer; + + if (adjacents[i] == true) continue; // Skip routing + +#ifdef DEBUG_PRINT + //cout << "LINE #" << (int)(i + 1) << endl; +#endif + // Routing + pointer = search(s_idx[i], paths, starts[i], goals[i], weights); + } + + // ================================ + // (Step.1) Initial Routing (END) + // ================================ + + + // Memories for Overlap Check + ap_uint<1> overlap_checks[MAX_CELLS]; + bool has_overlap = false; + + // ================================ + // (Step.2) Rip-up Routing (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Rip-up Routing ..." << endl; +#endif + + ROUTING: + for (ap_uint<16> round = 0; round < 32768 /* = (2048 * 16) */; round++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=32768 + + // Target line + ap_uint target = round % line_num; + ap_uint next_target = target + 1; + if (next_target == line_num) next_target = 0; + +#ifdef DEBUG_PRINT + //cout << "(round " << round << ") LINE #" << (int)(target + 1); + //cout << " -> " << pointer << endl; +#endif +#ifdef DEBUG_PRINT + int buffer_length = pointer - s_idx[target]; + if (max_buffer_length < buffer_length) { max_buffer_length = buffer_length; } +#endif + + // Skip routing + if (adjacents[target] == true) { + s_idx[target] = pointer; continue; + } + + + // (Step.2-1) Reset weights of target line + WEIGHT_RESET: + for (ap_uint j = s_idx[target]; j != s_idx[next_target]; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + weights[paths[j]] = 1; + } + + // (Step.2-2) Set weights of non-target lines and terminals + ap_uint<8> current_round_weight = new_weight(round); + WEIGHT_PATH: + for (ap_uint j = s_idx[next_target]; j != pointer; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=8192 + weights[paths[j]] = current_round_weight; + } + WEIGHT_TERMINAL: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + weights[starts[i]] = MAX_WEIGHT; + weights[goals[i]] = MAX_WEIGHT; + } + // Reset weight of start terminal of target line (bug avoiding) + // Restore original settings in (*) + weights[starts[target]] = 1; + + // (Step.2-3) Routing + s_idx[target] = pointer; + pointer = search(s_idx[target], paths, starts[target], goals[target], weights); + + // (*) + weights[starts[target]] = MAX_WEIGHT; + +#ifdef DEBUG_PRINT + bool ng = false; + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { + if (weights[starts[i]] != 255 || weights[goals[i]] != 255) { + cout << i << " "; ng = true; + } + } + if(ng) { cout << endl; } +#endif + + // (Step.2-4) Overlap check + has_overlap = false; + OVERLAP_RESET: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + overlap_checks[i] = 0; + } + OVERLAP_CHECK_LINE: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + overlap_checks[starts[i]] = 1; + overlap_checks[goals[i]] = 1; + } + OVERLAP_CHECK_PATH: + for (ap_uint j = s_idx[next_target]; j != pointer; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=8192 + ap_uint cell_id = paths[j]; + if (overlap_checks[cell_id]) { + has_overlap = true; break; + } + overlap_checks[cell_id] = 1; + } +#ifdef DEBUG_PRINT + if(!has_overlap){ cout << "ROUND: " << round << endl; } +#endif + if (!has_overlap) break; // Finish routing? + } + +#ifdef DEBUG_PRINT + cout << "MAX PQ LENGTH: " << max_queue_length << endl; + cout << "MAX SEARCH COUNT: " << max_search_count << endl; + cout << "MAX BUFFER: " << max_buffer_length << endl; +#endif + + // Not solved + if (has_overlap) { + *status = 1; return false; + } + + // ================================ + // (Step.2) Rip-up Routing (END) + // ================================ + + + // ================================ + // (Step.3) Output (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Output ..." << endl; +#endif + + // Init: Blank = 0 + OUTPUT_INIT: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + boardstr[i] = 0; + } + // Line + OUTPUT_LINE: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + boardstr[starts[i]] = (i + 1); + boardstr[goals[i]] = (i + 1); + + ap_uint p1; // p1: s_idx of target + ap_uint p2; // p2: s_idx of next target + p1 = s_idx[i]; + if (i == (ap_uint)(line_num-1)) { + p2 = s_idx[0]; + } + else { + p2 = s_idx[i+1]; + } + if ((ap_uint)(p2 - p1) > 8192){ + p2 = pointer; + } + OUTPUT_LINE_PATH: + for (ap_uint j = p1; j != p2; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + boardstr[paths[j]] = (i + 1); + } + } + + // ================================ + // (Step.3) Output (END) + // ================================ + + *status = 0; return true; +} + + +// ================================ // +// For Routing +// ================================ // + +// Max: 71, Min: 0 (7bit) +ap_uint<7> abs_uint7(ap_uint<7> a, ap_uint<7> b) { +#pragma HLS INLINE + if (a < b) { return b - a; } + else { return a - b; } +} +// Max: 7, Min: 0 (3bit) +ap_uint<3> abs_uint3(ap_uint<3> a, ap_uint<3> b) { +#pragma HLS INLINE + if (a < b) { return b - a; } + else { return a - b; } +} + +// Reference codes: +// http://lethe2211.hatenablog.com/entry/2014/12/30/011030 +// http://www.redblobgames.com/pathfinding/a-star/implementation.html +// Need to modify "array partition factor" +ap_uint search(ap_uint idx, ap_uint paths[MAX_BUFFER], ap_uint start, ap_uint goal, ap_uint<8> w[MAX_CELLS]) { + + ap_uint dist[MAX_CELLS]; + ap_uint prev[MAX_CELLS]; + + SEARCH_INIT_DIST: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + dist[i] = 65535; // = (2^16 - 1) + } + + // Priority queue (Circular list) + ap_uint top = 1, bottom = 0; + bool is_empty = true; + ap_uint<32> pq_nodes[MAX_PQ]; +#pragma HLS ARRAY_PARTITION variable=pq_nodes cyclic factor=32 dim=1 +/** Set!: factor=PP **/ + +#ifdef DEBUG_PRINT + int queue_length = 0; + int search_count = 0; +#endif + + // Point of goal terminal + ap_uint<13> goal_xy = (ap_uint<13>)(goal >> BITWIDTH_Z); + ap_uint<7> goal_x = (ap_uint<7>)(goal_xy / MAX_WIDTH); + ap_uint<7> goal_y = (ap_uint<7>)(goal_xy - goal_x * MAX_WIDTH); + ap_uint<3> goal_z = (ap_uint<3>)(goal & BITMASK_Z); + + dist[start] = 0; + pq_push(pq_nodes, 0, start, &top, &bottom, &is_empty); // push start terminal + + SEARCH_PQ: + while (!is_empty) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=1000 +#pragma HLS LOOP_FLATTEN off + + ap_uint<16> prev_cost; + ap_uint<16> src; // target cell + pq_pop(pq_nodes, &prev_cost, &src, &top, &bottom, &is_empty); +#ifdef DEBUG_PRINT + search_count++; +#endif + + + // End routing + if (src == goal) break; + + + // Target cell + ap_uint<16> dist_src = dist[src]; + ap_uint<8> cost = w[src]; + // Point of target cell + ap_uint<13> src_xy = (ap_uint<13>)(src >> BITWIDTH_Z); + ap_uint<7> src_x = (ap_uint<7>)(src_xy / MAX_WIDTH); + ap_uint<7> src_y = (ap_uint<7>)(src_xy - src_x * MAX_WIDTH); + ap_uint<3> src_z = (ap_uint<3>)(src & BITMASK_Z); + + // Search adjacent cells + SEARCH_ADJACENTS: + for (ap_uint<3> a = 0; a < 6; a++) { + ap_int<8> dest_x = (ap_int<8>)src_x; // Min: -1, Max: 72 (Signed 8bit) + ap_int<8> dest_y = (ap_int<8>)src_y; // Min: -1, Max: 72 (Signed 8bit) + ap_int<5> dest_z = (ap_int<5>)src_z; // Min: -1, Max: 8 (Signed 5bit) + if (a == 0) { dest_x -= 1; } + if (a == 1) { dest_x += 1; } + if (a == 2) { dest_y -= 1; } + if (a == 3) { dest_y += 1; } + if (a == 4) { dest_z -= 1; } + if (a == 5) { dest_z += 1; } + + // Inside the board ? // + if (0 <= dest_x && dest_x < (ap_int<8>)size_x && 0 <= dest_y && dest_y < (ap_int<8>)size_y && 0 <= dest_z && dest_z < (ap_int<5>)size_z) { + // Adjacent cell + ap_uint<16> dest = (((ap_uint<16>)dest_x * MAX_WIDTH + (ap_uint<16>)dest_y) << BITWIDTH_Z) | (ap_uint<16>)dest_z; + ap_uint<16> dist_new = dist_src + cost; + + if (dist[dest] > dist_new) { + dist[dest] = dist_new; // Update dist + prev[dest] = src; // Recode previous cell + dist_new += abs_uint7(dest_x, goal_x) + abs_uint7(dest_y, goal_y) + abs_uint3(dest_z, goal_z); // A* heuristic + pq_push(pq_nodes, dist_new, dest, &top, &bottom, &is_empty); // push adjacent cell + } + } + } +#ifdef DEBUG_PRINT + if (queue_length < (bottom-top+1)) { queue_length = (bottom-top+1); } +#endif + } + + // Output target path + // Note: Do not include start & goal terminals + ap_uint<16> t = prev[goal]; + + // Backtracking + ap_uint p = idx; // buffer-idx + SEARCH_BACKTRACK: + while (t != start) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + paths[p] = t; + p++; + t = prev[t]; + } + +#ifdef DEBUG_PRINT + if (max_queue_length < queue_length) { max_queue_length = queue_length; } + if (max_search_count < search_count) { max_search_count = search_count; } +#endif + + return p; +} + +// Queue push (Enqueue) +// Need to modify "trip count" (5) +void pq_push(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> priority, ap_uint<16> data, ap_uint *top, ap_uint *bottom, bool *is_empty) { +#pragma HLS INLINE + + (*bottom)++; + if ((*bottom) == (*top) && !(*is_empty)) { (*bottom)--; } // Queue is full -> Last element is automatically removed + + // Binary search for circular list + ap_uint t = (*top); + ap_uint b = (*bottom); + ap_uint h = ((ap_uint)(b - t) / 2) + t; + // Note: "h = (t + b) / 2" causes a bug! + PQ_PUSH_BINARY: + while (t != b) { +#pragma HLS LOOP_TRIPCOUNT min=0 max=13 +/** Set!: min=0 max=PQ_BIT **/ + if ((ap_uint<16>)(pq_nodes[h] & PQ_PRIORITY_MASK) >= priority) { + b = h; + } + else { + t = h + 1; + } + h = ((ap_uint)(b - t) / 2) + t; + } + + // Parallel shifting (PP parallel) + ap_uint shift_count = (*bottom) - t; // # of shifting + + ap_uint bottom_H = (*bottom) / PP; // High-order bits of (*bottom) + ap_uint bottom_L = (*bottom) % PP; // Low-order bits of (*bottom) + ap_uint p0 = (*bottom), p1; + if (shift_count > (ap_uint)(bottom_L)) { + // bottom_L bit shift + PQ_PUSH_SHIFT_H_1: + for (ap_uint k = 0; k < (ap_uint)(bottom_L); k++) { +#pragma HLS LOOP_TRIPCOUNT min=0 max=31 +/** Set!: min=0 max=PP-1 **/ + p0 = p0 - 1; // source + p1 = p0 + 1; // destination + pq_nodes[p1] = pq_nodes[p0]; + } + shift_count = shift_count - bottom_L; + } + else { + // shift_count bit shift + PQ_PUSH_SHIFT_H_2: + for (ap_uint k = 0; k < (ap_uint)(shift_count); k++) { +#pragma HLS LOOP_TRIPCOUNT min=0 max=31 +/** Set!: min=0 max=PP-1 **/ + p0 = p0 - 1; // source + p1 = p0 + 1; // destination + pq_nodes[p1] = pq_nodes[p0]; + } + pq_nodes[p0] = ((ap_uint<32>)data << PQ_PRIORITY_WIDTH) | (ap_uint<32>)priority; + *is_empty = false; return; + } + + ap_uint shift_count_i = shift_count >> LOG_PP; + ap_uint shift_count_j = shift_count - (shift_count_i << LOG_PP); + // Note: Bit adjustment is necessary + // when the values of PP & LOG_PP have been changed. + + ap_uint p[PP+1]; +#pragma HLS ARRAY_PARTITION variable=p complete dim=1 + p[0] = bottom_H * PP; + ap_uint<32> tmp[PP]; +#pragma HLS ARRAY_PARTITION variable=tmp complete dim=1 + PQ_PUSH_SHIFT_I: + for (ap_uint i = 0; i < (ap_uint)(shift_count_i); i++) { +#pragma HLS LOOP_TRIPCOUNT min=0 max=255 +/** Set!: min=0 max=(MAX_PQ)/(PP)-1 **/ + p[0] = p[0] - PP; + for (ap_uint k = 1; k <= (ap_uint)(PP); k++) { +#pragma HLS UNROLL + p[k] = p[0] + k; + } + for (ap_uint k = 1; k <= (ap_uint)(PP); k++) { +#pragma HLS UNROLL + tmp[k-1] = pq_nodes[p[k-1]]; + } + for (ap_uint k = 1; k <= (ap_uint)(PP); k++) { +#pragma HLS UNROLL + pq_nodes[p[k]] = tmp[k-1]; + } + } + PQ_PUSH_SHIFT_J: + for (ap_uint j = 0; j < (ap_uint)(shift_count_j); j++) { +#pragma HLS LOOP_TRIPCOUNT min=0 max=31 +/** Set!: min=0 max=PP-1 **/ + p[0] = p[0] - 1; + p[1] = p[0] + 1; + pq_nodes[p[1]] = pq_nodes[p[0]]; + } + pq_nodes[p[0]] = ((ap_uint<32>)data << PQ_PRIORITY_WIDTH) | (ap_uint<32>)priority; + *is_empty = false; +} + +// Queue pop (Dequeue) +void pq_pop(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> *ret_priority, ap_uint<16> *ret_data, ap_uint *top, ap_uint *bottom, bool *is_empty) { +#pragma HLS INLINE + + *ret_priority = (ap_uint<16>)(pq_nodes[(*top)] & PQ_PRIORITY_MASK); + *ret_data = (ap_uint<16>)(pq_nodes[(*top)] >> PQ_PRIORITY_WIDTH); + (*top)++; + if (((*bottom)-(*top)+1) == 0) { *is_empty = true; } +} diff --git a/hls_2018/router_02/router.hpp b/hls_2018/router_02/router.hpp new file mode 100755 index 0000000000000000000000000000000000000000..7a5b2412941fec9d64718355fbf5bed1e3c128be --- /dev/null +++ b/hls_2018/router_02/router.hpp @@ -0,0 +1,59 @@ +/** + * router.hpp + * + * for Vivado HLS + */ + +#ifndef __ROUTER_HPP__ +#define __ROUTER_HPP__ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +//#define DEBUG_PRINT // for debug + +#ifdef DEBUG_PRINT +using namespace std; +#endif + +// Parameters +#define MAX_WIDTH 72 // Max of X, Y +#define BITWIDTH_XY 13 +#define BITMASK_XY 65528 // 1111 1111 1111 1000 +#define MAX_LAYER 8 // Max of Z +#define BITWIDTH_Z 3 +#define BITMASK_Z 7 // 0000 0000 0000 0111 + +#define MAX_CELLS 41472 // Max #cells (16bit) +#define MAX_LINES 1024 // Max #lines (10bit) +#define MAX_PQ 8192 // Queue size (13bit) +#define MAX_BUFFER 16384 // Line buffer size (14bit) +#define CELL_BIT 16 +#define LINE_BIT 10 +#define PQ_BIT 13 +#define BUFF_BIT 14 + +#define PQ_PRIORITY_WIDTH 16 +#define PQ_PRIORITY_MASK 65535 // 0000 0000 0000 0000 1111 1111 1111 1111 +#define PQ_DATA_WIDTH 16 +#define PQ_DATA_MASK 4294901760 // 1111 1111 1111 1111 0000 0000 0000 0000 + +#define MAX_WEIGHT 255 // Max weight +#define BOARDSTR_SIZE 41472 // Size of I/O + +#define PP 32 // Parameter for Parallel shifting in queue push +#define LOG_PP 5 // LOG_PP is log2(PP) + +ap_uint<8> new_weight(ap_uint<16> x); +bool pynqrouter(char boardstr[BOARDSTR_SIZE], ap_uint<32> seed, ap_int<32> *status); + +ap_uint<7> abs_uint7(ap_uint<7> a, ap_uint<7> b); +ap_uint<3> abs_uint3(ap_uint<3> a, ap_uint<3> b); +ap_uint search(ap_uint idx, ap_uint paths[MAX_BUFFER], ap_uint start, ap_uint goal, ap_uint<8> w[MAX_WEIGHT]); +void pq_push(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> priority, ap_uint<16> data, ap_uint *top, ap_uint *bottom, bool *is_empty); +void pq_pop(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> *ret_priority, ap_uint<16> *ret_data, ap_uint *top, ap_uint *bottom, bool *is_empty); + +#endif /* __ROUTER_HPP__ */ diff --git a/hls_2018/router_03/router.cpp b/hls_2018/router_03/router.cpp new file mode 100755 index 0000000000000000000000000000000000000000..a2e01cbcd25f823316b1f718ad2ec9e4a4d2349d --- /dev/null +++ b/hls_2018/router_03/router.cpp @@ -0,0 +1,513 @@ +/** + * router.cpp + * + * for Vivado HLS + */ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +#include "./router.hpp" + +// Set weight +ap_uint<8> new_weight(ap_uint<16> x) { +#pragma HLS INLINE + // K. Terada: y = 1~32 (8bit) + ap_uint<8> y; + y = ((x & 255) >> 3) + 1; + return y; +} + + +// Global values +static ap_uint<7> size_x; // X +static ap_uint<7> size_y; // Y +static ap_uint<4> size_z; // Z + +static ap_uint line_num = 0; // #Lines + +#ifdef DEBUG_PRINT +int max_queue_length; // Max length of priority queue +int max_search_count; // Max count of queue pop +int max_buffer_length; // Max length of line buffer +#endif + + +bool pynqrouter(char boardstr[BOARDSTR_SIZE], ap_uint<32> seed, ap_int<32> *status) { +#pragma HLS INTERFACE s_axilite port=boardstr bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=seed bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=status bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=return bundle=AXI4LS + + // status(0:Solved, 1:Not solved) + *status = -1; + + // For all lines + ap_uint paths[MAX_BUFFER]; // Line buffer + + // For each line + // Note: Should not partition completely + bool adjacents[MAX_LINES]; // Line has adjacent terminals? + ap_uint starts[MAX_LINES]; // Start list + ap_uint goals[MAX_LINES]; // Goal list + ap_uint s_idx[MAX_LINES]; // Start point on line buffer + + ap_uint<8> weights[MAX_CELLS]; // Weight of each cell + // Note: Should not partition weight array + // since each element will be accessed in "random" order + + + // ================================ + // (Step.0) Initialization (BEGIN) + // ================================ + + // Note: Loop counter -> need an extra bit (for condition determination) + + INIT_WEIGHTS: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + weights[i] = 1; + } + + /// Parse /// + size_x = (boardstr[1] - '0') * 10 + (boardstr[2] - '0'); + size_y = (boardstr[4] - '0') * 10 + (boardstr[5] - '0'); + size_z = (boardstr[7] - '0'); + + INIT_BOARDS: + for (ap_uint idx = 8; idx < (ap_uint)(BOARDSTR_SIZE); idx+=11) { + + // NULL-terminated + if (boardstr[idx] == 0) break; + + // Start & Goal of each line + ap_uint<7> s_x = (boardstr[idx+1] - '0') * 10 + (boardstr[idx+2] - '0'); + ap_uint<7> s_y = (boardstr[idx+3] - '0') * 10 + (boardstr[idx+4] - '0'); + ap_uint<3> s_z = (boardstr[idx+5] - '0') - 1; + ap_uint<7> g_x = (boardstr[idx+6] - '0') * 10 + (boardstr[idx+7] - '0'); + ap_uint<7> g_y = (boardstr[idx+8] - '0') * 10 + (boardstr[idx+9] - '0'); + ap_uint<3> g_z = (boardstr[idx+10] - '0') - 1; + + ap_uint start_id = (((ap_uint)s_x * MAX_WIDTH + (ap_uint)s_y) << BITWIDTH_Z) | (ap_uint)s_z; + ap_uint goal_id = (((ap_uint)g_x * MAX_WIDTH + (ap_uint)g_y) << BITWIDTH_Z) | (ap_uint)g_z; + starts[line_num] = start_id; + goals[line_num] = goal_id; + weights[start_id] = MAX_WEIGHT; + weights[goal_id] = MAX_WEIGHT; + + // Line has adjacent terminals? + adjacents[line_num] = false; + ap_int<8> dx = (ap_int<8>)g_x - (ap_int<8>)s_x; // Min: -71, Max: 71 (Signed 8bit) + ap_int<8> dy = (ap_int<8>)g_y - (ap_int<8>)s_y; // Min: -71, Max: 71 (Signed 8bit) + ap_int<4> dz = (ap_int<4>)g_z - (ap_int<4>)s_z; // Min: -7, Max: 7 (Signed 4bit) + if ((dx == 0 && dy == 0 && (dz == 1 || dz == -1)) || (dx == 0 && (dy == 1 || dy == -1) && dz == 0) || ((dx == 1 || dx == -1) && dy == 0 && dz == 0)) { + adjacents[line_num] = true; + } + + line_num++; + } + + // ================================ + // (Step.0) Initialization (END) + // ================================ + +#ifdef DEBUG_PRINT + max_queue_length = 0; + max_search_count = 0; + max_buffer_length = 0; +#endif + + ap_uint pointer = 0; // Pointer for line buffer + + // ================================ + // (Step.1) Initial Routing (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Initial Routing ..." << endl; +#endif + + FIRST_ROUTING: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + + s_idx[i] = pointer; + + if (adjacents[i] == true) continue; // Skip routing + +#ifdef DEBUG_PRINT + //cout << "LINE #" << (int)(i + 1) << endl; +#endif + // Routing + pointer = search(s_idx[i], paths, starts[i], goals[i], weights); + } + + // ================================ + // (Step.1) Initial Routing (END) + // ================================ + + + // Memories for Overlap Check + ap_uint<1> overlap_checks[MAX_CELLS]; + bool has_overlap = false; + + // ================================ + // (Step.2) Rip-up Routing (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Rip-up Routing ..." << endl; +#endif + + ROUTING: + for (ap_uint<16> round = 0; round < 32768 /* = (2048 * 16) */; round++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=32768 + + // Target line + ap_uint target = round % line_num; + ap_uint next_target = target + 1; + if (next_target == line_num) next_target = 0; + +#ifdef DEBUG_PRINT + cout << "(round " << round << ") LINE #" << (int)(target + 1); + cout << " -> " << pointer << endl; +#endif +#ifdef DEBUG_PRINT + int buffer_length = pointer - s_idx[target]; + if (max_buffer_length < buffer_length) { max_buffer_length = buffer_length; } +#endif + + // Skip routing + if (adjacents[target] == true) { + s_idx[target] = pointer; continue; + } + + + // (Step.2-1) Reset weights of target line + WEIGHT_RESET: + for (ap_uint j = s_idx[target]; j != s_idx[next_target]; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + weights[paths[j]] = 1; + } + + // (Step.2-2) Set weights of non-target lines and terminals + ap_uint<8> current_round_weight = new_weight(round); + WEIGHT_PATH: + for (ap_uint j = s_idx[next_target]; j != pointer; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=8192 + weights[paths[j]] = current_round_weight; + } + WEIGHT_TERMINAL: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + weights[starts[i]] = MAX_WEIGHT; + weights[goals[i]] = MAX_WEIGHT; + } + // Reset weight of start terminal of target line (bug avoiding) + // Restore original settings in (*) + weights[starts[target]] = 1; + + // (Step.2-3) Routing + s_idx[target] = pointer; + pointer = search(s_idx[target], paths, starts[target], goals[target], weights); + + // (*) + weights[starts[target]] = MAX_WEIGHT; + +#ifdef DEBUG_PRINT + bool ng = false; + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { + if (weights[starts[i]] != 255 || weights[goals[i]] != 255) { + cout << i << " "; ng = true; + } + } + if(ng) { cout << endl; } +#endif + + // (Step.2-4) Overlap check + has_overlap = false; + OVERLAP_RESET: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + overlap_checks[i] = 0; + } + OVERLAP_CHECK_LINE: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + overlap_checks[starts[i]] = 1; + overlap_checks[goals[i]] = 1; + } + OVERLAP_CHECK_PATH: + for (ap_uint j = s_idx[next_target]; j != pointer; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=8192 + ap_uint cell_id = paths[j]; + if (overlap_checks[cell_id]) { + has_overlap = true; break; + } + overlap_checks[cell_id] = 1; + } +#ifdef DEBUG_PRINT + if(!has_overlap){ cout << "ROUND: " << round << endl; } +#endif + if (!has_overlap) break; // Finish routing? + } + +#ifdef DEBUG_PRINT + cout << "MAX PQ LENGTH: " << max_queue_length << endl; + cout << "MAX SEARCH COUNT: " << max_search_count << endl; + cout << "MAX BUFFER: " << max_buffer_length << endl; +#endif + + // Not solved + if (has_overlap) { + *status = 1; return false; + } + + // ================================ + // (Step.2) Rip-up Routing (END) + // ================================ + + + // ================================ + // (Step.3) Output (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Output ..." << endl; +#endif + + // Init: Blank = 0 + OUTPUT_INIT: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + boardstr[i] = 0; + } + // Line + OUTPUT_LINE: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + boardstr[starts[i]] = (i + 1); + boardstr[goals[i]] = (i + 1); + + ap_uint p1; // p1: s_idx of target + ap_uint p2; // p2: s_idx of next target + p1 = s_idx[i]; + if (i == (ap_uint)(line_num-1)) { + p2 = s_idx[0]; + } + else { + p2 = s_idx[i+1]; + } + if ((ap_uint)(p2 - p1) > 8192){ + p2 = pointer; + } + OUTPUT_LINE_PATH: + for (ap_uint j = p1; j != p2; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + boardstr[paths[j]] = (i + 1); + } + } + + // ================================ + // (Step.3) Output (END) + // ================================ + + *status = 0; return true; +} + + +// ================================ // +// For Routing +// ================================ // + +// Max: 71, Min: 0 (7bit) +ap_uint<7> abs_uint7(ap_uint<7> a, ap_uint<7> b) { +#pragma HLS INLINE + if (a < b) { return b - a; } + else { return a - b; } +} +// Max: 7, Min: 0 (3bit) +ap_uint<3> abs_uint3(ap_uint<3> a, ap_uint<3> b) { +#pragma HLS INLINE + if (a < b) { return b - a; } + else { return a - b; } +} + +// Reference codes: +// http://lethe2211.hatenablog.com/entry/2014/12/30/011030 +// http://www.redblobgames.com/pathfinding/a-star/implementation.html +// Need to modify "array partition factor" +ap_uint search(ap_uint idx, ap_uint paths[MAX_BUFFER], ap_uint start, ap_uint goal, ap_uint<8> w[MAX_CELLS]) { + + ap_uint dist[MAX_CELLS]; + ap_uint prev[MAX_CELLS]; + + SEARCH_INIT_DIST: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + dist[i] = 65535; // = (2^16 - 1) + } + + // Priority queue (Heap) + ap_uint pq_len = 0; + bool is_empty = true; + ap_uint<32> pq_nodes[MAX_PQ]; + +#ifdef DEBUG_PRINT + int queue_length = 0; + int search_count = 0; +#endif + + // Point of goal terminal + ap_uint<13> goal_xy = (ap_uint<13>)(goal >> BITWIDTH_Z); + ap_uint<7> goal_x = (ap_uint<7>)(goal_xy / MAX_WIDTH); + ap_uint<7> goal_y = (ap_uint<7>)(goal_xy - goal_x * MAX_WIDTH); + ap_uint<3> goal_z = (ap_uint<3>)(goal & BITMASK_Z); + + dist[start] = 0; + pq_push(pq_nodes, 0, start, &pq_len, &is_empty); // push start terminal + + SEARCH_PQ: + while (!is_empty) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=1000 +#pragma HLS LOOP_FLATTEN off + + ap_uint<16> prev_cost; + ap_uint<16> src; // target cell + pq_pop(pq_nodes, &prev_cost, &src, &pq_len, &is_empty); +#ifdef DEBUG_PRINT + search_count++; +#endif + + + // End routing + if (src == goal) break; + + + // Target cell + ap_uint<16> dist_src = dist[src]; + ap_uint<8> cost = w[src]; + // Point of target cell + ap_uint<13> src_xy = (ap_uint<13>)(src >> BITWIDTH_Z); + ap_uint<7> src_x = (ap_uint<7>)(src_xy / MAX_WIDTH); + ap_uint<7> src_y = (ap_uint<7>)(src_xy - src_x * MAX_WIDTH); + ap_uint<3> src_z = (ap_uint<3>)(src & BITMASK_Z); + + // Search adjacent cells + SEARCH_ADJACENTS: + for (ap_uint<3> a = 0; a < 6; a++) { + ap_int<8> dest_x = (ap_int<8>)src_x; // Min: -1, Max: 72 (Signed 8bit) + ap_int<8> dest_y = (ap_int<8>)src_y; // Min: -1, Max: 72 (Signed 8bit) + ap_int<5> dest_z = (ap_int<5>)src_z; // Min: -1, Max: 8 (Signed 5bit) + if (a == 0) { dest_x -= 1; } + if (a == 1) { dest_x += 1; } + if (a == 2) { dest_y -= 1; } + if (a == 3) { dest_y += 1; } + if (a == 4) { dest_z -= 1; } + if (a == 5) { dest_z += 1; } + + // Inside the board ? // + if (0 <= dest_x && dest_x < (ap_int<8>)size_x && 0 <= dest_y && dest_y < (ap_int<8>)size_y && 0 <= dest_z && dest_z < (ap_int<5>)size_z) { + // Adjacent cell + ap_uint<16> dest = (((ap_uint<16>)dest_x * MAX_WIDTH + (ap_uint<16>)dest_y) << BITWIDTH_Z) | (ap_uint<16>)dest_z; + ap_uint<16> dist_new = dist_src + cost; + + if (dist[dest] > dist_new) { + dist[dest] = dist_new; // Update dist + prev[dest] = src; // Recode previous cell + dist_new += abs_uint7(dest_x, goal_x) + abs_uint7(dest_y, goal_y) + abs_uint3(dest_z, goal_z); // A* heuristic + pq_push(pq_nodes, dist_new, dest, &pq_len, &is_empty); // push adjacent cell + } + } + } +#ifdef DEBUG_PRINT + if (queue_length < pq_len) { queue_length = pq_len; } +#endif + } + + // Output target path + // Note: Do not include start & goal terminals + ap_uint<16> t = prev[goal]; + + // Backtracking + ap_uint p = idx; // buffer-idx + SEARCH_BACKTRACK: + while (t != start) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + paths[p] = t; + p++; + t = prev[t]; + } + +#ifdef DEBUG_PRINT + if (max_queue_length < queue_length) { max_queue_length = queue_length; } + if (max_search_count < search_count) { max_search_count = search_count; } +#endif + + return p; +} + +// Queue push (Enqueue) +// Need to modify "trip count" (1) +void pq_push(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> priority, ap_uint<16> data, ap_uint *pq_len, bool *is_empty) { +#pragma HLS INLINE + + (*pq_len)++; + if ((*pq_len) == 0) { (*pq_len)--; } // Queue is full -> Last element is automatically removed + + // Binary search for circular list + ap_uint i = (*pq_len); + ap_uint p = (*pq_len) >> 1; // parent node + PQ_PUSH_LOOP: + while (i > 1 && (ap_uint<16>)(pq_nodes[p] & PQ_PRIORITY_MASK) >= priority) { +#pragma HLS LOOP_TRIPCOUNT min=0 max=16 +/** Set!: min=0 max=PQ_BIT **/ + pq_nodes[i] = pq_nodes[p]; + i = p; + p = p >> 1; // parent node + } + pq_nodes[i] = ((ap_uint<32>)data << PQ_PRIORITY_WIDTH) | (ap_uint<32>)priority; + *is_empty = false; +} + +// Queue pop (Dequeue) +// Need to modify "trip count" (1) +void pq_pop(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> *ret_priority, ap_uint<16> *ret_data, ap_uint *pq_len, bool *is_empty) { +#pragma HLS INLINE + + *ret_priority = (ap_uint<16>)(pq_nodes[1] & PQ_PRIORITY_MASK); + *ret_data = (ap_uint<16>)(pq_nodes[1] >> PQ_PRIORITY_WIDTH); + + ap_uint i = 1; // root node + ap_uint last_priority = (ap_uint<16>)(pq_nodes[*pq_len] & PQ_PRIORITY_MASK); // Priority of last element + + PQ_POP_LOOP: + while (1) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=16 +/** Set!: min=0 max=PQ_BIT **/ + ap_uint c1 = i << 1; // child node(left) + ap_uint c2 = c1 + 1; // child node(right) + if (c1 < *pq_len && (ap_uint<16>)(pq_nodes[c1] & PQ_PRIORITY_MASK) <= last_priority) { + if (c2 < *pq_len && (ap_uint<16>)(pq_nodes[c2] & PQ_PRIORITY_MASK) <= (ap_uint<16>)(pq_nodes[c1] & PQ_PRIORITY_MASK)) { + pq_nodes[i] = pq_nodes[c2]; + i = c2; + } + else { + pq_nodes[i] = pq_nodes[c1]; + i = c1; + } + } + else { + if (c2 < *pq_len && (ap_uint<16>)(pq_nodes[c2] & PQ_PRIORITY_MASK) <= last_priority) { + pq_nodes[i] = pq_nodes[c2]; + i = c2; + } + else { + break; + } + } + } + pq_nodes[i] = pq_nodes[*pq_len]; + (*pq_len)--; + if ((*pq_len) == 0) { *is_empty = true; } +} + diff --git a/hls_2018/router_03/router.hpp b/hls_2018/router_03/router.hpp new file mode 100755 index 0000000000000000000000000000000000000000..0f523d43f21f508130a3d322cea91ed66fb6f54c --- /dev/null +++ b/hls_2018/router_03/router.hpp @@ -0,0 +1,56 @@ +/** + * router.hpp + * + * for Vivado HLS + */ + +#ifndef __ROUTER_HPP__ +#define __ROUTER_HPP__ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +//#define DEBUG_PRINT // for debug + +#ifdef DEBUG_PRINT +using namespace std; +#endif + +// Parameters +#define MAX_WIDTH 72 // Max of X, Y +#define BITWIDTH_XY 13 +#define BITMASK_XY 65528 // 1111 1111 1111 1000 +#define MAX_LAYER 8 // Max of Z +#define BITWIDTH_Z 3 +#define BITMASK_Z 7 // 0000 0000 0000 0111 + +#define MAX_CELLS 41472 // Max #cells (16bit) +#define MAX_LINES 1024 // Max #lines (10bit) +#define MAX_PQ 32768 // Queue size (15bit) +#define MAX_BUFFER 16384 // Line buffer size (14bit) +#define CELL_BIT 16 +#define LINE_BIT 10 +#define PQ_BIT 15 +#define BUFF_BIT 14 + +#define PQ_PRIORITY_WIDTH 16 +#define PQ_PRIORITY_MASK 65535 // 0000 0000 0000 0000 1111 1111 1111 1111 +#define PQ_DATA_WIDTH 16 +#define PQ_DATA_MASK 4294901760 // 1111 1111 1111 1111 0000 0000 0000 0000 + +#define MAX_WEIGHT 255 // Max weight +#define BOARDSTR_SIZE 41472 // Size of I/O + +ap_uint<8> new_weight(ap_uint<16> x); +bool pynqrouter(char boardstr[BOARDSTR_SIZE], ap_uint<32> seed, ap_int<32> *status); + +ap_uint<7> abs_uint7(ap_uint<7> a, ap_uint<7> b); +ap_uint<3> abs_uint3(ap_uint<3> a, ap_uint<3> b); +ap_uint search(ap_uint idx, ap_uint paths[MAX_BUFFER], ap_uint start, ap_uint goal, ap_uint<8> w[MAX_WEIGHT]); +void pq_push(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> priority, ap_uint<16> data, ap_uint *pq_len, bool *is_empty); +void pq_pop(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> *ret_priority, ap_uint<16> *ret_data, ap_uint *pq_len, bool *is_empty); + +#endif /* __ROUTER_HPP__ */ diff --git a/hls_2018/router_04/router.cpp b/hls_2018/router_04/router.cpp new file mode 100755 index 0000000000000000000000000000000000000000..a2e01cbcd25f823316b1f718ad2ec9e4a4d2349d --- /dev/null +++ b/hls_2018/router_04/router.cpp @@ -0,0 +1,513 @@ +/** + * router.cpp + * + * for Vivado HLS + */ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +#include "./router.hpp" + +// Set weight +ap_uint<8> new_weight(ap_uint<16> x) { +#pragma HLS INLINE + // K. Terada: y = 1~32 (8bit) + ap_uint<8> y; + y = ((x & 255) >> 3) + 1; + return y; +} + + +// Global values +static ap_uint<7> size_x; // X +static ap_uint<7> size_y; // Y +static ap_uint<4> size_z; // Z + +static ap_uint line_num = 0; // #Lines + +#ifdef DEBUG_PRINT +int max_queue_length; // Max length of priority queue +int max_search_count; // Max count of queue pop +int max_buffer_length; // Max length of line buffer +#endif + + +bool pynqrouter(char boardstr[BOARDSTR_SIZE], ap_uint<32> seed, ap_int<32> *status) { +#pragma HLS INTERFACE s_axilite port=boardstr bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=seed bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=status bundle=AXI4LS +#pragma HLS INTERFACE s_axilite port=return bundle=AXI4LS + + // status(0:Solved, 1:Not solved) + *status = -1; + + // For all lines + ap_uint paths[MAX_BUFFER]; // Line buffer + + // For each line + // Note: Should not partition completely + bool adjacents[MAX_LINES]; // Line has adjacent terminals? + ap_uint starts[MAX_LINES]; // Start list + ap_uint goals[MAX_LINES]; // Goal list + ap_uint s_idx[MAX_LINES]; // Start point on line buffer + + ap_uint<8> weights[MAX_CELLS]; // Weight of each cell + // Note: Should not partition weight array + // since each element will be accessed in "random" order + + + // ================================ + // (Step.0) Initialization (BEGIN) + // ================================ + + // Note: Loop counter -> need an extra bit (for condition determination) + + INIT_WEIGHTS: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + weights[i] = 1; + } + + /// Parse /// + size_x = (boardstr[1] - '0') * 10 + (boardstr[2] - '0'); + size_y = (boardstr[4] - '0') * 10 + (boardstr[5] - '0'); + size_z = (boardstr[7] - '0'); + + INIT_BOARDS: + for (ap_uint idx = 8; idx < (ap_uint)(BOARDSTR_SIZE); idx+=11) { + + // NULL-terminated + if (boardstr[idx] == 0) break; + + // Start & Goal of each line + ap_uint<7> s_x = (boardstr[idx+1] - '0') * 10 + (boardstr[idx+2] - '0'); + ap_uint<7> s_y = (boardstr[idx+3] - '0') * 10 + (boardstr[idx+4] - '0'); + ap_uint<3> s_z = (boardstr[idx+5] - '0') - 1; + ap_uint<7> g_x = (boardstr[idx+6] - '0') * 10 + (boardstr[idx+7] - '0'); + ap_uint<7> g_y = (boardstr[idx+8] - '0') * 10 + (boardstr[idx+9] - '0'); + ap_uint<3> g_z = (boardstr[idx+10] - '0') - 1; + + ap_uint start_id = (((ap_uint)s_x * MAX_WIDTH + (ap_uint)s_y) << BITWIDTH_Z) | (ap_uint)s_z; + ap_uint goal_id = (((ap_uint)g_x * MAX_WIDTH + (ap_uint)g_y) << BITWIDTH_Z) | (ap_uint)g_z; + starts[line_num] = start_id; + goals[line_num] = goal_id; + weights[start_id] = MAX_WEIGHT; + weights[goal_id] = MAX_WEIGHT; + + // Line has adjacent terminals? + adjacents[line_num] = false; + ap_int<8> dx = (ap_int<8>)g_x - (ap_int<8>)s_x; // Min: -71, Max: 71 (Signed 8bit) + ap_int<8> dy = (ap_int<8>)g_y - (ap_int<8>)s_y; // Min: -71, Max: 71 (Signed 8bit) + ap_int<4> dz = (ap_int<4>)g_z - (ap_int<4>)s_z; // Min: -7, Max: 7 (Signed 4bit) + if ((dx == 0 && dy == 0 && (dz == 1 || dz == -1)) || (dx == 0 && (dy == 1 || dy == -1) && dz == 0) || ((dx == 1 || dx == -1) && dy == 0 && dz == 0)) { + adjacents[line_num] = true; + } + + line_num++; + } + + // ================================ + // (Step.0) Initialization (END) + // ================================ + +#ifdef DEBUG_PRINT + max_queue_length = 0; + max_search_count = 0; + max_buffer_length = 0; +#endif + + ap_uint pointer = 0; // Pointer for line buffer + + // ================================ + // (Step.1) Initial Routing (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Initial Routing ..." << endl; +#endif + + FIRST_ROUTING: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + + s_idx[i] = pointer; + + if (adjacents[i] == true) continue; // Skip routing + +#ifdef DEBUG_PRINT + //cout << "LINE #" << (int)(i + 1) << endl; +#endif + // Routing + pointer = search(s_idx[i], paths, starts[i], goals[i], weights); + } + + // ================================ + // (Step.1) Initial Routing (END) + // ================================ + + + // Memories for Overlap Check + ap_uint<1> overlap_checks[MAX_CELLS]; + bool has_overlap = false; + + // ================================ + // (Step.2) Rip-up Routing (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Rip-up Routing ..." << endl; +#endif + + ROUTING: + for (ap_uint<16> round = 0; round < 32768 /* = (2048 * 16) */; round++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=32768 + + // Target line + ap_uint target = round % line_num; + ap_uint next_target = target + 1; + if (next_target == line_num) next_target = 0; + +#ifdef DEBUG_PRINT + cout << "(round " << round << ") LINE #" << (int)(target + 1); + cout << " -> " << pointer << endl; +#endif +#ifdef DEBUG_PRINT + int buffer_length = pointer - s_idx[target]; + if (max_buffer_length < buffer_length) { max_buffer_length = buffer_length; } +#endif + + // Skip routing + if (adjacents[target] == true) { + s_idx[target] = pointer; continue; + } + + + // (Step.2-1) Reset weights of target line + WEIGHT_RESET: + for (ap_uint j = s_idx[target]; j != s_idx[next_target]; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + weights[paths[j]] = 1; + } + + // (Step.2-2) Set weights of non-target lines and terminals + ap_uint<8> current_round_weight = new_weight(round); + WEIGHT_PATH: + for (ap_uint j = s_idx[next_target]; j != pointer; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=8192 + weights[paths[j]] = current_round_weight; + } + WEIGHT_TERMINAL: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + weights[starts[i]] = MAX_WEIGHT; + weights[goals[i]] = MAX_WEIGHT; + } + // Reset weight of start terminal of target line (bug avoiding) + // Restore original settings in (*) + weights[starts[target]] = 1; + + // (Step.2-3) Routing + s_idx[target] = pointer; + pointer = search(s_idx[target], paths, starts[target], goals[target], weights); + + // (*) + weights[starts[target]] = MAX_WEIGHT; + +#ifdef DEBUG_PRINT + bool ng = false; + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { + if (weights[starts[i]] != 255 || weights[goals[i]] != 255) { + cout << i << " "; ng = true; + } + } + if(ng) { cout << endl; } +#endif + + // (Step.2-4) Overlap check + has_overlap = false; + OVERLAP_RESET: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + overlap_checks[i] = 0; + } + OVERLAP_CHECK_LINE: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + overlap_checks[starts[i]] = 1; + overlap_checks[goals[i]] = 1; + } + OVERLAP_CHECK_PATH: + for (ap_uint j = s_idx[next_target]; j != pointer; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=8192 + ap_uint cell_id = paths[j]; + if (overlap_checks[cell_id]) { + has_overlap = true; break; + } + overlap_checks[cell_id] = 1; + } +#ifdef DEBUG_PRINT + if(!has_overlap){ cout << "ROUND: " << round << endl; } +#endif + if (!has_overlap) break; // Finish routing? + } + +#ifdef DEBUG_PRINT + cout << "MAX PQ LENGTH: " << max_queue_length << endl; + cout << "MAX SEARCH COUNT: " << max_search_count << endl; + cout << "MAX BUFFER: " << max_buffer_length << endl; +#endif + + // Not solved + if (has_overlap) { + *status = 1; return false; + } + + // ================================ + // (Step.2) Rip-up Routing (END) + // ================================ + + + // ================================ + // (Step.3) Output (BEGIN) + // ================================ + +#ifdef DEBUG_PRINT + cout << "Output ..." << endl; +#endif + + // Init: Blank = 0 + OUTPUT_INIT: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + boardstr[i] = 0; + } + // Line + OUTPUT_LINE: + for (ap_uint i = 0; i < (ap_uint)(line_num); i++) { +#pragma HLS LOOP_TRIPCOUNT min=2 max=999 + boardstr[starts[i]] = (i + 1); + boardstr[goals[i]] = (i + 1); + + ap_uint p1; // p1: s_idx of target + ap_uint p2; // p2: s_idx of next target + p1 = s_idx[i]; + if (i == (ap_uint)(line_num-1)) { + p2 = s_idx[0]; + } + else { + p2 = s_idx[i+1]; + } + if ((ap_uint)(p2 - p1) > 8192){ + p2 = pointer; + } + OUTPUT_LINE_PATH: + for (ap_uint j = p1; j != p2; j++) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + boardstr[paths[j]] = (i + 1); + } + } + + // ================================ + // (Step.3) Output (END) + // ================================ + + *status = 0; return true; +} + + +// ================================ // +// For Routing +// ================================ // + +// Max: 71, Min: 0 (7bit) +ap_uint<7> abs_uint7(ap_uint<7> a, ap_uint<7> b) { +#pragma HLS INLINE + if (a < b) { return b - a; } + else { return a - b; } +} +// Max: 7, Min: 0 (3bit) +ap_uint<3> abs_uint3(ap_uint<3> a, ap_uint<3> b) { +#pragma HLS INLINE + if (a < b) { return b - a; } + else { return a - b; } +} + +// Reference codes: +// http://lethe2211.hatenablog.com/entry/2014/12/30/011030 +// http://www.redblobgames.com/pathfinding/a-star/implementation.html +// Need to modify "array partition factor" +ap_uint search(ap_uint idx, ap_uint paths[MAX_BUFFER], ap_uint start, ap_uint goal, ap_uint<8> w[MAX_CELLS]) { + + ap_uint dist[MAX_CELLS]; + ap_uint prev[MAX_CELLS]; + + SEARCH_INIT_DIST: + for (ap_uint i = 0; i < (ap_uint)(MAX_CELLS); i++) { + dist[i] = 65535; // = (2^16 - 1) + } + + // Priority queue (Heap) + ap_uint pq_len = 0; + bool is_empty = true; + ap_uint<32> pq_nodes[MAX_PQ]; + +#ifdef DEBUG_PRINT + int queue_length = 0; + int search_count = 0; +#endif + + // Point of goal terminal + ap_uint<13> goal_xy = (ap_uint<13>)(goal >> BITWIDTH_Z); + ap_uint<7> goal_x = (ap_uint<7>)(goal_xy / MAX_WIDTH); + ap_uint<7> goal_y = (ap_uint<7>)(goal_xy - goal_x * MAX_WIDTH); + ap_uint<3> goal_z = (ap_uint<3>)(goal & BITMASK_Z); + + dist[start] = 0; + pq_push(pq_nodes, 0, start, &pq_len, &is_empty); // push start terminal + + SEARCH_PQ: + while (!is_empty) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=1000 +#pragma HLS LOOP_FLATTEN off + + ap_uint<16> prev_cost; + ap_uint<16> src; // target cell + pq_pop(pq_nodes, &prev_cost, &src, &pq_len, &is_empty); +#ifdef DEBUG_PRINT + search_count++; +#endif + + + // End routing + if (src == goal) break; + + + // Target cell + ap_uint<16> dist_src = dist[src]; + ap_uint<8> cost = w[src]; + // Point of target cell + ap_uint<13> src_xy = (ap_uint<13>)(src >> BITWIDTH_Z); + ap_uint<7> src_x = (ap_uint<7>)(src_xy / MAX_WIDTH); + ap_uint<7> src_y = (ap_uint<7>)(src_xy - src_x * MAX_WIDTH); + ap_uint<3> src_z = (ap_uint<3>)(src & BITMASK_Z); + + // Search adjacent cells + SEARCH_ADJACENTS: + for (ap_uint<3> a = 0; a < 6; a++) { + ap_int<8> dest_x = (ap_int<8>)src_x; // Min: -1, Max: 72 (Signed 8bit) + ap_int<8> dest_y = (ap_int<8>)src_y; // Min: -1, Max: 72 (Signed 8bit) + ap_int<5> dest_z = (ap_int<5>)src_z; // Min: -1, Max: 8 (Signed 5bit) + if (a == 0) { dest_x -= 1; } + if (a == 1) { dest_x += 1; } + if (a == 2) { dest_y -= 1; } + if (a == 3) { dest_y += 1; } + if (a == 4) { dest_z -= 1; } + if (a == 5) { dest_z += 1; } + + // Inside the board ? // + if (0 <= dest_x && dest_x < (ap_int<8>)size_x && 0 <= dest_y && dest_y < (ap_int<8>)size_y && 0 <= dest_z && dest_z < (ap_int<5>)size_z) { + // Adjacent cell + ap_uint<16> dest = (((ap_uint<16>)dest_x * MAX_WIDTH + (ap_uint<16>)dest_y) << BITWIDTH_Z) | (ap_uint<16>)dest_z; + ap_uint<16> dist_new = dist_src + cost; + + if (dist[dest] > dist_new) { + dist[dest] = dist_new; // Update dist + prev[dest] = src; // Recode previous cell + dist_new += abs_uint7(dest_x, goal_x) + abs_uint7(dest_y, goal_y) + abs_uint3(dest_z, goal_z); // A* heuristic + pq_push(pq_nodes, dist_new, dest, &pq_len, &is_empty); // push adjacent cell + } + } + } +#ifdef DEBUG_PRINT + if (queue_length < pq_len) { queue_length = pq_len; } +#endif + } + + // Output target path + // Note: Do not include start & goal terminals + ap_uint<16> t = prev[goal]; + + // Backtracking + ap_uint p = idx; // buffer-idx + SEARCH_BACKTRACK: + while (t != start) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=256 + paths[p] = t; + p++; + t = prev[t]; + } + +#ifdef DEBUG_PRINT + if (max_queue_length < queue_length) { max_queue_length = queue_length; } + if (max_search_count < search_count) { max_search_count = search_count; } +#endif + + return p; +} + +// Queue push (Enqueue) +// Need to modify "trip count" (1) +void pq_push(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> priority, ap_uint<16> data, ap_uint *pq_len, bool *is_empty) { +#pragma HLS INLINE + + (*pq_len)++; + if ((*pq_len) == 0) { (*pq_len)--; } // Queue is full -> Last element is automatically removed + + // Binary search for circular list + ap_uint i = (*pq_len); + ap_uint p = (*pq_len) >> 1; // parent node + PQ_PUSH_LOOP: + while (i > 1 && (ap_uint<16>)(pq_nodes[p] & PQ_PRIORITY_MASK) >= priority) { +#pragma HLS LOOP_TRIPCOUNT min=0 max=16 +/** Set!: min=0 max=PQ_BIT **/ + pq_nodes[i] = pq_nodes[p]; + i = p; + p = p >> 1; // parent node + } + pq_nodes[i] = ((ap_uint<32>)data << PQ_PRIORITY_WIDTH) | (ap_uint<32>)priority; + *is_empty = false; +} + +// Queue pop (Dequeue) +// Need to modify "trip count" (1) +void pq_pop(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> *ret_priority, ap_uint<16> *ret_data, ap_uint *pq_len, bool *is_empty) { +#pragma HLS INLINE + + *ret_priority = (ap_uint<16>)(pq_nodes[1] & PQ_PRIORITY_MASK); + *ret_data = (ap_uint<16>)(pq_nodes[1] >> PQ_PRIORITY_WIDTH); + + ap_uint i = 1; // root node + ap_uint last_priority = (ap_uint<16>)(pq_nodes[*pq_len] & PQ_PRIORITY_MASK); // Priority of last element + + PQ_POP_LOOP: + while (1) { +#pragma HLS LOOP_TRIPCOUNT min=1 max=16 +/** Set!: min=0 max=PQ_BIT **/ + ap_uint c1 = i << 1; // child node(left) + ap_uint c2 = c1 + 1; // child node(right) + if (c1 < *pq_len && (ap_uint<16>)(pq_nodes[c1] & PQ_PRIORITY_MASK) <= last_priority) { + if (c2 < *pq_len && (ap_uint<16>)(pq_nodes[c2] & PQ_PRIORITY_MASK) <= (ap_uint<16>)(pq_nodes[c1] & PQ_PRIORITY_MASK)) { + pq_nodes[i] = pq_nodes[c2]; + i = c2; + } + else { + pq_nodes[i] = pq_nodes[c1]; + i = c1; + } + } + else { + if (c2 < *pq_len && (ap_uint<16>)(pq_nodes[c2] & PQ_PRIORITY_MASK) <= last_priority) { + pq_nodes[i] = pq_nodes[c2]; + i = c2; + } + else { + break; + } + } + } + pq_nodes[i] = pq_nodes[*pq_len]; + (*pq_len)--; + if ((*pq_len) == 0) { *is_empty = true; } +} + diff --git a/hls_2018/router_04/router.hpp b/hls_2018/router_04/router.hpp new file mode 100755 index 0000000000000000000000000000000000000000..0f523d43f21f508130a3d322cea91ed66fb6f54c --- /dev/null +++ b/hls_2018/router_04/router.hpp @@ -0,0 +1,56 @@ +/** + * router.hpp + * + * for Vivado HLS + */ + +#ifndef __ROUTER_HPP__ +#define __ROUTER_HPP__ + +#ifdef SOFTWARE +#include "ap_int.h" +#else +#include +#endif + +//#define DEBUG_PRINT // for debug + +#ifdef DEBUG_PRINT +using namespace std; +#endif + +// Parameters +#define MAX_WIDTH 72 // Max of X, Y +#define BITWIDTH_XY 13 +#define BITMASK_XY 65528 // 1111 1111 1111 1000 +#define MAX_LAYER 8 // Max of Z +#define BITWIDTH_Z 3 +#define BITMASK_Z 7 // 0000 0000 0000 0111 + +#define MAX_CELLS 41472 // Max #cells (16bit) +#define MAX_LINES 1024 // Max #lines (10bit) +#define MAX_PQ 32768 // Queue size (15bit) +#define MAX_BUFFER 16384 // Line buffer size (14bit) +#define CELL_BIT 16 +#define LINE_BIT 10 +#define PQ_BIT 15 +#define BUFF_BIT 14 + +#define PQ_PRIORITY_WIDTH 16 +#define PQ_PRIORITY_MASK 65535 // 0000 0000 0000 0000 1111 1111 1111 1111 +#define PQ_DATA_WIDTH 16 +#define PQ_DATA_MASK 4294901760 // 1111 1111 1111 1111 0000 0000 0000 0000 + +#define MAX_WEIGHT 255 // Max weight +#define BOARDSTR_SIZE 41472 // Size of I/O + +ap_uint<8> new_weight(ap_uint<16> x); +bool pynqrouter(char boardstr[BOARDSTR_SIZE], ap_uint<32> seed, ap_int<32> *status); + +ap_uint<7> abs_uint7(ap_uint<7> a, ap_uint<7> b); +ap_uint<3> abs_uint3(ap_uint<3> a, ap_uint<3> b); +ap_uint search(ap_uint idx, ap_uint paths[MAX_BUFFER], ap_uint start, ap_uint goal, ap_uint<8> w[MAX_WEIGHT]); +void pq_push(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> priority, ap_uint<16> data, ap_uint *pq_len, bool *is_empty); +void pq_pop(ap_uint<32> pq_nodes[MAX_PQ], ap_uint<16> *ret_priority, ap_uint<16> *ret_data, ap_uint *pq_len, bool *is_empty); + +#endif /* __ROUTER_HPP__ */