Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
adc2018-system
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
adc2018
adc2018-system
Commits
98afc37f
Commit
98afc37f
authored
Aug 24, 2018
by
kazushi.kawamura
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add files
parent
51ae9171
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
2309 additions
and
0 deletions
+2309
-0
router.cpp
hls_2018/router_01/router.cpp
+499
-0
router.hpp
hls_2018/router_01/router.hpp
+56
-0
router.cpp
hls_2018/router_02/router.cpp
+557
-0
router.hpp
hls_2018/router_02/router.hpp
+59
-0
router.cpp
hls_2018/router_03/router.cpp
+513
-0
router.hpp
hls_2018/router_03/router.hpp
+56
-0
router.cpp
hls_2018/router_04/router.cpp
+513
-0
router.hpp
hls_2018/router_04/router.hpp
+56
-0
No files found.
hls_2018/router_01/router.cpp
0 → 100755
View file @
98afc37f
/**
* router.cpp
*
* for Vivado HLS
*/
#ifdef SOFTWARE
#include "ap_int.h"
#else
#include <ap_int.h>
#endif
#include "./router.hpp"
// Set weight
ap_uint
<
8
>
new_weight
(
ap_uint
<
16
>
x
)
{
#pragma HLS INLINE
// K. Terada: y = 1~32 (8bit)
ap_uint
<
8
>
y
;
y
=
((
x
&
255
)
>>
3
)
+
1
;
return
y
;
}
// Global values
static
ap_uint
<
7
>
size_x
;
// X
static
ap_uint
<
7
>
size_y
;
// Y
static
ap_uint
<
4
>
size_z
;
// Z
static
ap_uint
<
LINE_BIT
>
line_num
=
0
;
// #Lines
#ifdef DEBUG_PRINT
int
max_queue_length
;
// Max length of priority queue
int
max_search_count
;
// Max count of queue pop
int
max_buffer_length
;
// Max length of line buffer
#endif
bool
pynqrouter
(
char
boardstr
[
BOARDSTR_SIZE
],
ap_uint
<
32
>
seed
,
ap_int
<
32
>
*
status
)
{
#pragma HLS INTERFACE s_axilite port=boardstr bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=seed bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=status bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=return bundle=AXI4LS
// status(0:Solved, 1:Not solved)
*
status
=
-
1
;
// For all lines
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
];
// Line buffer
// For each line
// Note: Should not partition completely
bool
adjacents
[
MAX_LINES
];
// Line has adjacent terminals?
ap_uint
<
CELL_BIT
>
starts
[
MAX_LINES
];
// Start list
ap_uint
<
CELL_BIT
>
goals
[
MAX_LINES
];
// Goal list
ap_uint
<
BUFF_BIT
>
s_idx
[
MAX_LINES
];
// Start point on line buffer
ap_uint
<
8
>
weights
[
MAX_CELLS
];
// Weight of each cell
// Note: Should not partition weight array
// since each element will be accessed in "random" order
// ================================
// (Step.0) Initialization (BEGIN)
// ================================
// Note: Loop counter -> need an extra bit (for condition determination)
INIT_WEIGHTS:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
weights
[
i
]
=
1
;
}
/// Parse ///
size_x
=
(
boardstr
[
1
]
-
'0'
)
*
10
+
(
boardstr
[
2
]
-
'0'
);
size_y
=
(
boardstr
[
4
]
-
'0'
)
*
10
+
(
boardstr
[
5
]
-
'0'
);
size_z
=
(
boardstr
[
7
]
-
'0'
);
INIT_BOARDS:
for
(
ap_uint
<
CELL_BIT
>
idx
=
8
;
idx
<
(
ap_uint
<
CELL_BIT
>
)(
BOARDSTR_SIZE
);
idx
+=
11
)
{
// NULL-terminated
if
(
boardstr
[
idx
]
==
0
)
break
;
// Start & Goal of each line
ap_uint
<
7
>
s_x
=
(
boardstr
[
idx
+
1
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
2
]
-
'0'
);
ap_uint
<
7
>
s_y
=
(
boardstr
[
idx
+
3
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
4
]
-
'0'
);
ap_uint
<
3
>
s_z
=
(
boardstr
[
idx
+
5
]
-
'0'
)
-
1
;
ap_uint
<
7
>
g_x
=
(
boardstr
[
idx
+
6
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
7
]
-
'0'
);
ap_uint
<
7
>
g_y
=
(
boardstr
[
idx
+
8
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
9
]
-
'0'
);
ap_uint
<
3
>
g_z
=
(
boardstr
[
idx
+
10
]
-
'0'
)
-
1
;
ap_uint
<
CELL_BIT
>
start_id
=
(((
ap_uint
<
CELL_BIT
>
)
s_x
*
MAX_WIDTH
+
(
ap_uint
<
CELL_BIT
>
)
s_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
CELL_BIT
>
)
s_z
;
ap_uint
<
CELL_BIT
>
goal_id
=
(((
ap_uint
<
CELL_BIT
>
)
g_x
*
MAX_WIDTH
+
(
ap_uint
<
CELL_BIT
>
)
g_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
CELL_BIT
>
)
g_z
;
starts
[
line_num
]
=
start_id
;
goals
[
line_num
]
=
goal_id
;
weights
[
start_id
]
=
MAX_WEIGHT
;
weights
[
goal_id
]
=
MAX_WEIGHT
;
// Line has adjacent terminals?
adjacents
[
line_num
]
=
false
;
ap_int
<
8
>
dx
=
(
ap_int
<
8
>
)
g_x
-
(
ap_int
<
8
>
)
s_x
;
// Min: -71, Max: 71 (Signed 8bit)
ap_int
<
8
>
dy
=
(
ap_int
<
8
>
)
g_y
-
(
ap_int
<
8
>
)
s_y
;
// Min: -71, Max: 71 (Signed 8bit)
ap_int
<
4
>
dz
=
(
ap_int
<
4
>
)
g_z
-
(
ap_int
<
4
>
)
s_z
;
// Min: -7, Max: 7 (Signed 4bit)
if
((
dx
==
0
&&
dy
==
0
&&
(
dz
==
1
||
dz
==
-
1
))
||
(
dx
==
0
&&
(
dy
==
1
||
dy
==
-
1
)
&&
dz
==
0
)
||
((
dx
==
1
||
dx
==
-
1
)
&&
dy
==
0
&&
dz
==
0
))
{
adjacents
[
line_num
]
=
true
;
}
line_num
++
;
}
// ================================
// (Step.0) Initialization (END)
// ================================
#ifdef DEBUG_PRINT
max_queue_length
=
0
;
max_search_count
=
0
;
max_buffer_length
=
0
;
#endif
ap_uint
<
BUFF_BIT
>
pointer
=
0
;
// Pointer for line buffer
// ================================
// (Step.1) Initial Routing (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Initial Routing ..."
<<
endl
;
#endif
FIRST_ROUTING:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
s_idx
[
i
]
=
pointer
;
if
(
adjacents
[
i
]
==
true
)
continue
;
// Skip routing
#ifdef DEBUG_PRINT
//cout << "LINE #" << (int)(i + 1) << endl;
#endif
// Routing
pointer
=
search
(
s_idx
[
i
],
paths
,
starts
[
i
],
goals
[
i
],
weights
);
}
// ================================
// (Step.1) Initial Routing (END)
// ================================
// Memories for Overlap Check
ap_uint
<
1
>
overlap_checks
[
MAX_CELLS
];
bool
has_overlap
=
false
;
// ================================
// (Step.2) Rip-up Routing (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Rip-up Routing ..."
<<
endl
;
#endif
ROUTING:
for
(
ap_uint
<
16
>
round
=
0
;
round
<
32768
/* = (2048 * 16) */
;
round
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=32768
// Target line
ap_uint
<
LINE_BIT
>
target
=
round
%
line_num
;
ap_uint
<
LINE_BIT
>
next_target
=
target
+
1
;
if
(
next_target
==
line_num
)
next_target
=
0
;
#ifdef DEBUG_PRINT
//cout << "(round " << round << ") LINE #" << (int)(target + 1);
//cout << " -> " << pointer << endl;
#endif
#ifdef DEBUG_PRINT
int
buffer_length
=
pointer
-
s_idx
[
target
];
if
(
max_buffer_length
<
buffer_length
)
{
max_buffer_length
=
buffer_length
;
}
#endif
// Skip routing
if
(
adjacents
[
target
]
==
true
)
{
s_idx
[
target
]
=
pointer
;
continue
;
}
// (Step.2-1) Reset weights of target line
WEIGHT_RESET:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
target
];
j
!=
s_idx
[
next_target
];
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
weights
[
paths
[
j
]]
=
1
;
}
// (Step.2-2) Set weights of non-target lines and terminals
ap_uint
<
8
>
current_round_weight
=
new_weight
(
round
);
WEIGHT_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
next_target
];
j
!=
pointer
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=8192
weights
[
paths
[
j
]]
=
current_round_weight
;
}
WEIGHT_TERMINAL:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
weights
[
starts
[
i
]]
=
MAX_WEIGHT
;
weights
[
goals
[
i
]]
=
MAX_WEIGHT
;
}
// Reset weight of start terminal of target line (bug avoiding)
// Restore original settings in (*)
weights
[
starts
[
target
]]
=
1
;
// (Step.2-3) Routing
s_idx
[
target
]
=
pointer
;
pointer
=
search
(
s_idx
[
target
],
paths
,
starts
[
target
],
goals
[
target
],
weights
);
// (*)
weights
[
starts
[
target
]]
=
MAX_WEIGHT
;
#ifdef DEBUG_PRINT
bool
ng
=
false
;
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
if
(
weights
[
starts
[
i
]]
!=
255
||
weights
[
goals
[
i
]]
!=
255
)
{
cout
<<
i
<<
" "
;
ng
=
true
;
}
}
if
(
ng
)
{
cout
<<
endl
;
}
#endif
// (Step.2-4) Overlap check
has_overlap
=
false
;
OVERLAP_RESET:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
overlap_checks
[
i
]
=
0
;
}
OVERLAP_CHECK_LINE:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
overlap_checks
[
starts
[
i
]]
=
1
;
overlap_checks
[
goals
[
i
]]
=
1
;
}
OVERLAP_CHECK_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
next_target
];
j
!=
pointer
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=8192
ap_uint
<
CELL_BIT
>
cell_id
=
paths
[
j
];
if
(
overlap_checks
[
cell_id
])
{
has_overlap
=
true
;
break
;
}
overlap_checks
[
cell_id
]
=
1
;
}
#ifdef DEBUG_PRINT
if
(
!
has_overlap
){
cout
<<
"ROUND: "
<<
round
<<
endl
;
}
#endif
if
(
!
has_overlap
)
break
;
// Finish routing?
}
#ifdef DEBUG_PRINT
cout
<<
"MAX PQ LENGTH: "
<<
max_queue_length
<<
endl
;
cout
<<
"MAX SEARCH COUNT: "
<<
max_search_count
<<
endl
;
cout
<<
"MAX BUFFER: "
<<
max_buffer_length
<<
endl
;
#endif
// Not solved
if
(
has_overlap
)
{
*
status
=
1
;
return
false
;
}
// ================================
// (Step.2) Rip-up Routing (END)
// ================================
// ================================
// (Step.3) Output (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Output ..."
<<
endl
;
#endif
// Init: Blank = 0
OUTPUT_INIT:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
boardstr
[
i
]
=
0
;
}
// Line
OUTPUT_LINE:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
boardstr
[
starts
[
i
]]
=
(
i
+
1
);
boardstr
[
goals
[
i
]]
=
(
i
+
1
);
ap_uint
<
BUFF_BIT
>
p1
;
// p1: s_idx of target
ap_uint
<
BUFF_BIT
>
p2
;
// p2: s_idx of next target
p1
=
s_idx
[
i
];
if
(
i
==
(
ap_uint
<
LINE_BIT
>
)(
line_num
-
1
))
{
p2
=
s_idx
[
0
];
}
else
{
p2
=
s_idx
[
i
+
1
];
}
if
((
ap_uint
<
BUFF_BIT
>
)(
p2
-
p1
)
>
8192
){
p2
=
pointer
;
}
OUTPUT_LINE_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
p1
;
j
!=
p2
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
boardstr
[
paths
[
j
]]
=
(
i
+
1
);
}
}
// ================================
// (Step.3) Output (END)
// ================================
*
status
=
0
;
return
true
;
}
// ================================ //
// For Routing
// ================================ //
// Max: 71, Min: 0 (7bit)
ap_uint
<
7
>
abs_uint7
(
ap_uint
<
7
>
a
,
ap_uint
<
7
>
b
)
{
#pragma HLS INLINE
if
(
a
<
b
)
{
return
b
-
a
;
}
else
{
return
a
-
b
;
}
}
// Max: 7, Min: 0 (3bit)
ap_uint
<
3
>
abs_uint3
(
ap_uint
<
3
>
a
,
ap_uint
<
3
>
b
)
{
#pragma HLS INLINE
if
(
a
<
b
)
{
return
b
-
a
;
}
else
{
return
a
-
b
;
}
}
// Reference codes:
// http://lethe2211.hatenablog.com/entry/2014/12/30/011030
// http://www.redblobgames.com/pathfinding/a-star/implementation.html
// Need to modify "array partition factor"
ap_uint
<
BUFF_BIT
>
search
(
ap_uint
<
BUFF_BIT
>
idx
,
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
],
ap_uint
<
CELL_BIT
>
start
,
ap_uint
<
CELL_BIT
>
goal
,
ap_uint
<
8
>
w
[
MAX_CELLS
])
{
ap_uint
<
CELL_BIT
>
dist
[
MAX_CELLS
];
ap_uint
<
CELL_BIT
>
prev
[
MAX_CELLS
];
SEARCH_INIT_DIST
:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
dist
[
i
]
=
65535
;
// = (2^16 - 1)
}
// Priority queue (Circular list)
ap_uint
<
PQ_BIT
>
top
=
1
,
bottom
=
0
;
bool
is_empty
=
true
;
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
];
#ifdef DEBUG_PRINT
int
queue_length
=
0
;
int
search_count
=
0
;
#endif
// Point of goal terminal
ap_uint
<
13
>
goal_xy
=
(
ap_uint
<
13
>
)(
goal
>>
BITWIDTH_Z
);
ap_uint
<
7
>
goal_x
=
(
ap_uint
<
7
>
)(
goal_xy
/
MAX_WIDTH
);
ap_uint
<
7
>
goal_y
=
(
ap_uint
<
7
>
)(
goal_xy
-
goal_x
*
MAX_WIDTH
);
ap_uint
<
3
>
goal_z
=
(
ap_uint
<
3
>
)(
goal
&
BITMASK_Z
);
dist
[
start
]
=
0
;
pq_push
(
pq_nodes
,
0
,
start
,
&
top
,
&
bottom
,
&
is_empty
);
// push start terminal
SEARCH_PQ
:
while
(
!
is_empty
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=1000
#pragma HLS LOOP_FLATTEN off
ap_uint
<
16
>
prev_cost
;
ap_uint
<
16
>
src
;
// target cell
pq_pop
(
pq_nodes
,
&
prev_cost
,
&
src
,
&
top
,
&
bottom
,
&
is_empty
);
#ifdef DEBUG_PRINT
search_count
++
;
#endif
// End routing
if
(
src
==
goal
)
break
;
// Target cell
ap_uint
<
16
>
dist_src
=
dist
[
src
];
ap_uint
<
8
>
cost
=
w
[
src
];
// Point of target cell
ap_uint
<
13
>
src_xy
=
(
ap_uint
<
13
>
)(
src
>>
BITWIDTH_Z
);
ap_uint
<
7
>
src_x
=
(
ap_uint
<
7
>
)(
src_xy
/
MAX_WIDTH
);
ap_uint
<
7
>
src_y
=
(
ap_uint
<
7
>
)(
src_xy
-
src_x
*
MAX_WIDTH
);
ap_uint
<
3
>
src_z
=
(
ap_uint
<
3
>
)(
src
&
BITMASK_Z
);
// Search adjacent cells
SEARCH_ADJACENTS
:
for
(
ap_uint
<
3
>
a
=
0
;
a
<
6
;
a
++
)
{
ap_int
<
8
>
dest_x
=
(
ap_int
<
8
>
)
src_x
;
// Min: -1, Max: 72 (Signed 8bit)
ap_int
<
8
>
dest_y
=
(
ap_int
<
8
>
)
src_y
;
// Min: -1, Max: 72 (Signed 8bit)
ap_int
<
5
>
dest_z
=
(
ap_int
<
5
>
)
src_z
;
// Min: -1, Max: 8 (Signed 5bit)
if
(
a
==
0
)
{
dest_x
-=
1
;
}
if
(
a
==
1
)
{
dest_x
+=
1
;
}
if
(
a
==
2
)
{
dest_y
-=
1
;
}
if
(
a
==
3
)
{
dest_y
+=
1
;
}
if
(
a
==
4
)
{
dest_z
-=
1
;
}
if
(
a
==
5
)
{
dest_z
+=
1
;
}
// Inside the board ? //
if
(
0
<=
dest_x
&&
dest_x
<
(
ap_int
<
8
>
)
size_x
&&
0
<=
dest_y
&&
dest_y
<
(
ap_int
<
8
>
)
size_y
&&
0
<=
dest_z
&&
dest_z
<
(
ap_int
<
5
>
)
size_z
)
{
// Adjacent cell
ap_uint
<
16
>
dest
=
(((
ap_uint
<
16
>
)
dest_x
*
MAX_WIDTH
+
(
ap_uint
<
16
>
)
dest_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
16
>
)
dest_z
;
ap_uint
<
16
>
dist_new
=
dist_src
+
cost
;
if
(
dist
[
dest
]
>
dist_new
)
{
dist
[
dest
]
=
dist_new
;
// Update dist
prev
[
dest
]
=
src
;
// Recode previous cell
dist_new
+=
abs_uint7
(
dest_x
,
goal_x
)
+
abs_uint7
(
dest_y
,
goal_y
)
+
abs_uint3
(
dest_z
,
goal_z
);
// A* heuristic
pq_push
(
pq_nodes
,
dist_new
,
dest
,
&
top
,
&
bottom
,
&
is_empty
);
// push adjacent cell
}
}
}
#ifdef DEBUG_PRINT
if
(
queue_length
<
(
bottom
-
top
+
1
))
{
queue_length
=
(
bottom
-
top
+
1
);
}
#endif
}
// Output target path
// Note: Do not include start & goal terminals
ap_uint
<
16
>
t
=
prev
[
goal
];
// Backtracking
ap_uint
<
BUFF_BIT
>
p
=
idx
;
// buffer-idx
SEARCH_BACKTRACK
:
while
(
t
!=
start
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
paths
[
p
]
=
t
;
p
++
;
t
=
prev
[
t
];
}
#ifdef DEBUG_PRINT
if
(
max_queue_length
<
queue_length
)
{
max_queue_length
=
queue_length
;
}
if
(
max_search_count
<
search_count
)
{
max_search_count
=
search_count
;
}
#endif
return
p
;
}
// Queue push (Enqueue)
// Need to modify "trip count" (2)
void
pq_push
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
priority
,
ap_uint
<
16
>
data
,
ap_uint
<
PQ_BIT
>
*
top
,
ap_uint
<
PQ_BIT
>
*
bottom
,
bool
*
is_empty
)
{
#pragma HLS INLINE
(
*
bottom
)
++
;
if
((
*
bottom
)
==
(
*
top
)
&&
!
(
*
is_empty
))
{
(
*
bottom
)
--
;
}
// Queue is full -> Last element is automatically removed
// Binary search for circular list
ap_uint
<
PQ_BIT
>
t
=
(
*
top
);
ap_uint
<
PQ_BIT
>
b
=
(
*
bottom
);
ap_uint
<
PQ_BIT
>
h
=
((
ap_uint
<
PQ_BIT
>
)(
b
-
t
)
/
2
)
+
t
;
// Note: "h = (t + b) / 2" causes a bug!
PQ_PUSH_BINARY
:
while
(
t
!=
b
)
{
#pragma HLS LOOP_TRIPCOUNT min=0 max=13
/** Set!: min=0 max=PQ_BIT **/
if
((
ap_uint
<
16
>
)(
pq_nodes
[
h
]
&
PQ_PRIORITY_MASK
)
>=
priority
)
{
b
=
h
;
}
else
{
t
=
h
+
1
;
}
h
=
((
ap_uint
<
PQ_BIT
>
)(
b
-
t
)
/
2
)
+
t
;
}
// Shifting
ap_uint
<
PQ_BIT
>
shift_count
=
(
*
bottom
)
-
t
;
// # of shifting
ap_uint
<
PQ_BIT
>
p0
=
(
*
bottom
),
p1
;
PQ_PUSH_SHIFT
:
for
(
ap_uint
<
PQ_BIT
>
j
=
0
;
j
<
(
ap_uint
<
PQ_BIT
>
)(
shift_count
);
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=0 max=8191
/** Set!: min=0 max=MAX_PQ-1 **/
p0
=
p0
-
1
;
p1
=
p0
+
1
;
pq_nodes
[
p1
]
=
pq_nodes
[
p0
];
}
pq_nodes
[
p0
]
=
((
ap_uint
<
32
>
)
data
<<
PQ_PRIORITY_WIDTH
)
|
(
ap_uint
<
32
>
)
priority
;
*
is_empty
=
false
;
}
// Queue pop (Dequeue)
void
pq_pop
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
*
ret_priority
,
ap_uint
<
16
>
*
ret_data
,
ap_uint
<
PQ_BIT
>
*
top
,
ap_uint
<
PQ_BIT
>
*
bottom
,
bool
*
is_empty
)
{
#pragma HLS INLINE
*
ret_priority
=
(
ap_uint
<
16
>
)(
pq_nodes
[(
*
top
)]
&
PQ_PRIORITY_MASK
);
*
ret_data
=
(
ap_uint
<
16
>
)(
pq_nodes
[(
*
top
)]
>>
PQ_PRIORITY_WIDTH
);
(
*
top
)
++
;
if
(((
*
bottom
)
-
(
*
top
)
+
1
)
==
0
)
{
*
is_empty
=
true
;
}
}
hls_2018/router_01/router.hpp
0 → 100755
View file @
98afc37f
/**
* router.hpp
*
* for Vivado HLS
*/
#ifndef __ROUTER_HPP__
#define __ROUTER_HPP__
#ifdef SOFTWARE
#include "ap_int.h"
#else
#include <ap_int.h>
#endif
//#define DEBUG_PRINT // for debug
#ifdef DEBUG_PRINT
using
namespace
std
;
#endif
// Parameters
#define MAX_WIDTH 72 // Max of X, Y
#define BITWIDTH_XY 13
#define BITMASK_XY 65528 // 1111 1111 1111 1000
#define MAX_LAYER 8 // Max of Z
#define BITWIDTH_Z 3
#define BITMASK_Z 7 // 0000 0000 0000 0111
#define MAX_CELLS 41472 // Max #cells (16bit)
#define MAX_LINES 1024 // Max #lines (10bit)
#define MAX_PQ 8192 // Queue size (13bit)
#define MAX_BUFFER 16384 // Line buffer size (14bit)
#define CELL_BIT 16
#define LINE_BIT 10
#define PQ_BIT 13
#define BUFF_BIT 14
#define PQ_PRIORITY_WIDTH 16
#define PQ_PRIORITY_MASK 65535 // 0000 0000 0000 0000 1111 1111 1111 1111
#define PQ_DATA_WIDTH 16
#define PQ_DATA_MASK 4294901760 // 1111 1111 1111 1111 0000 0000 0000 0000
#define MAX_WEIGHT 255 // Max weight
#define BOARDSTR_SIZE 41472 // Size of I/O
ap_uint
<
8
>
new_weight
(
ap_uint
<
16
>
x
);
bool
pynqrouter
(
char
boardstr
[
BOARDSTR_SIZE
],
ap_uint
<
32
>
seed
,
ap_int
<
32
>
*
status
);
ap_uint
<
7
>
abs_uint7
(
ap_uint
<
7
>
a
,
ap_uint
<
7
>
b
);
ap_uint
<
3
>
abs_uint3
(
ap_uint
<
3
>
a
,
ap_uint
<
3
>
b
);
ap_uint
<
BUFF_BIT
>
search
(
ap_uint
<
BUFF_BIT
>
idx
,
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
],
ap_uint
<
CELL_BIT
>
start
,
ap_uint
<
CELL_BIT
>
goal
,
ap_uint
<
8
>
w
[
MAX_WEIGHT
]);
void
pq_push
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
priority
,
ap_uint
<
16
>
data
,
ap_uint
<
PQ_BIT
>
*
top
,
ap_uint
<
PQ_BIT
>
*
bottom
,
bool
*
is_empty
);
void
pq_pop
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
*
ret_priority
,
ap_uint
<
16
>
*
ret_data
,
ap_uint
<
PQ_BIT
>
*
top
,
ap_uint
<
PQ_BIT
>
*
bottom
,
bool
*
is_empty
);
#endif
/* __ROUTER_HPP__ */
hls_2018/router_02/router.cpp
0 → 100755
View file @
98afc37f
/**
* router.cpp
*
* for Vivado HLS
*/
#ifdef SOFTWARE
#include "ap_int.h"
#else
#include <ap_int.h>
#endif
#include "./router.hpp"
// Set weight
ap_uint
<
8
>
new_weight
(
ap_uint
<
16
>
x
)
{
#pragma HLS INLINE
// K. Terada: y = 1~32 (8bit)
ap_uint
<
8
>
y
;
y
=
((
x
&
255
)
>>
3
)
+
1
;
return
y
;
}
// Global values
static
ap_uint
<
7
>
size_x
;
// X
static
ap_uint
<
7
>
size_y
;
// Y
static
ap_uint
<
4
>
size_z
;
// Z
static
ap_uint
<
LINE_BIT
>
line_num
=
0
;
// #Lines
#ifdef DEBUG_PRINT
int
max_queue_length
;
// Max length of priority queue
int
max_search_count
;
// Max count of queue pop
int
max_buffer_length
;
// Max length of line buffer
#endif
bool
pynqrouter
(
char
boardstr
[
BOARDSTR_SIZE
],
ap_uint
<
32
>
seed
,
ap_int
<
32
>
*
status
)
{
#pragma HLS INTERFACE s_axilite port=boardstr bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=seed bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=status bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=return bundle=AXI4LS
// status(0:Solved, 1:Not solved)
*
status
=
-
1
;
// For all lines
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
];
// Line buffer
// For each line
// Note: Should not partition completely
bool
adjacents
[
MAX_LINES
];
// Line has adjacent terminals?
ap_uint
<
CELL_BIT
>
starts
[
MAX_LINES
];
// Start list
ap_uint
<
CELL_BIT
>
goals
[
MAX_LINES
];
// Goal list
ap_uint
<
BUFF_BIT
>
s_idx
[
MAX_LINES
];
// Start point on line buffer
ap_uint
<
8
>
weights
[
MAX_CELLS
];
// Weight of each cell
// Note: Should not partition weight array
// since each element will be accessed in "random" order
// ================================
// (Step.0) Initialization (BEGIN)
// ================================
// Note: Loop counter -> need an extra bit (for condition determination)
INIT_WEIGHTS:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
weights
[
i
]
=
1
;
}
/// Parse ///
size_x
=
(
boardstr
[
1
]
-
'0'
)
*
10
+
(
boardstr
[
2
]
-
'0'
);
size_y
=
(
boardstr
[
4
]
-
'0'
)
*
10
+
(
boardstr
[
5
]
-
'0'
);
size_z
=
(
boardstr
[
7
]
-
'0'
);
INIT_BOARDS:
for
(
ap_uint
<
CELL_BIT
>
idx
=
8
;
idx
<
(
ap_uint
<
CELL_BIT
>
)(
BOARDSTR_SIZE
);
idx
+=
11
)
{
// NULL-terminated
if
(
boardstr
[
idx
]
==
0
)
break
;
// Start & Goal of each line
ap_uint
<
7
>
s_x
=
(
boardstr
[
idx
+
1
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
2
]
-
'0'
);
ap_uint
<
7
>
s_y
=
(
boardstr
[
idx
+
3
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
4
]
-
'0'
);
ap_uint
<
3
>
s_z
=
(
boardstr
[
idx
+
5
]
-
'0'
)
-
1
;
ap_uint
<
7
>
g_x
=
(
boardstr
[
idx
+
6
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
7
]
-
'0'
);
ap_uint
<
7
>
g_y
=
(
boardstr
[
idx
+
8
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
9
]
-
'0'
);
ap_uint
<
3
>
g_z
=
(
boardstr
[
idx
+
10
]
-
'0'
)
-
1
;
ap_uint
<
CELL_BIT
>
start_id
=
(((
ap_uint
<
CELL_BIT
>
)
s_x
*
MAX_WIDTH
+
(
ap_uint
<
CELL_BIT
>
)
s_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
CELL_BIT
>
)
s_z
;
ap_uint
<
CELL_BIT
>
goal_id
=
(((
ap_uint
<
CELL_BIT
>
)
g_x
*
MAX_WIDTH
+
(
ap_uint
<
CELL_BIT
>
)
g_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
CELL_BIT
>
)
g_z
;
starts
[
line_num
]
=
start_id
;
goals
[
line_num
]
=
goal_id
;
weights
[
start_id
]
=
MAX_WEIGHT
;
weights
[
goal_id
]
=
MAX_WEIGHT
;
// Line has adjacent terminals?
adjacents
[
line_num
]
=
false
;
ap_int
<
8
>
dx
=
(
ap_int
<
8
>
)
g_x
-
(
ap_int
<
8
>
)
s_x
;
// Min: -71, Max: 71 (Signed 8bit)
ap_int
<
8
>
dy
=
(
ap_int
<
8
>
)
g_y
-
(
ap_int
<
8
>
)
s_y
;
// Min: -71, Max: 71 (Signed 8bit)
ap_int
<
4
>
dz
=
(
ap_int
<
4
>
)
g_z
-
(
ap_int
<
4
>
)
s_z
;
// Min: -7, Max: 7 (Signed 4bit)
if
((
dx
==
0
&&
dy
==
0
&&
(
dz
==
1
||
dz
==
-
1
))
||
(
dx
==
0
&&
(
dy
==
1
||
dy
==
-
1
)
&&
dz
==
0
)
||
((
dx
==
1
||
dx
==
-
1
)
&&
dy
==
0
&&
dz
==
0
))
{
adjacents
[
line_num
]
=
true
;
}
line_num
++
;
}
// ================================
// (Step.0) Initialization (END)
// ================================
#ifdef DEBUG_PRINT
max_queue_length
=
0
;
max_search_count
=
0
;
max_buffer_length
=
0
;
#endif
ap_uint
<
BUFF_BIT
>
pointer
=
0
;
// Pointer for line buffer
// ================================
// (Step.1) Initial Routing (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Initial Routing ..."
<<
endl
;
#endif
FIRST_ROUTING:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
s_idx
[
i
]
=
pointer
;
if
(
adjacents
[
i
]
==
true
)
continue
;
// Skip routing
#ifdef DEBUG_PRINT
//cout << "LINE #" << (int)(i + 1) << endl;
#endif
// Routing
pointer
=
search
(
s_idx
[
i
],
paths
,
starts
[
i
],
goals
[
i
],
weights
);
}
// ================================
// (Step.1) Initial Routing (END)
// ================================
// Memories for Overlap Check
ap_uint
<
1
>
overlap_checks
[
MAX_CELLS
];
bool
has_overlap
=
false
;
// ================================
// (Step.2) Rip-up Routing (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Rip-up Routing ..."
<<
endl
;
#endif
ROUTING:
for
(
ap_uint
<
16
>
round
=
0
;
round
<
32768
/* = (2048 * 16) */
;
round
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=32768
// Target line
ap_uint
<
LINE_BIT
>
target
=
round
%
line_num
;
ap_uint
<
LINE_BIT
>
next_target
=
target
+
1
;
if
(
next_target
==
line_num
)
next_target
=
0
;
#ifdef DEBUG_PRINT
//cout << "(round " << round << ") LINE #" << (int)(target + 1);
//cout << " -> " << pointer << endl;
#endif
#ifdef DEBUG_PRINT
int
buffer_length
=
pointer
-
s_idx
[
target
];
if
(
max_buffer_length
<
buffer_length
)
{
max_buffer_length
=
buffer_length
;
}
#endif
// Skip routing
if
(
adjacents
[
target
]
==
true
)
{
s_idx
[
target
]
=
pointer
;
continue
;
}
// (Step.2-1) Reset weights of target line
WEIGHT_RESET:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
target
];
j
!=
s_idx
[
next_target
];
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
weights
[
paths
[
j
]]
=
1
;
}
// (Step.2-2) Set weights of non-target lines and terminals
ap_uint
<
8
>
current_round_weight
=
new_weight
(
round
);
WEIGHT_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
next_target
];
j
!=
pointer
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=8192
weights
[
paths
[
j
]]
=
current_round_weight
;
}
WEIGHT_TERMINAL:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
weights
[
starts
[
i
]]
=
MAX_WEIGHT
;
weights
[
goals
[
i
]]
=
MAX_WEIGHT
;
}
// Reset weight of start terminal of target line (bug avoiding)
// Restore original settings in (*)
weights
[
starts
[
target
]]
=
1
;
// (Step.2-3) Routing
s_idx
[
target
]
=
pointer
;
pointer
=
search
(
s_idx
[
target
],
paths
,
starts
[
target
],
goals
[
target
],
weights
);
// (*)
weights
[
starts
[
target
]]
=
MAX_WEIGHT
;
#ifdef DEBUG_PRINT
bool
ng
=
false
;
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
if
(
weights
[
starts
[
i
]]
!=
255
||
weights
[
goals
[
i
]]
!=
255
)
{
cout
<<
i
<<
" "
;
ng
=
true
;
}
}
if
(
ng
)
{
cout
<<
endl
;
}
#endif
// (Step.2-4) Overlap check
has_overlap
=
false
;
OVERLAP_RESET:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
overlap_checks
[
i
]
=
0
;
}
OVERLAP_CHECK_LINE:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
overlap_checks
[
starts
[
i
]]
=
1
;
overlap_checks
[
goals
[
i
]]
=
1
;
}
OVERLAP_CHECK_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
next_target
];
j
!=
pointer
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=8192
ap_uint
<
CELL_BIT
>
cell_id
=
paths
[
j
];
if
(
overlap_checks
[
cell_id
])
{
has_overlap
=
true
;
break
;
}
overlap_checks
[
cell_id
]
=
1
;
}
#ifdef DEBUG_PRINT
if
(
!
has_overlap
){
cout
<<
"ROUND: "
<<
round
<<
endl
;
}
#endif
if
(
!
has_overlap
)
break
;
// Finish routing?
}
#ifdef DEBUG_PRINT
cout
<<
"MAX PQ LENGTH: "
<<
max_queue_length
<<
endl
;
cout
<<
"MAX SEARCH COUNT: "
<<
max_search_count
<<
endl
;
cout
<<
"MAX BUFFER: "
<<
max_buffer_length
<<
endl
;
#endif
// Not solved
if
(
has_overlap
)
{
*
status
=
1
;
return
false
;
}
// ================================
// (Step.2) Rip-up Routing (END)
// ================================
// ================================
// (Step.3) Output (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Output ..."
<<
endl
;
#endif
// Init: Blank = 0
OUTPUT_INIT:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
boardstr
[
i
]
=
0
;
}
// Line
OUTPUT_LINE:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
boardstr
[
starts
[
i
]]
=
(
i
+
1
);
boardstr
[
goals
[
i
]]
=
(
i
+
1
);
ap_uint
<
BUFF_BIT
>
p1
;
// p1: s_idx of target
ap_uint
<
BUFF_BIT
>
p2
;
// p2: s_idx of next target
p1
=
s_idx
[
i
];
if
(
i
==
(
ap_uint
<
LINE_BIT
>
)(
line_num
-
1
))
{
p2
=
s_idx
[
0
];
}
else
{
p2
=
s_idx
[
i
+
1
];
}
if
((
ap_uint
<
BUFF_BIT
>
)(
p2
-
p1
)
>
8192
){
p2
=
pointer
;
}
OUTPUT_LINE_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
p1
;
j
!=
p2
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
boardstr
[
paths
[
j
]]
=
(
i
+
1
);
}
}
// ================================
// (Step.3) Output (END)
// ================================
*
status
=
0
;
return
true
;
}
// ================================ //
// For Routing
// ================================ //
// Max: 71, Min: 0 (7bit)
ap_uint
<
7
>
abs_uint7
(
ap_uint
<
7
>
a
,
ap_uint
<
7
>
b
)
{
#pragma HLS INLINE
if
(
a
<
b
)
{
return
b
-
a
;
}
else
{
return
a
-
b
;
}
}
// Max: 7, Min: 0 (3bit)
ap_uint
<
3
>
abs_uint3
(
ap_uint
<
3
>
a
,
ap_uint
<
3
>
b
)
{
#pragma HLS INLINE
if
(
a
<
b
)
{
return
b
-
a
;
}
else
{
return
a
-
b
;
}
}
// Reference codes:
// http://lethe2211.hatenablog.com/entry/2014/12/30/011030
// http://www.redblobgames.com/pathfinding/a-star/implementation.html
// Need to modify "array partition factor"
ap_uint
<
BUFF_BIT
>
search
(
ap_uint
<
BUFF_BIT
>
idx
,
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
],
ap_uint
<
CELL_BIT
>
start
,
ap_uint
<
CELL_BIT
>
goal
,
ap_uint
<
8
>
w
[
MAX_CELLS
])
{
ap_uint
<
CELL_BIT
>
dist
[
MAX_CELLS
];
ap_uint
<
CELL_BIT
>
prev
[
MAX_CELLS
];
SEARCH_INIT_DIST
:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
dist
[
i
]
=
65535
;
// = (2^16 - 1)
}
// Priority queue (Circular list)
ap_uint
<
PQ_BIT
>
top
=
1
,
bottom
=
0
;
bool
is_empty
=
true
;
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
];
#pragma HLS ARRAY_PARTITION variable=pq_nodes cyclic factor=32 dim=1
/** Set!: factor=PP **/
#ifdef DEBUG_PRINT
int
queue_length
=
0
;
int
search_count
=
0
;
#endif
// Point of goal terminal
ap_uint
<
13
>
goal_xy
=
(
ap_uint
<
13
>
)(
goal
>>
BITWIDTH_Z
);
ap_uint
<
7
>
goal_x
=
(
ap_uint
<
7
>
)(
goal_xy
/
MAX_WIDTH
);
ap_uint
<
7
>
goal_y
=
(
ap_uint
<
7
>
)(
goal_xy
-
goal_x
*
MAX_WIDTH
);
ap_uint
<
3
>
goal_z
=
(
ap_uint
<
3
>
)(
goal
&
BITMASK_Z
);
dist
[
start
]
=
0
;
pq_push
(
pq_nodes
,
0
,
start
,
&
top
,
&
bottom
,
&
is_empty
);
// push start terminal
SEARCH_PQ
:
while
(
!
is_empty
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=1000
#pragma HLS LOOP_FLATTEN off
ap_uint
<
16
>
prev_cost
;
ap_uint
<
16
>
src
;
// target cell
pq_pop
(
pq_nodes
,
&
prev_cost
,
&
src
,
&
top
,
&
bottom
,
&
is_empty
);
#ifdef DEBUG_PRINT
search_count
++
;
#endif
// End routing
if
(
src
==
goal
)
break
;
// Target cell
ap_uint
<
16
>
dist_src
=
dist
[
src
];
ap_uint
<
8
>
cost
=
w
[
src
];
// Point of target cell
ap_uint
<
13
>
src_xy
=
(
ap_uint
<
13
>
)(
src
>>
BITWIDTH_Z
);
ap_uint
<
7
>
src_x
=
(
ap_uint
<
7
>
)(
src_xy
/
MAX_WIDTH
);
ap_uint
<
7
>
src_y
=
(
ap_uint
<
7
>
)(
src_xy
-
src_x
*
MAX_WIDTH
);
ap_uint
<
3
>
src_z
=
(
ap_uint
<
3
>
)(
src
&
BITMASK_Z
);
// Search adjacent cells
SEARCH_ADJACENTS
:
for
(
ap_uint
<
3
>
a
=
0
;
a
<
6
;
a
++
)
{
ap_int
<
8
>
dest_x
=
(
ap_int
<
8
>
)
src_x
;
// Min: -1, Max: 72 (Signed 8bit)
ap_int
<
8
>
dest_y
=
(
ap_int
<
8
>
)
src_y
;
// Min: -1, Max: 72 (Signed 8bit)
ap_int
<
5
>
dest_z
=
(
ap_int
<
5
>
)
src_z
;
// Min: -1, Max: 8 (Signed 5bit)
if
(
a
==
0
)
{
dest_x
-=
1
;
}
if
(
a
==
1
)
{
dest_x
+=
1
;
}
if
(
a
==
2
)
{
dest_y
-=
1
;
}
if
(
a
==
3
)
{
dest_y
+=
1
;
}
if
(
a
==
4
)
{
dest_z
-=
1
;
}
if
(
a
==
5
)
{
dest_z
+=
1
;
}
// Inside the board ? //
if
(
0
<=
dest_x
&&
dest_x
<
(
ap_int
<
8
>
)
size_x
&&
0
<=
dest_y
&&
dest_y
<
(
ap_int
<
8
>
)
size_y
&&
0
<=
dest_z
&&
dest_z
<
(
ap_int
<
5
>
)
size_z
)
{
// Adjacent cell
ap_uint
<
16
>
dest
=
(((
ap_uint
<
16
>
)
dest_x
*
MAX_WIDTH
+
(
ap_uint
<
16
>
)
dest_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
16
>
)
dest_z
;
ap_uint
<
16
>
dist_new
=
dist_src
+
cost
;
if
(
dist
[
dest
]
>
dist_new
)
{
dist
[
dest
]
=
dist_new
;
// Update dist
prev
[
dest
]
=
src
;
// Recode previous cell
dist_new
+=
abs_uint7
(
dest_x
,
goal_x
)
+
abs_uint7
(
dest_y
,
goal_y
)
+
abs_uint3
(
dest_z
,
goal_z
);
// A* heuristic
pq_push
(
pq_nodes
,
dist_new
,
dest
,
&
top
,
&
bottom
,
&
is_empty
);
// push adjacent cell
}
}
}
#ifdef DEBUG_PRINT
if
(
queue_length
<
(
bottom
-
top
+
1
))
{
queue_length
=
(
bottom
-
top
+
1
);
}
#endif
}
// Output target path
// Note: Do not include start & goal terminals
ap_uint
<
16
>
t
=
prev
[
goal
];
// Backtracking
ap_uint
<
BUFF_BIT
>
p
=
idx
;
// buffer-idx
SEARCH_BACKTRACK
:
while
(
t
!=
start
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
paths
[
p
]
=
t
;
p
++
;
t
=
prev
[
t
];
}
#ifdef DEBUG_PRINT
if
(
max_queue_length
<
queue_length
)
{
max_queue_length
=
queue_length
;
}
if
(
max_search_count
<
search_count
)
{
max_search_count
=
search_count
;
}
#endif
return
p
;
}
// Queue push (Enqueue)
// Need to modify "trip count" (5)
void
pq_push
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
priority
,
ap_uint
<
16
>
data
,
ap_uint
<
PQ_BIT
>
*
top
,
ap_uint
<
PQ_BIT
>
*
bottom
,
bool
*
is_empty
)
{
#pragma HLS INLINE
(
*
bottom
)
++
;
if
((
*
bottom
)
==
(
*
top
)
&&
!
(
*
is_empty
))
{
(
*
bottom
)
--
;
}
// Queue is full -> Last element is automatically removed
// Binary search for circular list
ap_uint
<
PQ_BIT
>
t
=
(
*
top
);
ap_uint
<
PQ_BIT
>
b
=
(
*
bottom
);
ap_uint
<
PQ_BIT
>
h
=
((
ap_uint
<
PQ_BIT
>
)(
b
-
t
)
/
2
)
+
t
;
// Note: "h = (t + b) / 2" causes a bug!
PQ_PUSH_BINARY
:
while
(
t
!=
b
)
{
#pragma HLS LOOP_TRIPCOUNT min=0 max=13
/** Set!: min=0 max=PQ_BIT **/
if
((
ap_uint
<
16
>
)(
pq_nodes
[
h
]
&
PQ_PRIORITY_MASK
)
>=
priority
)
{
b
=
h
;
}
else
{
t
=
h
+
1
;
}
h
=
((
ap_uint
<
PQ_BIT
>
)(
b
-
t
)
/
2
)
+
t
;
}
// Parallel shifting (PP parallel)
ap_uint
<
PQ_BIT
>
shift_count
=
(
*
bottom
)
-
t
;
// # of shifting
ap_uint
<
PQ_BIT
-
LOG_PP
>
bottom_H
=
(
*
bottom
)
/
PP
;
// High-order bits of (*bottom)
ap_uint
<
LOG_PP
>
bottom_L
=
(
*
bottom
)
%
PP
;
// Low-order bits of (*bottom)
ap_uint
<
PQ_BIT
>
p0
=
(
*
bottom
),
p1
;
if
(
shift_count
>
(
ap_uint
<
PQ_BIT
>
)(
bottom_L
))
{
// bottom_L bit shift
PQ_PUSH_SHIFT_H_1
:
for
(
ap_uint
<
LOG_PP
>
k
=
0
;
k
<
(
ap_uint
<
LOG_PP
>
)(
bottom_L
);
k
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=0 max=31
/** Set!: min=0 max=PP-1 **/
p0
=
p0
-
1
;
// source
p1
=
p0
+
1
;
// destination
pq_nodes
[
p1
]
=
pq_nodes
[
p0
];
}
shift_count
=
shift_count
-
bottom_L
;
}
else
{
// shift_count bit shift
PQ_PUSH_SHIFT_H_2
:
for
(
ap_uint
<
LOG_PP
>
k
=
0
;
k
<
(
ap_uint
<
LOG_PP
>
)(
shift_count
);
k
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=0 max=31
/** Set!: min=0 max=PP-1 **/
p0
=
p0
-
1
;
// source
p1
=
p0
+
1
;
// destination
pq_nodes
[
p1
]
=
pq_nodes
[
p0
];
}
pq_nodes
[
p0
]
=
((
ap_uint
<
32
>
)
data
<<
PQ_PRIORITY_WIDTH
)
|
(
ap_uint
<
32
>
)
priority
;
*
is_empty
=
false
;
return
;
}
ap_uint
<
PQ_BIT
-
LOG_PP
>
shift_count_i
=
shift_count
>>
LOG_PP
;
ap_uint
<
LOG_PP
>
shift_count_j
=
shift_count
-
(
shift_count_i
<<
LOG_PP
);
// Note: Bit adjustment is necessary
// when the values of PP & LOG_PP have been changed.
ap_uint
<
PQ_BIT
>
p
[
PP
+
1
];
#pragma HLS ARRAY_PARTITION variable=p complete dim=1
p
[
0
]
=
bottom_H
*
PP
;
ap_uint
<
32
>
tmp
[
PP
];
#pragma HLS ARRAY_PARTITION variable=tmp complete dim=1
PQ_PUSH_SHIFT_I
:
for
(
ap_uint
<
PQ_BIT
-
LOG_PP
>
i
=
0
;
i
<
(
ap_uint
<
PQ_BIT
-
LOG_PP
>
)(
shift_count_i
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=0 max=255
/** Set!: min=0 max=(MAX_PQ)/(PP)-1 **/
p
[
0
]
=
p
[
0
]
-
PP
;
for
(
ap_uint
<
LOG_PP
+
1
>
k
=
1
;
k
<=
(
ap_uint
<
LOG_PP
+
1
>
)(
PP
);
k
++
)
{
#pragma HLS UNROLL
p
[
k
]
=
p
[
0
]
+
k
;
}
for
(
ap_uint
<
LOG_PP
+
1
>
k
=
1
;
k
<=
(
ap_uint
<
LOG_PP
+
1
>
)(
PP
);
k
++
)
{
#pragma HLS UNROLL
tmp
[
k
-
1
]
=
pq_nodes
[
p
[
k
-
1
]];
}
for
(
ap_uint
<
LOG_PP
+
1
>
k
=
1
;
k
<=
(
ap_uint
<
LOG_PP
+
1
>
)(
PP
);
k
++
)
{
#pragma HLS UNROLL
pq_nodes
[
p
[
k
]]
=
tmp
[
k
-
1
];
}
}
PQ_PUSH_SHIFT_J
:
for
(
ap_uint
<
LOG_PP
>
j
=
0
;
j
<
(
ap_uint
<
LOG_PP
>
)(
shift_count_j
);
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=0 max=31
/** Set!: min=0 max=PP-1 **/
p
[
0
]
=
p
[
0
]
-
1
;
p
[
1
]
=
p
[
0
]
+
1
;
pq_nodes
[
p
[
1
]]
=
pq_nodes
[
p
[
0
]];
}
pq_nodes
[
p
[
0
]]
=
((
ap_uint
<
32
>
)
data
<<
PQ_PRIORITY_WIDTH
)
|
(
ap_uint
<
32
>
)
priority
;
*
is_empty
=
false
;
}
// Queue pop (Dequeue)
void
pq_pop
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
*
ret_priority
,
ap_uint
<
16
>
*
ret_data
,
ap_uint
<
PQ_BIT
>
*
top
,
ap_uint
<
PQ_BIT
>
*
bottom
,
bool
*
is_empty
)
{
#pragma HLS INLINE
*
ret_priority
=
(
ap_uint
<
16
>
)(
pq_nodes
[(
*
top
)]
&
PQ_PRIORITY_MASK
);
*
ret_data
=
(
ap_uint
<
16
>
)(
pq_nodes
[(
*
top
)]
>>
PQ_PRIORITY_WIDTH
);
(
*
top
)
++
;
if
(((
*
bottom
)
-
(
*
top
)
+
1
)
==
0
)
{
*
is_empty
=
true
;
}
}
hls_2018/router_02/router.hpp
0 → 100755
View file @
98afc37f
/**
* router.hpp
*
* for Vivado HLS
*/
#ifndef __ROUTER_HPP__
#define __ROUTER_HPP__
#ifdef SOFTWARE
#include "ap_int.h"
#else
#include <ap_int.h>
#endif
//#define DEBUG_PRINT // for debug
#ifdef DEBUG_PRINT
using
namespace
std
;
#endif
// Parameters
#define MAX_WIDTH 72 // Max of X, Y
#define BITWIDTH_XY 13
#define BITMASK_XY 65528 // 1111 1111 1111 1000
#define MAX_LAYER 8 // Max of Z
#define BITWIDTH_Z 3
#define BITMASK_Z 7 // 0000 0000 0000 0111
#define MAX_CELLS 41472 // Max #cells (16bit)
#define MAX_LINES 1024 // Max #lines (10bit)
#define MAX_PQ 8192 // Queue size (13bit)
#define MAX_BUFFER 16384 // Line buffer size (14bit)
#define CELL_BIT 16
#define LINE_BIT 10
#define PQ_BIT 13
#define BUFF_BIT 14
#define PQ_PRIORITY_WIDTH 16
#define PQ_PRIORITY_MASK 65535 // 0000 0000 0000 0000 1111 1111 1111 1111
#define PQ_DATA_WIDTH 16
#define PQ_DATA_MASK 4294901760 // 1111 1111 1111 1111 0000 0000 0000 0000
#define MAX_WEIGHT 255 // Max weight
#define BOARDSTR_SIZE 41472 // Size of I/O
#define PP 32 // Parameter for Parallel shifting in queue push
#define LOG_PP 5 // LOG_PP is log2(PP)
ap_uint
<
8
>
new_weight
(
ap_uint
<
16
>
x
);
bool
pynqrouter
(
char
boardstr
[
BOARDSTR_SIZE
],
ap_uint
<
32
>
seed
,
ap_int
<
32
>
*
status
);
ap_uint
<
7
>
abs_uint7
(
ap_uint
<
7
>
a
,
ap_uint
<
7
>
b
);
ap_uint
<
3
>
abs_uint3
(
ap_uint
<
3
>
a
,
ap_uint
<
3
>
b
);
ap_uint
<
BUFF_BIT
>
search
(
ap_uint
<
BUFF_BIT
>
idx
,
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
],
ap_uint
<
CELL_BIT
>
start
,
ap_uint
<
CELL_BIT
>
goal
,
ap_uint
<
8
>
w
[
MAX_WEIGHT
]);
void
pq_push
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
priority
,
ap_uint
<
16
>
data
,
ap_uint
<
PQ_BIT
>
*
top
,
ap_uint
<
PQ_BIT
>
*
bottom
,
bool
*
is_empty
);
void
pq_pop
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
*
ret_priority
,
ap_uint
<
16
>
*
ret_data
,
ap_uint
<
PQ_BIT
>
*
top
,
ap_uint
<
PQ_BIT
>
*
bottom
,
bool
*
is_empty
);
#endif
/* __ROUTER_HPP__ */
hls_2018/router_03/router.cpp
0 → 100755
View file @
98afc37f
/**
* router.cpp
*
* for Vivado HLS
*/
#ifdef SOFTWARE
#include "ap_int.h"
#else
#include <ap_int.h>
#endif
#include "./router.hpp"
// Set weight
ap_uint
<
8
>
new_weight
(
ap_uint
<
16
>
x
)
{
#pragma HLS INLINE
// K. Terada: y = 1~32 (8bit)
ap_uint
<
8
>
y
;
y
=
((
x
&
255
)
>>
3
)
+
1
;
return
y
;
}
// Global values
static
ap_uint
<
7
>
size_x
;
// X
static
ap_uint
<
7
>
size_y
;
// Y
static
ap_uint
<
4
>
size_z
;
// Z
static
ap_uint
<
LINE_BIT
>
line_num
=
0
;
// #Lines
#ifdef DEBUG_PRINT
int
max_queue_length
;
// Max length of priority queue
int
max_search_count
;
// Max count of queue pop
int
max_buffer_length
;
// Max length of line buffer
#endif
bool
pynqrouter
(
char
boardstr
[
BOARDSTR_SIZE
],
ap_uint
<
32
>
seed
,
ap_int
<
32
>
*
status
)
{
#pragma HLS INTERFACE s_axilite port=boardstr bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=seed bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=status bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=return bundle=AXI4LS
// status(0:Solved, 1:Not solved)
*
status
=
-
1
;
// For all lines
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
];
// Line buffer
// For each line
// Note: Should not partition completely
bool
adjacents
[
MAX_LINES
];
// Line has adjacent terminals?
ap_uint
<
CELL_BIT
>
starts
[
MAX_LINES
];
// Start list
ap_uint
<
CELL_BIT
>
goals
[
MAX_LINES
];
// Goal list
ap_uint
<
BUFF_BIT
>
s_idx
[
MAX_LINES
];
// Start point on line buffer
ap_uint
<
8
>
weights
[
MAX_CELLS
];
// Weight of each cell
// Note: Should not partition weight array
// since each element will be accessed in "random" order
// ================================
// (Step.0) Initialization (BEGIN)
// ================================
// Note: Loop counter -> need an extra bit (for condition determination)
INIT_WEIGHTS:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
weights
[
i
]
=
1
;
}
/// Parse ///
size_x
=
(
boardstr
[
1
]
-
'0'
)
*
10
+
(
boardstr
[
2
]
-
'0'
);
size_y
=
(
boardstr
[
4
]
-
'0'
)
*
10
+
(
boardstr
[
5
]
-
'0'
);
size_z
=
(
boardstr
[
7
]
-
'0'
);
INIT_BOARDS:
for
(
ap_uint
<
CELL_BIT
>
idx
=
8
;
idx
<
(
ap_uint
<
CELL_BIT
>
)(
BOARDSTR_SIZE
);
idx
+=
11
)
{
// NULL-terminated
if
(
boardstr
[
idx
]
==
0
)
break
;
// Start & Goal of each line
ap_uint
<
7
>
s_x
=
(
boardstr
[
idx
+
1
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
2
]
-
'0'
);
ap_uint
<
7
>
s_y
=
(
boardstr
[
idx
+
3
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
4
]
-
'0'
);
ap_uint
<
3
>
s_z
=
(
boardstr
[
idx
+
5
]
-
'0'
)
-
1
;
ap_uint
<
7
>
g_x
=
(
boardstr
[
idx
+
6
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
7
]
-
'0'
);
ap_uint
<
7
>
g_y
=
(
boardstr
[
idx
+
8
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
9
]
-
'0'
);
ap_uint
<
3
>
g_z
=
(
boardstr
[
idx
+
10
]
-
'0'
)
-
1
;
ap_uint
<
CELL_BIT
>
start_id
=
(((
ap_uint
<
CELL_BIT
>
)
s_x
*
MAX_WIDTH
+
(
ap_uint
<
CELL_BIT
>
)
s_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
CELL_BIT
>
)
s_z
;
ap_uint
<
CELL_BIT
>
goal_id
=
(((
ap_uint
<
CELL_BIT
>
)
g_x
*
MAX_WIDTH
+
(
ap_uint
<
CELL_BIT
>
)
g_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
CELL_BIT
>
)
g_z
;
starts
[
line_num
]
=
start_id
;
goals
[
line_num
]
=
goal_id
;
weights
[
start_id
]
=
MAX_WEIGHT
;
weights
[
goal_id
]
=
MAX_WEIGHT
;
// Line has adjacent terminals?
adjacents
[
line_num
]
=
false
;
ap_int
<
8
>
dx
=
(
ap_int
<
8
>
)
g_x
-
(
ap_int
<
8
>
)
s_x
;
// Min: -71, Max: 71 (Signed 8bit)
ap_int
<
8
>
dy
=
(
ap_int
<
8
>
)
g_y
-
(
ap_int
<
8
>
)
s_y
;
// Min: -71, Max: 71 (Signed 8bit)
ap_int
<
4
>
dz
=
(
ap_int
<
4
>
)
g_z
-
(
ap_int
<
4
>
)
s_z
;
// Min: -7, Max: 7 (Signed 4bit)
if
((
dx
==
0
&&
dy
==
0
&&
(
dz
==
1
||
dz
==
-
1
))
||
(
dx
==
0
&&
(
dy
==
1
||
dy
==
-
1
)
&&
dz
==
0
)
||
((
dx
==
1
||
dx
==
-
1
)
&&
dy
==
0
&&
dz
==
0
))
{
adjacents
[
line_num
]
=
true
;
}
line_num
++
;
}
// ================================
// (Step.0) Initialization (END)
// ================================
#ifdef DEBUG_PRINT
max_queue_length
=
0
;
max_search_count
=
0
;
max_buffer_length
=
0
;
#endif
ap_uint
<
BUFF_BIT
>
pointer
=
0
;
// Pointer for line buffer
// ================================
// (Step.1) Initial Routing (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Initial Routing ..."
<<
endl
;
#endif
FIRST_ROUTING:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
s_idx
[
i
]
=
pointer
;
if
(
adjacents
[
i
]
==
true
)
continue
;
// Skip routing
#ifdef DEBUG_PRINT
//cout << "LINE #" << (int)(i + 1) << endl;
#endif
// Routing
pointer
=
search
(
s_idx
[
i
],
paths
,
starts
[
i
],
goals
[
i
],
weights
);
}
// ================================
// (Step.1) Initial Routing (END)
// ================================
// Memories for Overlap Check
ap_uint
<
1
>
overlap_checks
[
MAX_CELLS
];
bool
has_overlap
=
false
;
// ================================
// (Step.2) Rip-up Routing (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Rip-up Routing ..."
<<
endl
;
#endif
ROUTING:
for
(
ap_uint
<
16
>
round
=
0
;
round
<
32768
/* = (2048 * 16) */
;
round
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=32768
// Target line
ap_uint
<
LINE_BIT
>
target
=
round
%
line_num
;
ap_uint
<
LINE_BIT
>
next_target
=
target
+
1
;
if
(
next_target
==
line_num
)
next_target
=
0
;
#ifdef DEBUG_PRINT
cout
<<
"(round "
<<
round
<<
") LINE #"
<<
(
int
)(
target
+
1
);
cout
<<
" -> "
<<
pointer
<<
endl
;
#endif
#ifdef DEBUG_PRINT
int
buffer_length
=
pointer
-
s_idx
[
target
];
if
(
max_buffer_length
<
buffer_length
)
{
max_buffer_length
=
buffer_length
;
}
#endif
// Skip routing
if
(
adjacents
[
target
]
==
true
)
{
s_idx
[
target
]
=
pointer
;
continue
;
}
// (Step.2-1) Reset weights of target line
WEIGHT_RESET:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
target
];
j
!=
s_idx
[
next_target
];
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
weights
[
paths
[
j
]]
=
1
;
}
// (Step.2-2) Set weights of non-target lines and terminals
ap_uint
<
8
>
current_round_weight
=
new_weight
(
round
);
WEIGHT_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
next_target
];
j
!=
pointer
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=8192
weights
[
paths
[
j
]]
=
current_round_weight
;
}
WEIGHT_TERMINAL:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
weights
[
starts
[
i
]]
=
MAX_WEIGHT
;
weights
[
goals
[
i
]]
=
MAX_WEIGHT
;
}
// Reset weight of start terminal of target line (bug avoiding)
// Restore original settings in (*)
weights
[
starts
[
target
]]
=
1
;
// (Step.2-3) Routing
s_idx
[
target
]
=
pointer
;
pointer
=
search
(
s_idx
[
target
],
paths
,
starts
[
target
],
goals
[
target
],
weights
);
// (*)
weights
[
starts
[
target
]]
=
MAX_WEIGHT
;
#ifdef DEBUG_PRINT
bool
ng
=
false
;
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
if
(
weights
[
starts
[
i
]]
!=
255
||
weights
[
goals
[
i
]]
!=
255
)
{
cout
<<
i
<<
" "
;
ng
=
true
;
}
}
if
(
ng
)
{
cout
<<
endl
;
}
#endif
// (Step.2-4) Overlap check
has_overlap
=
false
;
OVERLAP_RESET:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
overlap_checks
[
i
]
=
0
;
}
OVERLAP_CHECK_LINE:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
overlap_checks
[
starts
[
i
]]
=
1
;
overlap_checks
[
goals
[
i
]]
=
1
;
}
OVERLAP_CHECK_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
next_target
];
j
!=
pointer
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=8192
ap_uint
<
CELL_BIT
>
cell_id
=
paths
[
j
];
if
(
overlap_checks
[
cell_id
])
{
has_overlap
=
true
;
break
;
}
overlap_checks
[
cell_id
]
=
1
;
}
#ifdef DEBUG_PRINT
if
(
!
has_overlap
){
cout
<<
"ROUND: "
<<
round
<<
endl
;
}
#endif
if
(
!
has_overlap
)
break
;
// Finish routing?
}
#ifdef DEBUG_PRINT
cout
<<
"MAX PQ LENGTH: "
<<
max_queue_length
<<
endl
;
cout
<<
"MAX SEARCH COUNT: "
<<
max_search_count
<<
endl
;
cout
<<
"MAX BUFFER: "
<<
max_buffer_length
<<
endl
;
#endif
// Not solved
if
(
has_overlap
)
{
*
status
=
1
;
return
false
;
}
// ================================
// (Step.2) Rip-up Routing (END)
// ================================
// ================================
// (Step.3) Output (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Output ..."
<<
endl
;
#endif
// Init: Blank = 0
OUTPUT_INIT:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
boardstr
[
i
]
=
0
;
}
// Line
OUTPUT_LINE:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
boardstr
[
starts
[
i
]]
=
(
i
+
1
);
boardstr
[
goals
[
i
]]
=
(
i
+
1
);
ap_uint
<
BUFF_BIT
>
p1
;
// p1: s_idx of target
ap_uint
<
BUFF_BIT
>
p2
;
// p2: s_idx of next target
p1
=
s_idx
[
i
];
if
(
i
==
(
ap_uint
<
LINE_BIT
>
)(
line_num
-
1
))
{
p2
=
s_idx
[
0
];
}
else
{
p2
=
s_idx
[
i
+
1
];
}
if
((
ap_uint
<
BUFF_BIT
>
)(
p2
-
p1
)
>
8192
){
p2
=
pointer
;
}
OUTPUT_LINE_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
p1
;
j
!=
p2
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
boardstr
[
paths
[
j
]]
=
(
i
+
1
);
}
}
// ================================
// (Step.3) Output (END)
// ================================
*
status
=
0
;
return
true
;
}
// ================================ //
// For Routing
// ================================ //
// Max: 71, Min: 0 (7bit)
ap_uint
<
7
>
abs_uint7
(
ap_uint
<
7
>
a
,
ap_uint
<
7
>
b
)
{
#pragma HLS INLINE
if
(
a
<
b
)
{
return
b
-
a
;
}
else
{
return
a
-
b
;
}
}
// Max: 7, Min: 0 (3bit)
ap_uint
<
3
>
abs_uint3
(
ap_uint
<
3
>
a
,
ap_uint
<
3
>
b
)
{
#pragma HLS INLINE
if
(
a
<
b
)
{
return
b
-
a
;
}
else
{
return
a
-
b
;
}
}
// Reference codes:
// http://lethe2211.hatenablog.com/entry/2014/12/30/011030
// http://www.redblobgames.com/pathfinding/a-star/implementation.html
// Need to modify "array partition factor"
ap_uint
<
BUFF_BIT
>
search
(
ap_uint
<
BUFF_BIT
>
idx
,
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
],
ap_uint
<
CELL_BIT
>
start
,
ap_uint
<
CELL_BIT
>
goal
,
ap_uint
<
8
>
w
[
MAX_CELLS
])
{
ap_uint
<
CELL_BIT
>
dist
[
MAX_CELLS
];
ap_uint
<
CELL_BIT
>
prev
[
MAX_CELLS
];
SEARCH_INIT_DIST
:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
dist
[
i
]
=
65535
;
// = (2^16 - 1)
}
// Priority queue (Heap)
ap_uint
<
PQ_BIT
>
pq_len
=
0
;
bool
is_empty
=
true
;
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
];
#ifdef DEBUG_PRINT
int
queue_length
=
0
;
int
search_count
=
0
;
#endif
// Point of goal terminal
ap_uint
<
13
>
goal_xy
=
(
ap_uint
<
13
>
)(
goal
>>
BITWIDTH_Z
);
ap_uint
<
7
>
goal_x
=
(
ap_uint
<
7
>
)(
goal_xy
/
MAX_WIDTH
);
ap_uint
<
7
>
goal_y
=
(
ap_uint
<
7
>
)(
goal_xy
-
goal_x
*
MAX_WIDTH
);
ap_uint
<
3
>
goal_z
=
(
ap_uint
<
3
>
)(
goal
&
BITMASK_Z
);
dist
[
start
]
=
0
;
pq_push
(
pq_nodes
,
0
,
start
,
&
pq_len
,
&
is_empty
);
// push start terminal
SEARCH_PQ
:
while
(
!
is_empty
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=1000
#pragma HLS LOOP_FLATTEN off
ap_uint
<
16
>
prev_cost
;
ap_uint
<
16
>
src
;
// target cell
pq_pop
(
pq_nodes
,
&
prev_cost
,
&
src
,
&
pq_len
,
&
is_empty
);
#ifdef DEBUG_PRINT
search_count
++
;
#endif
// End routing
if
(
src
==
goal
)
break
;
// Target cell
ap_uint
<
16
>
dist_src
=
dist
[
src
];
ap_uint
<
8
>
cost
=
w
[
src
];
// Point of target cell
ap_uint
<
13
>
src_xy
=
(
ap_uint
<
13
>
)(
src
>>
BITWIDTH_Z
);
ap_uint
<
7
>
src_x
=
(
ap_uint
<
7
>
)(
src_xy
/
MAX_WIDTH
);
ap_uint
<
7
>
src_y
=
(
ap_uint
<
7
>
)(
src_xy
-
src_x
*
MAX_WIDTH
);
ap_uint
<
3
>
src_z
=
(
ap_uint
<
3
>
)(
src
&
BITMASK_Z
);
// Search adjacent cells
SEARCH_ADJACENTS
:
for
(
ap_uint
<
3
>
a
=
0
;
a
<
6
;
a
++
)
{
ap_int
<
8
>
dest_x
=
(
ap_int
<
8
>
)
src_x
;
// Min: -1, Max: 72 (Signed 8bit)
ap_int
<
8
>
dest_y
=
(
ap_int
<
8
>
)
src_y
;
// Min: -1, Max: 72 (Signed 8bit)
ap_int
<
5
>
dest_z
=
(
ap_int
<
5
>
)
src_z
;
// Min: -1, Max: 8 (Signed 5bit)
if
(
a
==
0
)
{
dest_x
-=
1
;
}
if
(
a
==
1
)
{
dest_x
+=
1
;
}
if
(
a
==
2
)
{
dest_y
-=
1
;
}
if
(
a
==
3
)
{
dest_y
+=
1
;
}
if
(
a
==
4
)
{
dest_z
-=
1
;
}
if
(
a
==
5
)
{
dest_z
+=
1
;
}
// Inside the board ? //
if
(
0
<=
dest_x
&&
dest_x
<
(
ap_int
<
8
>
)
size_x
&&
0
<=
dest_y
&&
dest_y
<
(
ap_int
<
8
>
)
size_y
&&
0
<=
dest_z
&&
dest_z
<
(
ap_int
<
5
>
)
size_z
)
{
// Adjacent cell
ap_uint
<
16
>
dest
=
(((
ap_uint
<
16
>
)
dest_x
*
MAX_WIDTH
+
(
ap_uint
<
16
>
)
dest_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
16
>
)
dest_z
;
ap_uint
<
16
>
dist_new
=
dist_src
+
cost
;
if
(
dist
[
dest
]
>
dist_new
)
{
dist
[
dest
]
=
dist_new
;
// Update dist
prev
[
dest
]
=
src
;
// Recode previous cell
dist_new
+=
abs_uint7
(
dest_x
,
goal_x
)
+
abs_uint7
(
dest_y
,
goal_y
)
+
abs_uint3
(
dest_z
,
goal_z
);
// A* heuristic
pq_push
(
pq_nodes
,
dist_new
,
dest
,
&
pq_len
,
&
is_empty
);
// push adjacent cell
}
}
}
#ifdef DEBUG_PRINT
if
(
queue_length
<
pq_len
)
{
queue_length
=
pq_len
;
}
#endif
}
// Output target path
// Note: Do not include start & goal terminals
ap_uint
<
16
>
t
=
prev
[
goal
];
// Backtracking
ap_uint
<
BUFF_BIT
>
p
=
idx
;
// buffer-idx
SEARCH_BACKTRACK
:
while
(
t
!=
start
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
paths
[
p
]
=
t
;
p
++
;
t
=
prev
[
t
];
}
#ifdef DEBUG_PRINT
if
(
max_queue_length
<
queue_length
)
{
max_queue_length
=
queue_length
;
}
if
(
max_search_count
<
search_count
)
{
max_search_count
=
search_count
;
}
#endif
return
p
;
}
// Queue push (Enqueue)
// Need to modify "trip count" (1)
void
pq_push
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
priority
,
ap_uint
<
16
>
data
,
ap_uint
<
PQ_BIT
>
*
pq_len
,
bool
*
is_empty
)
{
#pragma HLS INLINE
(
*
pq_len
)
++
;
if
((
*
pq_len
)
==
0
)
{
(
*
pq_len
)
--
;
}
// Queue is full -> Last element is automatically removed
// Binary search for circular list
ap_uint
<
PQ_BIT
>
i
=
(
*
pq_len
);
ap_uint
<
PQ_BIT
>
p
=
(
*
pq_len
)
>>
1
;
// parent node
PQ_PUSH_LOOP
:
while
(
i
>
1
&&
(
ap_uint
<
16
>
)(
pq_nodes
[
p
]
&
PQ_PRIORITY_MASK
)
>=
priority
)
{
#pragma HLS LOOP_TRIPCOUNT min=0 max=16
/** Set!: min=0 max=PQ_BIT **/
pq_nodes
[
i
]
=
pq_nodes
[
p
];
i
=
p
;
p
=
p
>>
1
;
// parent node
}
pq_nodes
[
i
]
=
((
ap_uint
<
32
>
)
data
<<
PQ_PRIORITY_WIDTH
)
|
(
ap_uint
<
32
>
)
priority
;
*
is_empty
=
false
;
}
// Queue pop (Dequeue)
// Need to modify "trip count" (1)
void
pq_pop
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
*
ret_priority
,
ap_uint
<
16
>
*
ret_data
,
ap_uint
<
PQ_BIT
>
*
pq_len
,
bool
*
is_empty
)
{
#pragma HLS INLINE
*
ret_priority
=
(
ap_uint
<
16
>
)(
pq_nodes
[
1
]
&
PQ_PRIORITY_MASK
);
*
ret_data
=
(
ap_uint
<
16
>
)(
pq_nodes
[
1
]
>>
PQ_PRIORITY_WIDTH
);
ap_uint
<
PQ_BIT
>
i
=
1
;
// root node
ap_uint
<
PQ_BIT
>
last_priority
=
(
ap_uint
<
16
>
)(
pq_nodes
[
*
pq_len
]
&
PQ_PRIORITY_MASK
);
// Priority of last element
PQ_POP_LOOP
:
while
(
1
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=16
/** Set!: min=0 max=PQ_BIT **/
ap_uint
<
PQ_BIT
>
c1
=
i
<<
1
;
// child node(left)
ap_uint
<
PQ_BIT
>
c2
=
c1
+
1
;
// child node(right)
if
(
c1
<
*
pq_len
&&
(
ap_uint
<
16
>
)(
pq_nodes
[
c1
]
&
PQ_PRIORITY_MASK
)
<=
last_priority
)
{
if
(
c2
<
*
pq_len
&&
(
ap_uint
<
16
>
)(
pq_nodes
[
c2
]
&
PQ_PRIORITY_MASK
)
<=
(
ap_uint
<
16
>
)(
pq_nodes
[
c1
]
&
PQ_PRIORITY_MASK
))
{
pq_nodes
[
i
]
=
pq_nodes
[
c2
];
i
=
c2
;
}
else
{
pq_nodes
[
i
]
=
pq_nodes
[
c1
];
i
=
c1
;
}
}
else
{
if
(
c2
<
*
pq_len
&&
(
ap_uint
<
16
>
)(
pq_nodes
[
c2
]
&
PQ_PRIORITY_MASK
)
<=
last_priority
)
{
pq_nodes
[
i
]
=
pq_nodes
[
c2
];
i
=
c2
;
}
else
{
break
;
}
}
}
pq_nodes
[
i
]
=
pq_nodes
[
*
pq_len
];
(
*
pq_len
)
--
;
if
((
*
pq_len
)
==
0
)
{
*
is_empty
=
true
;
}
}
hls_2018/router_03/router.hpp
0 → 100755
View file @
98afc37f
/**
* router.hpp
*
* for Vivado HLS
*/
#ifndef __ROUTER_HPP__
#define __ROUTER_HPP__
#ifdef SOFTWARE
#include "ap_int.h"
#else
#include <ap_int.h>
#endif
//#define DEBUG_PRINT // for debug
#ifdef DEBUG_PRINT
using
namespace
std
;
#endif
// Parameters
#define MAX_WIDTH 72 // Max of X, Y
#define BITWIDTH_XY 13
#define BITMASK_XY 65528 // 1111 1111 1111 1000
#define MAX_LAYER 8 // Max of Z
#define BITWIDTH_Z 3
#define BITMASK_Z 7 // 0000 0000 0000 0111
#define MAX_CELLS 41472 // Max #cells (16bit)
#define MAX_LINES 1024 // Max #lines (10bit)
#define MAX_PQ 32768 // Queue size (15bit)
#define MAX_BUFFER 16384 // Line buffer size (14bit)
#define CELL_BIT 16
#define LINE_BIT 10
#define PQ_BIT 15
#define BUFF_BIT 14
#define PQ_PRIORITY_WIDTH 16
#define PQ_PRIORITY_MASK 65535 // 0000 0000 0000 0000 1111 1111 1111 1111
#define PQ_DATA_WIDTH 16
#define PQ_DATA_MASK 4294901760 // 1111 1111 1111 1111 0000 0000 0000 0000
#define MAX_WEIGHT 255 // Max weight
#define BOARDSTR_SIZE 41472 // Size of I/O
ap_uint
<
8
>
new_weight
(
ap_uint
<
16
>
x
);
bool
pynqrouter
(
char
boardstr
[
BOARDSTR_SIZE
],
ap_uint
<
32
>
seed
,
ap_int
<
32
>
*
status
);
ap_uint
<
7
>
abs_uint7
(
ap_uint
<
7
>
a
,
ap_uint
<
7
>
b
);
ap_uint
<
3
>
abs_uint3
(
ap_uint
<
3
>
a
,
ap_uint
<
3
>
b
);
ap_uint
<
BUFF_BIT
>
search
(
ap_uint
<
BUFF_BIT
>
idx
,
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
],
ap_uint
<
CELL_BIT
>
start
,
ap_uint
<
CELL_BIT
>
goal
,
ap_uint
<
8
>
w
[
MAX_WEIGHT
]);
void
pq_push
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
priority
,
ap_uint
<
16
>
data
,
ap_uint
<
PQ_BIT
>
*
pq_len
,
bool
*
is_empty
);
void
pq_pop
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
*
ret_priority
,
ap_uint
<
16
>
*
ret_data
,
ap_uint
<
PQ_BIT
>
*
pq_len
,
bool
*
is_empty
);
#endif
/* __ROUTER_HPP__ */
hls_2018/router_04/router.cpp
0 → 100755
View file @
98afc37f
/**
* router.cpp
*
* for Vivado HLS
*/
#ifdef SOFTWARE
#include "ap_int.h"
#else
#include <ap_int.h>
#endif
#include "./router.hpp"
// Set weight
ap_uint
<
8
>
new_weight
(
ap_uint
<
16
>
x
)
{
#pragma HLS INLINE
// K. Terada: y = 1~32 (8bit)
ap_uint
<
8
>
y
;
y
=
((
x
&
255
)
>>
3
)
+
1
;
return
y
;
}
// Global values
static
ap_uint
<
7
>
size_x
;
// X
static
ap_uint
<
7
>
size_y
;
// Y
static
ap_uint
<
4
>
size_z
;
// Z
static
ap_uint
<
LINE_BIT
>
line_num
=
0
;
// #Lines
#ifdef DEBUG_PRINT
int
max_queue_length
;
// Max length of priority queue
int
max_search_count
;
// Max count of queue pop
int
max_buffer_length
;
// Max length of line buffer
#endif
bool
pynqrouter
(
char
boardstr
[
BOARDSTR_SIZE
],
ap_uint
<
32
>
seed
,
ap_int
<
32
>
*
status
)
{
#pragma HLS INTERFACE s_axilite port=boardstr bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=seed bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=status bundle=AXI4LS
#pragma HLS INTERFACE s_axilite port=return bundle=AXI4LS
// status(0:Solved, 1:Not solved)
*
status
=
-
1
;
// For all lines
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
];
// Line buffer
// For each line
// Note: Should not partition completely
bool
adjacents
[
MAX_LINES
];
// Line has adjacent terminals?
ap_uint
<
CELL_BIT
>
starts
[
MAX_LINES
];
// Start list
ap_uint
<
CELL_BIT
>
goals
[
MAX_LINES
];
// Goal list
ap_uint
<
BUFF_BIT
>
s_idx
[
MAX_LINES
];
// Start point on line buffer
ap_uint
<
8
>
weights
[
MAX_CELLS
];
// Weight of each cell
// Note: Should not partition weight array
// since each element will be accessed in "random" order
// ================================
// (Step.0) Initialization (BEGIN)
// ================================
// Note: Loop counter -> need an extra bit (for condition determination)
INIT_WEIGHTS:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
weights
[
i
]
=
1
;
}
/// Parse ///
size_x
=
(
boardstr
[
1
]
-
'0'
)
*
10
+
(
boardstr
[
2
]
-
'0'
);
size_y
=
(
boardstr
[
4
]
-
'0'
)
*
10
+
(
boardstr
[
5
]
-
'0'
);
size_z
=
(
boardstr
[
7
]
-
'0'
);
INIT_BOARDS:
for
(
ap_uint
<
CELL_BIT
>
idx
=
8
;
idx
<
(
ap_uint
<
CELL_BIT
>
)(
BOARDSTR_SIZE
);
idx
+=
11
)
{
// NULL-terminated
if
(
boardstr
[
idx
]
==
0
)
break
;
// Start & Goal of each line
ap_uint
<
7
>
s_x
=
(
boardstr
[
idx
+
1
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
2
]
-
'0'
);
ap_uint
<
7
>
s_y
=
(
boardstr
[
idx
+
3
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
4
]
-
'0'
);
ap_uint
<
3
>
s_z
=
(
boardstr
[
idx
+
5
]
-
'0'
)
-
1
;
ap_uint
<
7
>
g_x
=
(
boardstr
[
idx
+
6
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
7
]
-
'0'
);
ap_uint
<
7
>
g_y
=
(
boardstr
[
idx
+
8
]
-
'0'
)
*
10
+
(
boardstr
[
idx
+
9
]
-
'0'
);
ap_uint
<
3
>
g_z
=
(
boardstr
[
idx
+
10
]
-
'0'
)
-
1
;
ap_uint
<
CELL_BIT
>
start_id
=
(((
ap_uint
<
CELL_BIT
>
)
s_x
*
MAX_WIDTH
+
(
ap_uint
<
CELL_BIT
>
)
s_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
CELL_BIT
>
)
s_z
;
ap_uint
<
CELL_BIT
>
goal_id
=
(((
ap_uint
<
CELL_BIT
>
)
g_x
*
MAX_WIDTH
+
(
ap_uint
<
CELL_BIT
>
)
g_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
CELL_BIT
>
)
g_z
;
starts
[
line_num
]
=
start_id
;
goals
[
line_num
]
=
goal_id
;
weights
[
start_id
]
=
MAX_WEIGHT
;
weights
[
goal_id
]
=
MAX_WEIGHT
;
// Line has adjacent terminals?
adjacents
[
line_num
]
=
false
;
ap_int
<
8
>
dx
=
(
ap_int
<
8
>
)
g_x
-
(
ap_int
<
8
>
)
s_x
;
// Min: -71, Max: 71 (Signed 8bit)
ap_int
<
8
>
dy
=
(
ap_int
<
8
>
)
g_y
-
(
ap_int
<
8
>
)
s_y
;
// Min: -71, Max: 71 (Signed 8bit)
ap_int
<
4
>
dz
=
(
ap_int
<
4
>
)
g_z
-
(
ap_int
<
4
>
)
s_z
;
// Min: -7, Max: 7 (Signed 4bit)
if
((
dx
==
0
&&
dy
==
0
&&
(
dz
==
1
||
dz
==
-
1
))
||
(
dx
==
0
&&
(
dy
==
1
||
dy
==
-
1
)
&&
dz
==
0
)
||
((
dx
==
1
||
dx
==
-
1
)
&&
dy
==
0
&&
dz
==
0
))
{
adjacents
[
line_num
]
=
true
;
}
line_num
++
;
}
// ================================
// (Step.0) Initialization (END)
// ================================
#ifdef DEBUG_PRINT
max_queue_length
=
0
;
max_search_count
=
0
;
max_buffer_length
=
0
;
#endif
ap_uint
<
BUFF_BIT
>
pointer
=
0
;
// Pointer for line buffer
// ================================
// (Step.1) Initial Routing (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Initial Routing ..."
<<
endl
;
#endif
FIRST_ROUTING:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
s_idx
[
i
]
=
pointer
;
if
(
adjacents
[
i
]
==
true
)
continue
;
// Skip routing
#ifdef DEBUG_PRINT
//cout << "LINE #" << (int)(i + 1) << endl;
#endif
// Routing
pointer
=
search
(
s_idx
[
i
],
paths
,
starts
[
i
],
goals
[
i
],
weights
);
}
// ================================
// (Step.1) Initial Routing (END)
// ================================
// Memories for Overlap Check
ap_uint
<
1
>
overlap_checks
[
MAX_CELLS
];
bool
has_overlap
=
false
;
// ================================
// (Step.2) Rip-up Routing (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Rip-up Routing ..."
<<
endl
;
#endif
ROUTING:
for
(
ap_uint
<
16
>
round
=
0
;
round
<
32768
/* = (2048 * 16) */
;
round
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=32768
// Target line
ap_uint
<
LINE_BIT
>
target
=
round
%
line_num
;
ap_uint
<
LINE_BIT
>
next_target
=
target
+
1
;
if
(
next_target
==
line_num
)
next_target
=
0
;
#ifdef DEBUG_PRINT
cout
<<
"(round "
<<
round
<<
") LINE #"
<<
(
int
)(
target
+
1
);
cout
<<
" -> "
<<
pointer
<<
endl
;
#endif
#ifdef DEBUG_PRINT
int
buffer_length
=
pointer
-
s_idx
[
target
];
if
(
max_buffer_length
<
buffer_length
)
{
max_buffer_length
=
buffer_length
;
}
#endif
// Skip routing
if
(
adjacents
[
target
]
==
true
)
{
s_idx
[
target
]
=
pointer
;
continue
;
}
// (Step.2-1) Reset weights of target line
WEIGHT_RESET:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
target
];
j
!=
s_idx
[
next_target
];
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
weights
[
paths
[
j
]]
=
1
;
}
// (Step.2-2) Set weights of non-target lines and terminals
ap_uint
<
8
>
current_round_weight
=
new_weight
(
round
);
WEIGHT_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
next_target
];
j
!=
pointer
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=8192
weights
[
paths
[
j
]]
=
current_round_weight
;
}
WEIGHT_TERMINAL:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
weights
[
starts
[
i
]]
=
MAX_WEIGHT
;
weights
[
goals
[
i
]]
=
MAX_WEIGHT
;
}
// Reset weight of start terminal of target line (bug avoiding)
// Restore original settings in (*)
weights
[
starts
[
target
]]
=
1
;
// (Step.2-3) Routing
s_idx
[
target
]
=
pointer
;
pointer
=
search
(
s_idx
[
target
],
paths
,
starts
[
target
],
goals
[
target
],
weights
);
// (*)
weights
[
starts
[
target
]]
=
MAX_WEIGHT
;
#ifdef DEBUG_PRINT
bool
ng
=
false
;
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
if
(
weights
[
starts
[
i
]]
!=
255
||
weights
[
goals
[
i
]]
!=
255
)
{
cout
<<
i
<<
" "
;
ng
=
true
;
}
}
if
(
ng
)
{
cout
<<
endl
;
}
#endif
// (Step.2-4) Overlap check
has_overlap
=
false
;
OVERLAP_RESET:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
overlap_checks
[
i
]
=
0
;
}
OVERLAP_CHECK_LINE:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
overlap_checks
[
starts
[
i
]]
=
1
;
overlap_checks
[
goals
[
i
]]
=
1
;
}
OVERLAP_CHECK_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
s_idx
[
next_target
];
j
!=
pointer
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=8192
ap_uint
<
CELL_BIT
>
cell_id
=
paths
[
j
];
if
(
overlap_checks
[
cell_id
])
{
has_overlap
=
true
;
break
;
}
overlap_checks
[
cell_id
]
=
1
;
}
#ifdef DEBUG_PRINT
if
(
!
has_overlap
){
cout
<<
"ROUND: "
<<
round
<<
endl
;
}
#endif
if
(
!
has_overlap
)
break
;
// Finish routing?
}
#ifdef DEBUG_PRINT
cout
<<
"MAX PQ LENGTH: "
<<
max_queue_length
<<
endl
;
cout
<<
"MAX SEARCH COUNT: "
<<
max_search_count
<<
endl
;
cout
<<
"MAX BUFFER: "
<<
max_buffer_length
<<
endl
;
#endif
// Not solved
if
(
has_overlap
)
{
*
status
=
1
;
return
false
;
}
// ================================
// (Step.2) Rip-up Routing (END)
// ================================
// ================================
// (Step.3) Output (BEGIN)
// ================================
#ifdef DEBUG_PRINT
cout
<<
"Output ..."
<<
endl
;
#endif
// Init: Blank = 0
OUTPUT_INIT:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
boardstr
[
i
]
=
0
;
}
// Line
OUTPUT_LINE:
for
(
ap_uint
<
LINE_BIT
>
i
=
0
;
i
<
(
ap_uint
<
LINE_BIT
>
)(
line_num
);
i
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=2 max=999
boardstr
[
starts
[
i
]]
=
(
i
+
1
);
boardstr
[
goals
[
i
]]
=
(
i
+
1
);
ap_uint
<
BUFF_BIT
>
p1
;
// p1: s_idx of target
ap_uint
<
BUFF_BIT
>
p2
;
// p2: s_idx of next target
p1
=
s_idx
[
i
];
if
(
i
==
(
ap_uint
<
LINE_BIT
>
)(
line_num
-
1
))
{
p2
=
s_idx
[
0
];
}
else
{
p2
=
s_idx
[
i
+
1
];
}
if
((
ap_uint
<
BUFF_BIT
>
)(
p2
-
p1
)
>
8192
){
p2
=
pointer
;
}
OUTPUT_LINE_PATH:
for
(
ap_uint
<
BUFF_BIT
>
j
=
p1
;
j
!=
p2
;
j
++
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
boardstr
[
paths
[
j
]]
=
(
i
+
1
);
}
}
// ================================
// (Step.3) Output (END)
// ================================
*
status
=
0
;
return
true
;
}
// ================================ //
// For Routing
// ================================ //
// Max: 71, Min: 0 (7bit)
ap_uint
<
7
>
abs_uint7
(
ap_uint
<
7
>
a
,
ap_uint
<
7
>
b
)
{
#pragma HLS INLINE
if
(
a
<
b
)
{
return
b
-
a
;
}
else
{
return
a
-
b
;
}
}
// Max: 7, Min: 0 (3bit)
ap_uint
<
3
>
abs_uint3
(
ap_uint
<
3
>
a
,
ap_uint
<
3
>
b
)
{
#pragma HLS INLINE
if
(
a
<
b
)
{
return
b
-
a
;
}
else
{
return
a
-
b
;
}
}
// Reference codes:
// http://lethe2211.hatenablog.com/entry/2014/12/30/011030
// http://www.redblobgames.com/pathfinding/a-star/implementation.html
// Need to modify "array partition factor"
ap_uint
<
BUFF_BIT
>
search
(
ap_uint
<
BUFF_BIT
>
idx
,
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
],
ap_uint
<
CELL_BIT
>
start
,
ap_uint
<
CELL_BIT
>
goal
,
ap_uint
<
8
>
w
[
MAX_CELLS
])
{
ap_uint
<
CELL_BIT
>
dist
[
MAX_CELLS
];
ap_uint
<
CELL_BIT
>
prev
[
MAX_CELLS
];
SEARCH_INIT_DIST
:
for
(
ap_uint
<
CELL_BIT
>
i
=
0
;
i
<
(
ap_uint
<
CELL_BIT
>
)(
MAX_CELLS
);
i
++
)
{
dist
[
i
]
=
65535
;
// = (2^16 - 1)
}
// Priority queue (Heap)
ap_uint
<
PQ_BIT
>
pq_len
=
0
;
bool
is_empty
=
true
;
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
];
#ifdef DEBUG_PRINT
int
queue_length
=
0
;
int
search_count
=
0
;
#endif
// Point of goal terminal
ap_uint
<
13
>
goal_xy
=
(
ap_uint
<
13
>
)(
goal
>>
BITWIDTH_Z
);
ap_uint
<
7
>
goal_x
=
(
ap_uint
<
7
>
)(
goal_xy
/
MAX_WIDTH
);
ap_uint
<
7
>
goal_y
=
(
ap_uint
<
7
>
)(
goal_xy
-
goal_x
*
MAX_WIDTH
);
ap_uint
<
3
>
goal_z
=
(
ap_uint
<
3
>
)(
goal
&
BITMASK_Z
);
dist
[
start
]
=
0
;
pq_push
(
pq_nodes
,
0
,
start
,
&
pq_len
,
&
is_empty
);
// push start terminal
SEARCH_PQ
:
while
(
!
is_empty
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=1000
#pragma HLS LOOP_FLATTEN off
ap_uint
<
16
>
prev_cost
;
ap_uint
<
16
>
src
;
// target cell
pq_pop
(
pq_nodes
,
&
prev_cost
,
&
src
,
&
pq_len
,
&
is_empty
);
#ifdef DEBUG_PRINT
search_count
++
;
#endif
// End routing
if
(
src
==
goal
)
break
;
// Target cell
ap_uint
<
16
>
dist_src
=
dist
[
src
];
ap_uint
<
8
>
cost
=
w
[
src
];
// Point of target cell
ap_uint
<
13
>
src_xy
=
(
ap_uint
<
13
>
)(
src
>>
BITWIDTH_Z
);
ap_uint
<
7
>
src_x
=
(
ap_uint
<
7
>
)(
src_xy
/
MAX_WIDTH
);
ap_uint
<
7
>
src_y
=
(
ap_uint
<
7
>
)(
src_xy
-
src_x
*
MAX_WIDTH
);
ap_uint
<
3
>
src_z
=
(
ap_uint
<
3
>
)(
src
&
BITMASK_Z
);
// Search adjacent cells
SEARCH_ADJACENTS
:
for
(
ap_uint
<
3
>
a
=
0
;
a
<
6
;
a
++
)
{
ap_int
<
8
>
dest_x
=
(
ap_int
<
8
>
)
src_x
;
// Min: -1, Max: 72 (Signed 8bit)
ap_int
<
8
>
dest_y
=
(
ap_int
<
8
>
)
src_y
;
// Min: -1, Max: 72 (Signed 8bit)
ap_int
<
5
>
dest_z
=
(
ap_int
<
5
>
)
src_z
;
// Min: -1, Max: 8 (Signed 5bit)
if
(
a
==
0
)
{
dest_x
-=
1
;
}
if
(
a
==
1
)
{
dest_x
+=
1
;
}
if
(
a
==
2
)
{
dest_y
-=
1
;
}
if
(
a
==
3
)
{
dest_y
+=
1
;
}
if
(
a
==
4
)
{
dest_z
-=
1
;
}
if
(
a
==
5
)
{
dest_z
+=
1
;
}
// Inside the board ? //
if
(
0
<=
dest_x
&&
dest_x
<
(
ap_int
<
8
>
)
size_x
&&
0
<=
dest_y
&&
dest_y
<
(
ap_int
<
8
>
)
size_y
&&
0
<=
dest_z
&&
dest_z
<
(
ap_int
<
5
>
)
size_z
)
{
// Adjacent cell
ap_uint
<
16
>
dest
=
(((
ap_uint
<
16
>
)
dest_x
*
MAX_WIDTH
+
(
ap_uint
<
16
>
)
dest_y
)
<<
BITWIDTH_Z
)
|
(
ap_uint
<
16
>
)
dest_z
;
ap_uint
<
16
>
dist_new
=
dist_src
+
cost
;
if
(
dist
[
dest
]
>
dist_new
)
{
dist
[
dest
]
=
dist_new
;
// Update dist
prev
[
dest
]
=
src
;
// Recode previous cell
dist_new
+=
abs_uint7
(
dest_x
,
goal_x
)
+
abs_uint7
(
dest_y
,
goal_y
)
+
abs_uint3
(
dest_z
,
goal_z
);
// A* heuristic
pq_push
(
pq_nodes
,
dist_new
,
dest
,
&
pq_len
,
&
is_empty
);
// push adjacent cell
}
}
}
#ifdef DEBUG_PRINT
if
(
queue_length
<
pq_len
)
{
queue_length
=
pq_len
;
}
#endif
}
// Output target path
// Note: Do not include start & goal terminals
ap_uint
<
16
>
t
=
prev
[
goal
];
// Backtracking
ap_uint
<
BUFF_BIT
>
p
=
idx
;
// buffer-idx
SEARCH_BACKTRACK
:
while
(
t
!=
start
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=256
paths
[
p
]
=
t
;
p
++
;
t
=
prev
[
t
];
}
#ifdef DEBUG_PRINT
if
(
max_queue_length
<
queue_length
)
{
max_queue_length
=
queue_length
;
}
if
(
max_search_count
<
search_count
)
{
max_search_count
=
search_count
;
}
#endif
return
p
;
}
// Queue push (Enqueue)
// Need to modify "trip count" (1)
void
pq_push
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
priority
,
ap_uint
<
16
>
data
,
ap_uint
<
PQ_BIT
>
*
pq_len
,
bool
*
is_empty
)
{
#pragma HLS INLINE
(
*
pq_len
)
++
;
if
((
*
pq_len
)
==
0
)
{
(
*
pq_len
)
--
;
}
// Queue is full -> Last element is automatically removed
// Binary search for circular list
ap_uint
<
PQ_BIT
>
i
=
(
*
pq_len
);
ap_uint
<
PQ_BIT
>
p
=
(
*
pq_len
)
>>
1
;
// parent node
PQ_PUSH_LOOP
:
while
(
i
>
1
&&
(
ap_uint
<
16
>
)(
pq_nodes
[
p
]
&
PQ_PRIORITY_MASK
)
>=
priority
)
{
#pragma HLS LOOP_TRIPCOUNT min=0 max=16
/** Set!: min=0 max=PQ_BIT **/
pq_nodes
[
i
]
=
pq_nodes
[
p
];
i
=
p
;
p
=
p
>>
1
;
// parent node
}
pq_nodes
[
i
]
=
((
ap_uint
<
32
>
)
data
<<
PQ_PRIORITY_WIDTH
)
|
(
ap_uint
<
32
>
)
priority
;
*
is_empty
=
false
;
}
// Queue pop (Dequeue)
// Need to modify "trip count" (1)
void
pq_pop
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
*
ret_priority
,
ap_uint
<
16
>
*
ret_data
,
ap_uint
<
PQ_BIT
>
*
pq_len
,
bool
*
is_empty
)
{
#pragma HLS INLINE
*
ret_priority
=
(
ap_uint
<
16
>
)(
pq_nodes
[
1
]
&
PQ_PRIORITY_MASK
);
*
ret_data
=
(
ap_uint
<
16
>
)(
pq_nodes
[
1
]
>>
PQ_PRIORITY_WIDTH
);
ap_uint
<
PQ_BIT
>
i
=
1
;
// root node
ap_uint
<
PQ_BIT
>
last_priority
=
(
ap_uint
<
16
>
)(
pq_nodes
[
*
pq_len
]
&
PQ_PRIORITY_MASK
);
// Priority of last element
PQ_POP_LOOP
:
while
(
1
)
{
#pragma HLS LOOP_TRIPCOUNT min=1 max=16
/** Set!: min=0 max=PQ_BIT **/
ap_uint
<
PQ_BIT
>
c1
=
i
<<
1
;
// child node(left)
ap_uint
<
PQ_BIT
>
c2
=
c1
+
1
;
// child node(right)
if
(
c1
<
*
pq_len
&&
(
ap_uint
<
16
>
)(
pq_nodes
[
c1
]
&
PQ_PRIORITY_MASK
)
<=
last_priority
)
{
if
(
c2
<
*
pq_len
&&
(
ap_uint
<
16
>
)(
pq_nodes
[
c2
]
&
PQ_PRIORITY_MASK
)
<=
(
ap_uint
<
16
>
)(
pq_nodes
[
c1
]
&
PQ_PRIORITY_MASK
))
{
pq_nodes
[
i
]
=
pq_nodes
[
c2
];
i
=
c2
;
}
else
{
pq_nodes
[
i
]
=
pq_nodes
[
c1
];
i
=
c1
;
}
}
else
{
if
(
c2
<
*
pq_len
&&
(
ap_uint
<
16
>
)(
pq_nodes
[
c2
]
&
PQ_PRIORITY_MASK
)
<=
last_priority
)
{
pq_nodes
[
i
]
=
pq_nodes
[
c2
];
i
=
c2
;
}
else
{
break
;
}
}
}
pq_nodes
[
i
]
=
pq_nodes
[
*
pq_len
];
(
*
pq_len
)
--
;
if
((
*
pq_len
)
==
0
)
{
*
is_empty
=
true
;
}
}
hls_2018/router_04/router.hpp
0 → 100755
View file @
98afc37f
/**
* router.hpp
*
* for Vivado HLS
*/
#ifndef __ROUTER_HPP__
#define __ROUTER_HPP__
#ifdef SOFTWARE
#include "ap_int.h"
#else
#include <ap_int.h>
#endif
//#define DEBUG_PRINT // for debug
#ifdef DEBUG_PRINT
using
namespace
std
;
#endif
// Parameters
#define MAX_WIDTH 72 // Max of X, Y
#define BITWIDTH_XY 13
#define BITMASK_XY 65528 // 1111 1111 1111 1000
#define MAX_LAYER 8 // Max of Z
#define BITWIDTH_Z 3
#define BITMASK_Z 7 // 0000 0000 0000 0111
#define MAX_CELLS 41472 // Max #cells (16bit)
#define MAX_LINES 1024 // Max #lines (10bit)
#define MAX_PQ 32768 // Queue size (15bit)
#define MAX_BUFFER 16384 // Line buffer size (14bit)
#define CELL_BIT 16
#define LINE_BIT 10
#define PQ_BIT 15
#define BUFF_BIT 14
#define PQ_PRIORITY_WIDTH 16
#define PQ_PRIORITY_MASK 65535 // 0000 0000 0000 0000 1111 1111 1111 1111
#define PQ_DATA_WIDTH 16
#define PQ_DATA_MASK 4294901760 // 1111 1111 1111 1111 0000 0000 0000 0000
#define MAX_WEIGHT 255 // Max weight
#define BOARDSTR_SIZE 41472 // Size of I/O
ap_uint
<
8
>
new_weight
(
ap_uint
<
16
>
x
);
bool
pynqrouter
(
char
boardstr
[
BOARDSTR_SIZE
],
ap_uint
<
32
>
seed
,
ap_int
<
32
>
*
status
);
ap_uint
<
7
>
abs_uint7
(
ap_uint
<
7
>
a
,
ap_uint
<
7
>
b
);
ap_uint
<
3
>
abs_uint3
(
ap_uint
<
3
>
a
,
ap_uint
<
3
>
b
);
ap_uint
<
BUFF_BIT
>
search
(
ap_uint
<
BUFF_BIT
>
idx
,
ap_uint
<
CELL_BIT
>
paths
[
MAX_BUFFER
],
ap_uint
<
CELL_BIT
>
start
,
ap_uint
<
CELL_BIT
>
goal
,
ap_uint
<
8
>
w
[
MAX_WEIGHT
]);
void
pq_push
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
priority
,
ap_uint
<
16
>
data
,
ap_uint
<
PQ_BIT
>
*
pq_len
,
bool
*
is_empty
);
void
pq_pop
(
ap_uint
<
32
>
pq_nodes
[
MAX_PQ
],
ap_uint
<
16
>
*
ret_priority
,
ap_uint
<
16
>
*
ret_data
,
ap_uint
<
PQ_BIT
>
*
pq_len
,
bool
*
is_empty
);
#endif
/* __ROUTER_HPP__ */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment