#include "solver.hpp" static unsigned int lfsr, lfsr_x, lfsr_y; static void lfsr_random_init(unsigned int seed, unsigned int seed_x, unsigned int seed_y) { #pragma HLS INLINE lfsr = seed; lfsr_x = seed_x; lfsr_y = seed_y; } static unsigned int lfsr_random() { #pragma HLS INLINE bool b_32 = false, b_22 = false, b_2 = false, b_1 = false; if((lfsr >> 0) % 2 == 1) b_32 = true; if((lfsr >> 10) % 2 == 1) b_22 = true; if((lfsr >> 30) % 2 == 1) b_2 = true; if((lfsr >> 31) % 2 == 1) b_1 = true; bool new_bit = b_32 ^ b_22 ^ b_2 ^ b_1; lfsr >>= 1; if(new_bit) lfsr |= 0x80000000; return lfsr; } static unsigned int lfsr_x_random() { #pragma HLS INLINE bool b_32 = false, b_22 = false, b_2 = false, b_1 = false; if((lfsr_x >> 0) % 2 == 1) b_32 = true; if((lfsr_x >> 10) % 2 == 1) b_22 = true; if((lfsr_x >> 30) % 2 == 1) b_2 = true; if((lfsr_x >> 31) % 2 == 1) b_1 = true; bool new_bit = b_32 ^ b_22 ^ b_2 ^ b_1; lfsr_x >>= 1; if(new_bit) lfsr_x |= 0x80000000; return lfsr_x; } static unsigned int lfsr_y_random() { #pragma HLS INLINE bool b_32 = false, b_22 = false, b_2 = false, b_1 = false; if((lfsr_y >> 0) % 2 == 1) b_32 = true; if((lfsr_y >> 10) % 2 == 1) b_22 = true; if((lfsr_y >> 30) % 2 == 1) b_2 = true; if((lfsr_y >> 31) % 2 == 1) b_1 = true; bool new_bit = b_32 ^ b_22 ^ b_2 ^ b_1; lfsr_y >>= 1; if(new_bit) lfsr_y |= 0x80000000; return lfsr_y; } bool solver(hls::stream >& state, unsigned int seed[3], short int *pbnum, short int *plnum, short int block_info[MAX_BLOCKS+1][5][3], short int *pwi, short int *phi, short int block_place_global[MAX_BLOCKS+1][2], short int opt_result[MAX_CELLS]) { #pragma HLS INTERFACE s_axilite port=return #pragma HLS INTERFACE ap_hs register port=state #pragma HLS INTERFACE m_axi depth=1 port=seed offset=slave bundle=io_data #pragma HLS INTERFACE m_axi depth=1 port=pbnum offset=slave bundle=io_data #pragma HLS INTERFACE m_axi depth=1 port=plnum offset=slave bundle=io_data #pragma HLS INTERFACE m_axi depth=1 port=block_info offset=slave bundle=io_data #pragma HLS INTERFACE m_axi depth=1 port=pwi offset=slave bundle=io_data #pragma HLS INTERFACE m_axi depth=1 port=phi offset=slave bundle=io_data #pragma HLS INTERFACE m_axi depth=1 port=block_place_global offset=slave bundle=io_data #pragma HLS INTERFACE m_axi depth=1 port=opt_result offset=slave bundle=io_data state.write(0b0000); short int blocks = *pbnum; short int line_num = *plnum; short int wi, hi; lfsr_random_init(seed[0], seed[1], seed[2]); bool status; state.write(0b0001); status = local_placement_1(line_num, blocks, &wi, &hi, block_info, block_place_global, opt_result); if(!status) { // Cannot route state.write(0b0010); return false; } state.write(0b0011); status = local_placement_2(line_num, blocks, &wi, &hi, block_info, opt_result); if(!status) { state.write(0b0100); return false; } else { state.write(0b0111); } *pwi = wi; *phi = hi; return true; } bool local_placement_1(short int line_num, short int blocks, short int *pwi, short int *phi, short int block_info[MAX_BLOCKS+1][5][3], short int block_place_global[MAX_BLOCKS+1][2], short int opt_result[MAX_CELLS]) { short int wi, hi; // board size (w, h) // size of each slot (w, h) short int slot_w[SLOT_MAX_SIZE]; short int slot_h[SLOT_MAX_SIZE]; // basic position of each slot (x, y) short int slot_w_t[SLOT_MAX_SIZE]; short int slot_h_t[SLOT_MAX_SIZE]; short int block_place_slack[MAX_BLOCKS+1][2]; short int block_place_basis[MAX_BLOCKS+1][2]; short int block_place_delta[MAX_BLOCKS+1][2]; short int block_place_opt[MAX_BLOCKS+1][2]; short int board_str[MAX_CELLS]; #pragma HLS ARRAY_PARTITION variable=board_str cyclic factor=4 dim=1 // load a placement result short int min_x = SLOT_MAX_SIZE, min_y = SLOT_MAX_SIZE, max_x = 0, max_y = 0; PLACE_1_LOAD_1: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 short int p = block_place_global[i][0]; short int q = block_place_global[i][1]; if(p < min_x) { min_x = p; } if(q < min_y) { min_y = q; } if(p > max_x) { max_x = p; } if(q > max_y) { max_y = q; } } PLACE_1_LOAD_2: for(short int p = min_x; p <= max_x; p++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=64 slot_w[p] = 0; } PLACE_1_LOAD_3: for(short int q = min_y; q <= max_y; q++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=64 slot_h[q] = 0; } PLACE_1_LOAD_4: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 short int sx = block_info[i][0][0]; short int sy = block_info[i][0][1]; short int p = block_place_global[i][0]; short int q = block_place_global[i][1]; if(sx > slot_w[p]) { slot_w[p] = sx; } if(sy > slot_h[q]) { slot_h[q] = sy; } } wi = INTER_BLOCK_MARGIN; PLACE_1_LOAD_5: for(short int p = min_x; p <= max_x; p++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=64 slot_w_t[p] = wi; wi += slot_w[p] + INTER_BLOCK_MARGIN; } hi = INTER_BLOCK_MARGIN; PLACE_1_LOAD_6: for(short int q = min_y; q <= max_y; q++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=64 slot_h_t[q] = hi; hi += slot_h[q] + INTER_BLOCK_MARGIN; } #ifdef DEBUG_PRINT cout << "== Local placement with routing (1) ==" << endl; cout << "Board size = " << wi << "X" << hi << endl; #endif // calculate slack for each block PLACE_1_CALC_SLACK: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 short int sx = block_info[i][0][0]; short int sy = block_info[i][0][1]; short int p = block_place_global[i][0]; short int q = block_place_global[i][1]; block_place_basis[i][0] = slot_w_t[p]; block_place_basis[i][1] = slot_h_t[q]; block_place_slack[i][0] = slot_w[p] - sx; block_place_slack[i][1] = slot_h[q] - sy; } bool find_route = false; int min_status = MAX_CELLS; PLACE_1_SEARCH: for(int counter = 0; counter < LOOP; counter++) { #ifdef DEBUG_PRINT cout << "."; fflush(stdout); #endif PLACE_1_SEARCH_INIT: for(int s = 0; s < MAX_CELLS; s++) { #pragma HLS UNROLL factor=8 board_str[s] = 0; } PLACE_1_SEARCH_SET: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 short int x = block_place_basis[i][0]; short int y = block_place_basis[i][1]; short int dx = lfsr_x_random() % (block_place_slack[i][0] + 1); short int dy = lfsr_y_random() % (block_place_slack[i][1] + 1); block_place_delta[i][0] = dx; block_place_delta[i][1] = dy; PLACE_1_SEARCH_SET_1: for(int it = 1; it < 5; it++) { short int tx = block_info[i][it][0]; short int ty = block_info[i][it][1]; short int tn = block_info[i][it][2]; if(tn < 0) break; short int p = x + dx + tx; short int q = y + dy + ty; short int idx = q * MAX_SIZE + p; if(tn > 0) { board_str[idx] = tn; } else { board_str[idx] = -1; } } } int status = router(wi, hi, line_num, board_str); // routing if(status >= 0 && status < min_status) { min_status = status; find_route = true; PLACE_1_SEARCH_OUT_1: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 block_place_opt[i][0] = block_place_basis[i][0] + block_place_delta[i][0]; block_place_opt[i][1] = block_place_basis[i][1] + block_place_delta[i][1]; } PLACE_1_SEARCH_OUT_2_Y: for(short int y = 0; y < hi; y++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 PLACE_1_SEARCH_OUT_2_X: for(short int x = 0; x < wi; x++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 short int idx = y * MAX_SIZE + x; opt_result[idx] = board_str[idx]; } } } } #ifdef DEBUG_PRINT cout << endl; #endif if(!find_route) return false; // Update PLACE_1_UPDATE_I: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 block_place_basis[i][0] = block_place_opt[i][0]; block_place_basis[i][1] = block_place_opt[i][1]; } /// print for debug /// #ifdef DEBUG_PRINT cout << "Min-wire status: " << min_status << endl; show_result(line_num, blocks, wi, hi, opt_result); #endif /// print for debug /// //cout << "== Local placement with routing (1+) ==" << endl; bool remove_flag; short int deleted_line_w = 0, deleted_line_h = 0; PLACE_1_UPDATE_Y: for(short int q = hi - 1; q >= 0; q--) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 remove_flag = true; PLACE_1_UPDATE_Y_X: for(short int p = 0; p < wi; p++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 short int idx = q * MAX_SIZE + p; if(opt_result[idx] != 0) { remove_flag = false; break; } } if(!remove_flag) continue; PLACE_1_UPDATE_Y_B: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 if(block_place_basis[i][1] > q) { block_place_basis[i][1] -= 1; } } deleted_line_h++; } PLACE_1_UPDATE_X: for(short int p = wi - 1; p >= 0; p--) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 remove_flag = true; PLACE_1_UPDATE_X_Y: for(short int q = 0; q < hi; q++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 short int idx = q * MAX_SIZE + p; if(opt_result[idx] != 0) { remove_flag = false; break; } } if(!remove_flag) continue; PLACE_1_UPDATE_X_B: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 if(block_place_basis[i][0] > p) { block_place_basis[i][0] -= 1; } } deleted_line_w++; } hi -= deleted_line_h; wi -= deleted_line_w; PLACE_1_REROUTE_INIT: for(int s = 0; s < MAX_CELLS; s++) { #pragma HLS UNROLL factor=8 board_str[s] = 0; } PLACE_1_REROUTE_SET: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 short int x = block_place_basis[i][0]; short int y = block_place_basis[i][1]; PLACE_1_REROUTE_SET_1: for(int it = 1; it < 5; it++) { short int tx = block_info[i][it][0]; short int ty = block_info[i][it][1]; short int tn = block_info[i][it][2]; if(tn < 0) break; short int p = x + tx; short int q = y + ty; short int idx = q * MAX_SIZE + p; if(tn > 0) { board_str[idx] = tn; } else { board_str[idx] = -1; } } } int counter = 0; PLACE_1_REROUTE_LOOP: while(router(wi, hi, line_num, board_str) < 0) { #pragma HLS LOOP_TRIPCOUNT min=1 max=5 if(counter++ >= LOOP) return false; } PLACE_1_O_1_Y: for(short int y = 0; y < hi; y++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 PLACE_1_O_1_X: for(short int x = 0; x < wi; x++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 short int idx = y * MAX_SIZE + x; opt_result[idx] = board_str[idx]; } } PLACE_1_O_2: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 block_info[i][0][0] = block_place_basis[i][0]; block_info[i][0][1] = block_place_basis[i][1]; } *pwi = wi; *phi = hi; return true; } bool local_placement_2(short int line_num, short int blocks, short int *pwi, short int *phi, short int block_info[MAX_BLOCKS+1][5][3], short int opt_result[MAX_CELLS]) { short int wi = *pwi, hi = *phi; short int block_place_slack[MAX_BLOCKS+1][2]; short int block_place_basis[MAX_BLOCKS+1][2]; short int block_place_delta[MAX_BLOCKS+1][2]; short int block_place_opt[MAX_BLOCKS+1][2]; short int board_str[MAX_CELLS]; #pragma HLS ARRAY_PARTITION variable=board_str cyclic factor=4 dim=1 PLACE_2_I: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 block_place_basis[i][0] = block_info[i][0][0]; block_place_basis[i][1] = block_info[i][0][1]; } #ifdef DEBUG_PRINT cout << "== Local placement with routing (2) ==" << endl; #endif int direction = 0; // 0: X, 1: Y int try_count = 1; int no_update = 0; if(hi > wi) direction = 1; PLACE_2_BODY: while(try_count <= TRY_LIMIT) { #pragma HLS LOOP_TRIPCOUNT min=10 max=50 #ifdef DEBUG_PRINT cout << "Phase " << try_count << ": direction: "; switch(direction) { case 0: cout << "X" << endl; break; case 1: cout << "Y" << endl; break; } cout << "Board size = " << wi << "X" << hi << endl; #endif // calculate slack for each block PLACE_2_CALC_SLACK: for(int t = 1; t <= blocks; t++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 block_place_slack[t][0] = 0; block_place_slack[t][1] = 0; PLACE_2_CALC_SLACK_I: for(int s = 0; s < MAX_CELLS; s++) { #pragma HLS UNROLL factor=8 board_str[s] = 0; } PLACE_2_CALC_SLACK_1: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 if(i == t) continue; short int x = block_place_basis[i][0]; short int y = block_place_basis[i][1]; PLACE_2_CALC_SLACK_1_1: for(int it = 1; it < 5; it++) { short int tx = block_info[i][it][0]; short int ty = block_info[i][it][1]; short int tn = block_info[i][it][2]; if(tn < 0) break; short int p = x + tx; short int q = y + ty; short int idx = q * MAX_SIZE + p; if(tn > 0) { board_str[idx] = tn; } else { board_str[idx] = -1; } } } bool move_x = true, move_y = true; PLACE_2_CALC_SLACK_2: // X direction while(direction == 0) { #pragma HLS LOOP_TRIPCOUNT min=0 max=10 short int x = block_place_basis[t][0]; short int y = block_place_basis[t][1]; PLACE_2_CALC_SLACK_2_1: for(int it = 1; it < 5; it++) { short int tx = block_info[t][it][0]; short int ty = block_info[t][it][1]; short int tn = block_info[t][it][2]; if(tn < 0) break; short int p = x + tx - (block_place_slack[t][0]+1); short int q = y + ty; short int idx = q * MAX_SIZE + p; if(p < 0 || board_str[idx] != 0) { move_x = false; break; } } if(move_x) { if(++block_place_slack[t][0] >= 9) break; } else { break; } } PLACE_2_CALC_SLACK_3: // Y direction while(direction == 1) { #pragma HLS LOOP_TRIPCOUNT min=0 max=10 short int x = block_place_basis[t][0]; short int y = block_place_basis[t][1]; PLACE_2_CALC_SLACK_3_1: for(int it = 1; it < 5; it++) { short int tx = block_info[t][it][0]; short int ty = block_info[t][it][1]; short int tn = block_info[t][it][2]; if(tn < 0) break; short int p = x + tx; short int q = y + ty - (block_place_slack[t][1]+1); short int idx = q * MAX_SIZE + p; if(q < 0 || board_str[idx] != 0) { move_y = false; break; } } if(move_y) { if(++block_place_slack[t][1] >= 9) break; } else { break; } } } no_update++; bool find_route = false; int d_sum_max = 0; PLACE_2_SEARCH: for(int counter = 0; counter < LOOP; counter++) { #ifdef DEBUG_PRINT cout << "."; fflush(stdout); #endif PLACE_2_SEARCH_INIT: for(int s = 0; s < MAX_CELLS; s++) { #pragma HLS UNROLL factor=8 board_str[s] = 0; } int d_sum = 0; PLACE_2_SEARCH_SET: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 short int x = block_place_basis[i][0]; short int y = block_place_basis[i][1]; short int dx = 0; short int dy = 0; switch(direction) { case 0: dx = lfsr_x_random() % (block_place_slack[i][0] + 1); break; case 1: dy = lfsr_y_random() % (block_place_slack[i][1] + 1); break; } if(lfsr_random() % 16 >= 2) { dx = 0; dy = 0; } block_place_delta[i][0] = dx; block_place_delta[i][1] = dy; d_sum += dx + dy; PLACE_2_SEARCH_SET_1: for(int it = 1; it < 5; it++) { short int tx = block_info[i][it][0]; short int ty = block_info[i][it][1]; short int tn = block_info[i][it][2]; if(tn < 0) break; short int p = x - dx + tx; short int q = y - dy + ty; short int idx = q * MAX_SIZE + p; if(tn > 0) { board_str[idx] = tn; } else{ board_str[idx] = -1; } } } int status = router(wi, hi, line_num, board_str); if(status >= 0 && d_sum > d_sum_max) { d_sum_max = d_sum; find_route = true; PLACE_2_SEARCH_OUT_1: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 block_place_opt[i][0] = block_place_basis[i][0] - block_place_delta[i][0]; block_place_opt[i][1] = block_place_basis[i][1] - block_place_delta[i][1]; } PLACE_2_SEARCH_OUT_2_Y: for(short int y = 0; y < hi; y++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 PLACE_2_SEARCH_OUT_2_X: for(short int x = 0; x < wi; x++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 short int idx = y * MAX_SIZE + x; opt_result[idx] = board_str[idx]; } } } } #ifdef DEBUG_PRINT cout << endl; #endif if(find_route) { no_update = 0; // reset // Update PLACE_2_UPDATE_I: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 block_place_basis[i][0] = block_place_opt[i][0]; block_place_basis[i][1] = block_place_opt[i][1]; } /// print for debug /// #ifdef DEBUG_PRINT cout << "Max dSum: " << d_sum_max << endl; show_result(line_num, blocks, wi, hi, opt_result); #endif /// print for debug /// bool remove_flag; short int deleted_line_w = 0, deleted_line_h = 0; PLACE_2_UPDATE_Y: for(short int q = hi - 1; q >= 0; q--) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 remove_flag = true; PLACE_2_UPDATE_Y_X: for(short int p = 0; p < wi; p++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 short int idx = q * MAX_SIZE + p; if(opt_result[idx] != 0) { remove_flag = false; break; } } if(!remove_flag) continue; PLACE_2_UPDATE_Y_B: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 if(block_place_basis[i][1] > q) { block_place_basis[i][1] -= 1; } } deleted_line_h++; } PLACE_2_UPDATE_X: for(short int p = wi - 1; p >= 0; p--) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 remove_flag = true; PLACE_2_UPDATE_X_Y: for(short int q = 0; q < hi; q++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 short int idx = q * MAX_SIZE + p; if(opt_result[idx] != 0) { remove_flag = false; break; } } if(!remove_flag) continue; PLACE_2_UPDATE_X_B: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 if(block_place_basis[i][0] > p) { block_place_basis[i][0] -= 1; } } deleted_line_w++; } hi -= deleted_line_h; wi -= deleted_line_w; PLACE_2_REROUTE_INIT: for(int s = 0; s < MAX_CELLS; s++) { #pragma HLS UNROLL factor=8 board_str[s] = 0; } PLACE_2_REROUTE_SET: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 short int x = block_place_basis[i][0]; short int y = block_place_basis[i][1]; PLACE_2_REROUTE_SET_1: for(int it = 1; it < 5; it++) { short int tx = block_info[i][it][0]; short int ty = block_info[i][it][1]; short int tn = block_info[i][it][2]; if(tn < 0) break; short int p = x + tx; short int q = y + ty; short int idx = q * MAX_SIZE + p; if(tn > 0) { board_str[idx] = tn; } else { board_str[idx] = -1; } } } int counter = 0; PLACE_2_REROUTE_LOOP: while(router(wi, hi, line_num, board_str) < 0) { #pragma HLS LOOP_TRIPCOUNT min=1 max=5 if(counter++ >= LOOP) return false; } PLACE_2_REROUTE_OUT_Y: for(short int y = 0; y < hi; y++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 PLACE_2_REROUTE_OUT_X: for(short int x = 0; x < wi; x++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 short int idx = y * MAX_SIZE + x; opt_result[idx] = board_str[idx]; } } } // End update if(no_update >= NO_MOVE) break; try_count++; switch(direction) { case 0: direction = 1; break; case 1: direction = 0; break; } } #ifdef DEBUG_PRINT cout << "#Tries: " << try_count << endl; #endif PLACE_2_O_2: for(int i = 1; i <= blocks; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 block_info[i][0][0] = block_place_basis[i][0]; block_info[i][0][1] = block_place_basis[i][1]; } *pwi = wi; *phi = hi; return true; } unsigned short int new_weight(unsigned short int x) { #pragma HLS INLINE unsigned short int y = (x & 0x00FF) + 1; return y; } unsigned short int my_abs(short int x) { #pragma HLS INLINE if(x < 0) x *= -1; return x; } // DATA_BIT: bit length of cell idx // PRIO_BIT: bit length of cost(priority) int router(short int size_x, short int size_y, short int line_num, short int board_str[MAX_CELLS]) { if(line_num == 0) return 0; short int terminals[MAX_LINES][2]; short int paths_size[MAX_LINES]; unsigned short int paths[MAX_LINES][MAX_PATH]; bool adjacents[MAX_LINES]; unsigned short int weights[MAX_CELLS]; SET_LOOP_1: for(int i = 0; i < line_num; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=256 terminals[i][0] = -1; terminals[i][1] = -1; paths_size[i] = 0; adjacents[i] = false; } SET_LOOP_2_Y: for(short int y = 0; y < size_y; y++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 SET_LOOP_2_X: for(short int x = 0; x < size_x; x++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 short int j = y * MAX_SIZE + x; short int num = board_str[j]; switch(num) { case 0: weights[j] = 1; break; case -1: weights[j] = PRIO_MAX; break; default: weights[j] = PRIO_MAX; if(terminals[num-1][0] >= 0) { terminals[num-1][1] = j; } else { terminals[num-1][0] = j; } break; } } } #ifdef PRINT_BOARD cout << "== Print board ==" << endl; for(short int y = 0; y < size_y; y++) { for(short int x = 0; x < size_x; x++) { short int j = y * MAX_SIZE + x; short int num = board_str[j]; if(num == 0) { cout << "--"; } else if(num == -1) { cout << "XX"; } else{ cout << setfill('0') << setw(2) << hex << num; } if(x != size_x-1) { cout << " "; } } cout << endl; } cout << setfill(' '); cout << dec; #endif SET_LOOP_3: for(int i = 0; i < line_num; i++) { #pragma HLS UNROLL factor=2 #pragma HLS LOOP_TRIPCOUNT min=1 max=256 short int t1 = terminals[i][0]; short int t2 = terminals[i][1]; short int t1_y = t1 / MAX_SIZE; short int t1_x = t1 - MAX_SIZE * t1_y; short int t2_y = t2 / MAX_SIZE; short int t2_x = t2 - MAX_SIZE * t2_y; unsigned short int dist = my_abs(t1_x - t2_x) + my_abs(t1_y - t2_y); if(dist == 1) { adjacents[i] = true; #ifdef PRINT_BOARD cout << "Line #" << i + 1 << " needs no routing" << endl; #endif } } // Step 1 // ROUTING_1: for(int i = 0; i < line_num; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=256 if(adjacents[i]) continue; #ifdef PRINT_SEARCH cout << "Line #" << i + 1 << endl; #endif short int t1 = terminals[i][0]; short int t2 = terminals[i][1]; weights[t1] = 1; if(search(size_x, size_y, &paths_size[i], paths[i], t1, t2, weights) < 0) { return -1; } weights[t1] = PRIO_MAX; } bool has_overlap; short int overlap_checks[MAX_CELLS]; short int last_target = -1; // Step 2 // ROUTING_2: for(int round = 1; round <= ROUND_LIMIT; round++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=4096 short int target = lfsr_random() % line_num; if(adjacents[target]) continue; if(target == last_target) continue; last_target = target; unsigned short int round_weight = new_weight(round); #ifdef PRINT_SEARCH cout << "Line #" << target + 1 << "(round: " << round << ", weight: " << round_weight << ")" << endl; #endif short int t1 = terminals[target][0]; short int t2 = terminals[target][1]; WEIGHT_UPDATE_1: for(short int p = 0; p < paths_size[target]; p++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 weights[paths[target][p]] = 1; } WEIGHT_UPDATE_2: for(int i = 0; i < line_num; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=256 if(i == target) continue; WEIGHT_UPDATE_2_1: for(short int p = 0; p < paths_size[i]; p++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 weights[paths[i][p]] = round_weight; } } weights[t1] = 1; search(size_x, size_y, &paths_size[target], paths[target], t1, t2, weights); weights[t1] = PRIO_MAX; has_overlap = false; OVERLAP_CHECK_1: for(short int y = 0; y < size_y; y++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 OVERLAP_CHECK_1_1: for(short int x = 0; x < size_x; x++) { #pragma HLS LOOP_TRIPCOUNT min=10 max=128 short int j = y * MAX_SIZE + x; overlap_checks[j] = 0; } } OVERLAP_CHECK_2: for(int i = 0; i < line_num && !has_overlap; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=256 OVERLAP_CHECK_2_1: for(short int p = 0; p < paths_size[i]; p++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 short int cell_id = paths[i][p]; if(overlap_checks[cell_id] == 1) { has_overlap = true; break; } overlap_checks[cell_id] = 1; } } if(!has_overlap) break; } if(has_overlap) { // Cannot solve return -1; } int total_wire_length = 0; OUTPUT_1: for(short int i = 0; i < line_num; i++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=256 total_wire_length += paths_size[i]; OUTPUT_1_1: for(short int p = 0; p < paths_size[i]; p++) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 short int cell_id = paths[i][p]; board_str[cell_id] = i + 1; } } #ifdef PRINT_BOARD cout << "== Print answer ==" << endl; for(short int y = 0; y < size_y; y++) { for(short int x = 0; x < size_x; x++) { short int j = y * MAX_SIZE + x; short int num = board_str[j]; if(num == 0) { cout << "--"; } else if(num == -1) { cout << "XX"; } else{ cout << setfill('0') << setw(2) << hex << num; } if(x != size_x-1) { cout << " "; } } cout << endl; } cout << setfill(' '); cout << dec; #endif return total_wire_length; } int search(short int size_x, short int size_y, short int *path_size, unsigned short int path[MAX_PATH], unsigned short int start, unsigned short int goal, unsigned short int w[MAX_CELLS]) { unsigned short int dist[MAX_CELLS]; #pragma HLS ARRAY_PARTITION variable=dist cyclic factor=16 dim=1 unsigned short int prev[MAX_CELLS]; #pragma HLS ARRAY_PARTITION variable=prev cyclic factor=16 dim=1 SEARCH_INIT_DIST: for(int j = 0; j < MAX_CELLS; j++) { #pragma HLS UNROLL factor=32 dist[j] = PRIO_MAX; } ap_uint pq_len = 0; bool is_empty = true; unsigned int pq_nodes[MAX_PQ]; short int goal_y = goal / MAX_SIZE; short int goal_x = goal - MAX_SIZE * goal_y; dist[start] = 0; enqueue(pq_nodes, 0, start, &pq_len, &is_empty); bool find_path = false; SEARCH_BODY: while(!is_empty) { #pragma HLS LOOP_TRIPCOUNT min=1 max=1000 unsigned short int prev_cost; unsigned short int s; dequeue(pq_nodes, &prev_cost, &s, &pq_len, &is_empty); if(s == goal) { find_path = true; break; } unsigned short int dist_s = dist[s]; unsigned short int cost = w[s]; unsigned short int dist_d = dist_s + cost; short int s_y = s / MAX_SIZE; short int s_x = s - MAX_SIZE * s_y; SEARCH_ADJACENTS: for(int a = 0; a < 4; a++) { #pragma HLS UNROLL short int d_x = s_x; short int d_y = s_y; switch(a) { case 0: d_x -= 1; break; case 1: d_x += 1; break; case 2: d_y -= 1; break; case 3: d_y += 1; break; } if(d_x >= 0 && d_x < size_x && d_y >= 0 && d_y < size_y) { unsigned short int d = d_y * MAX_SIZE + d_x; if(w[d] == PRIO_MAX && d != goal) continue; if(dist_d < dist[d]) { dist[d] = dist_d; prev[d] = s; unsigned short int new_dist = dist_d + my_abs(d_x - goal_x) + my_abs(d_y - goal_y); enqueue(pq_nodes, new_dist, d, &pq_len, &is_empty); } } } } if(!find_path){ return -1; } unsigned short int t = prev[goal]; short int p = 0; #ifdef PRINT_SEARCH cout << "Path: "; #endif SEARCH_BACKTRACK: while(t != start) { #pragma HLS LOOP_TRIPCOUNT min=1 max=128 #ifdef PRINT_SEARCH cout << "(" << t % MAX_SIZE << ", " << t / MAX_SIZE << ")"; #endif path[p] = t; p++; t = prev[t]; } #ifdef PRINT_SEARCH cout << endl; #endif *path_size = p; return 0; } // Enqueue (Insert an element) void enqueue(unsigned int pq_nodes[MAX_PQ], unsigned short int queue_priority, unsigned short int queue_data, ap_uint *pq_len, bool *is_empty) { #pragma HLS INLINE (*pq_len)++; if ((*pq_len) == 0) { (*pq_len)--; } // Queue is full -> Last element is automatically removed // Note that last element is not always the lowest priority one. // ap_uint i = (*pq_len); ap_uint p = (*pq_len) >> 1; // parent node ENQUEUE_LOOP: while (i > 1 && (unsigned short int)(pq_nodes[p] >> DATA_BIT) >= queue_priority) { #pragma HLS LOOP_TRIPCOUNT min=1 max=12 pq_nodes[i] = pq_nodes[p]; i = p; p = p >> 1; // parent node } pq_nodes[i] = ((unsigned int)queue_priority << DATA_BIT) | (unsigned int)queue_data; *is_empty = false; } // Dequeue (Extract and remove the top element) void dequeue(unsigned int pq_nodes[MAX_PQ], unsigned short int *ret_priority, unsigned short int *ret_data, ap_uint *pq_len, bool *is_empty) { #pragma HLS INLINE *ret_priority = (unsigned short int)(pq_nodes[1] >> DATA_BIT); *ret_data = (unsigned short int)(pq_nodes[1] & DATA_MASK); ap_uint i = 1; // root node unsigned short int last_priority = (unsigned short int)(pq_nodes[*pq_len] >> DATA_BIT); // Priority of last element DEQUEUE_LOOP: while (!(i >> (PQ_BIT-1))) { #pragma HLS LOOP_TRIPCOUNT min=1 max=12 ap_uint c1 = i << 1; // child node (L) ap_uint c2 = c1 + 1; // child node (R) if (c1 < *pq_len && (unsigned short int)(pq_nodes[c1] >> DATA_BIT) <= last_priority) { if (c2 < *pq_len && (unsigned short int)(pq_nodes[c2] >> DATA_BIT) <= (unsigned short int)(pq_nodes[c1] >> DATA_BIT)) { pq_nodes[i] = pq_nodes[c2]; i = c2; } else { pq_nodes[i] = pq_nodes[c1]; i = c1; } } else { if (c2 < *pq_len && (unsigned short int)(pq_nodes[c2] >> DATA_BIT) <= last_priority) { pq_nodes[i] = pq_nodes[c2]; i = c2; } else { break; } } } pq_nodes[i] = pq_nodes[*pq_len]; (*pq_len)--; if ((*pq_len) == 0) { *is_empty = true; } }