#include "solver.hpp"

static uint32_t lfsr, lfsr_x, lfsr_y;

int solver(uint32_t seed[3], short int W, short int H, short int blocks, short int line_num, short int block_info[][5][3], short int line_info[][2][5], short int *pwi, short int *phi, short int opt_result[]) {
	
	lfsr_random_init(seed[0], seed[1], seed[2]);
	
	// block positions on slot matrix
	pair<short int,short int> block_place_global[MAX_BLOCKS+1];
	
	while(1) {
		global_placement(W, H, line_num, blocks, block_info, line_info, block_place_global);
		if(local_placement_with_routing_1(line_num, blocks, pwi, phi, block_info, block_place_global, opt_result)) break;
	}
	bool result = local_placement_with_routing_2(line_num, blocks, pwi, phi, block_info, opt_result);
	
	if(result) return 1;
	return 0;
}

void lfsr_random_init(uint32_t seed, uint32_t seed_x, uint32_t seed_y) {
	lfsr = seed;
	lfsr_x = seed_x;
	lfsr_y = seed_y;
}

uint32_t lfsr_random() {
	bool b_32 = false, b_22 = false, b_2 = false, b_1 = false;
	if((lfsr >>  0) % 2 == 1) b_32 = true;
	if((lfsr >> 10) % 2 == 1) b_22 = true;
	if((lfsr >> 30) % 2 == 1) b_2 = true;
	if((lfsr >> 31) % 2 == 1) b_1 = true;
	bool new_bit = b_32 ^ b_22 ^ b_2 ^ b_1;
	lfsr >>= 1;
	if(new_bit) lfsr |= 0x80000000;
	return lfsr;
}
uint32_t lfsr_x_random() {
	bool b_32 = false, b_22 = false, b_2 = false, b_1 = false;
	if((lfsr_x >>  0) % 2 == 1) b_32 = true;
	if((lfsr_x >> 10) % 2 == 1) b_22 = true;
	if((lfsr_x >> 30) % 2 == 1) b_2 = true;
	if((lfsr_x >> 31) % 2 == 1) b_1 = true;
	bool new_bit = b_32 ^ b_22 ^ b_2 ^ b_1;
	lfsr_x >>= 1;
	if(new_bit) lfsr_x |= 0x80000000;
	return lfsr_x;
}
uint32_t lfsr_y_random() {
	bool b_32 = false, b_22 = false, b_2 = false, b_1 = false;
	if((lfsr_y >>  0) % 2 == 1) b_32 = true;
	if((lfsr_y >> 10) % 2 == 1) b_22 = true;
	if((lfsr_y >> 30) % 2 == 1) b_2 = true;
	if((lfsr_y >> 31) % 2 == 1) b_1 = true;
	bool new_bit = b_32 ^ b_22 ^ b_2 ^ b_1;
	lfsr_y >>= 1;
	if(new_bit) lfsr_y |= 0x80000000;
	return lfsr_y;
}

float calcPlaceCost(short int line_num, short int line_info[][2][5], pair<short int,short int> block_place_global[]) {
	
	short int block1, block2; // block idx
	short int x1, y1; // 1st block
	short int x2, y2; // 2nd block
	short int delta_x, delta_y;
	// delta_x > 0: 1st block is located in the East of 2nd block
	// delta_x < 0: 1st block is located in the West of 2nd block
	// delta_y > 0: 1st block is located in the South of 2nd block
	// delta_y < 0: 1st block is located in the North of 2nd block
	short int dist_x, dist_y; // distance between 2 blocks
	float dir_x1, dir_x2; // X-direction from 1st block to 2nd block and ...
	float dir_y1, dir_y2; // Y-direction from 1st block to 2nd block and ...
	
	float cost = 0.0;
	for(int l = 1; l <= line_num; l++) { // Calc cost
		block1 = line_info[l][0][0]; // 1st block including line#L
		block2 = line_info[l][1][0]; // 2nd block including line#L
		x1 = block_place_global[block1].first;
		y1 = block_place_global[block1].second;
		x2 = block_place_global[block2].first;
		y2 = block_place_global[block2].second;
		delta_x = x1 - x2;
		delta_y = y1 - y2;
		dist_x = abs(delta_x);
		dist_y = abs(delta_y);
		
		dir_x1 = 0.0;
		dir_x2 = 0.0;
		if(delta_x > 0) {
			dir_x1 = (float)line_info[l][0][1] / 2;
			dir_x2 = (float)line_info[l][1][3] / 2;
		}
		else if(delta_x < 0) {
			dir_x1 = (float)line_info[l][0][3] / 2;
			dir_x2 = (float)line_info[l][1][1] / 2;
		}
		dir_y1 = 0.0;
		dir_y2 = 0.0;
		if(delta_y > 0) {
			dir_y1 = (float)line_info[l][0][2] / 2;
			dir_y2 = (float)line_info[l][1][4] / 2;
		}
		else if(delta_y < 0) {
			dir_y1 = (float)line_info[l][0][4] / 2;
			dir_y2 = (float)line_info[l][1][2] / 2;
		}
		cost += dist_x * dir_x1 * dir_x2 + dist_y * dir_y1 * dir_y2;
	}
	
	return cost;
}

void global_placement(short int W, short int H, short int line_num, short int blocks, short int block_info[][5][3], short int line_info[][2][5], pair<short int,short int> block_place_global[]) {
	
	cout << "== Global placement ==" << endl;
	short int sx_sum = 0, sy_sum = 0;
	for(int i = 1; i <= blocks; i++) {
		sx_sum += block_info[i][0][0]; // block size (x)
		sy_sum += block_info[i][0][1]; // block size (y)
	}
	// define #slots
	short int max_x = W * blocks / sx_sum - 1;
	short int max_y = H * blocks / sy_sum - 1;
	while(((max_x + 1) * (max_y + 1)) < blocks) {
		max_x += 1;
		max_y += 1;
	}
	cout << "#Slots for X-axis: " << max_x + 1 << endl;
	cout << "#Slots for Y-axis: " << max_y + 1 << endl;
	
	// init. placement on global slot
	short int global_slot[SLOT_MAX_SIZE][SLOT_MAX_SIZE];
	short int i = 1;
	for(short int q = 0; q <= max_y; q++) {
		for(short int p = 0; p <= max_x; p++) {
			if(i <= blocks) {
				block_place_global[i] = make_pair(p, q);
				global_slot[q][p] = i;
				//cout << "Block#" << i << ": (" << block_place_global[i].first << ", " << block_place_global[i].second << ")" << endl;
				i++;
			}
			else {
				global_slot[q][p] = 0;
			}
		}
	}
	
	// cost calculation (may be inefficient)
	float cost = calcPlaceCost(line_num, line_info, block_place_global);
	cout << "Cost = " << cost << " -> ";
	
	// SA setup
	float temperature = TEMP_S;
	float delta = powf(((float)(TEMP_E)/(float)(TEMP_S)), (1./(float)(SA_O)));
	//cout << "delta = " << delta << endl;
	
	// SA
	for(int counter = 0; counter < SA_O; counter++) {
		temperature *= delta; // Update temperature
		
		// trgt_i, trgt_j: moved block idx (trgt_j == 0, no block is placed on slot(j_x,j_y))
		// i_x, i_y, j_x, j_y
		for(int counter_in = 0; counter_in < SA_I; counter_in++) {
			short int trgt_i = lfsr_random() % blocks + 1;
			short int i_x = block_place_global[trgt_i].first;
			short int i_y = block_place_global[trgt_i].second;
			short int j_x = lfsr_x_random() % SLOT_MAX_SIZE;
			short int j_y = lfsr_y_random() % SLOT_MAX_SIZE;
			if(j_x > max_x || j_y > max_y) continue;
			short int trgt_j = global_slot[j_y][j_x]; // slot -> block
		
			block_place_global[trgt_i].first = j_x;
			block_place_global[trgt_i].second = j_y;
			global_slot[j_y][j_x] = trgt_i;
			block_place_global[trgt_j].first = i_x;
			block_place_global[trgt_j].second = i_y;
			global_slot[i_y][i_x] = trgt_j;
		
			float new_cost = calcPlaceCost(line_num, line_info, block_place_global);
			
			if(new_cost > cost && (float)lfsr_random()/LFSR_RAND_MAX > expf(-(new_cost-cost)/temperature)) {
				block_place_global[trgt_i].first = i_x;
				block_place_global[trgt_i].second = i_y;
				global_slot[i_y][i_x] = trgt_i;
				block_place_global[trgt_j].first = j_x;
				block_place_global[trgt_j].second = j_y;
				global_slot[j_y][j_x] = trgt_j;
			}
			else if(new_cost != cost) {
				cost = new_cost;
			}
		}
	}
	
	//for(i = 1; i <= blocks; i++) { // final placement on global slot
	//	cout << "Block#" << i << ": (" << block_place_global[i].first << ", " << block_place_global[i].second << ")" << endl;
	//}
	cout << cost << endl;
	
	//for(q = 0; q <= max_y; q++) {
	//	for(p = 0; p <= max_x; p++) {
	//		cout << global_slot[q][p] << " ";
	//	}
	//	cout << endl;
	//}
}

pair<short int,short int> add_pair(pair<short int,short int> x, pair<short int,short int> y) {
	return make_pair(x.first+y.first, x.second+y.second);
}

pair<short int,short int> sub_pair(pair<short int,short int> x, pair<short int,short int> y) {
	return make_pair(x.first-y.first, x.second-y.second);
}

bool local_placement_with_routing_1(short int line_num, short int blocks, short int *pwi, short int *phi, short int block_info[][5][3], pair<short int,short int> block_place_global[], short int opt_result[]) {
	
	short int wi, hi; // board size (w, h)
	
	// size of each slot (w, h)
	short int slot_w[SLOT_MAX_SIZE], slot_h[SLOT_MAX_SIZE];
	// basic position of each slot (x, y)
	short int slot_w_t[SLOT_MAX_SIZE], slot_h_t[SLOT_MAX_SIZE];
	
	short int block_place_slack[MAX_BLOCKS+1][2];
	pair<short int,short int> block_place_basis[MAX_BLOCKS+1];
	pair<short int,short int> block_place_delta[MAX_BLOCKS+1];
	pair<short int,short int> block_place_opt[MAX_BLOCKS+1];
	
	short int board_str[MAX_CELLS];
	
	// load a placement result
	short int min_x = SLOT_MAX_SIZE, min_y = SLOT_MAX_SIZE, max_x = 0, max_y = 0;
	for(int i = 1; i <= blocks; i++) {
		short int p = block_place_global[i].first;
		short int q = block_place_global[i].second;
		if(p < min_x) { min_x = p; }
		if(q < min_y) { min_y = q; }
		if(p > max_x) { max_x = p; }
		if(q > max_y) { max_y = q; }
	}
	for(short int p = min_x; p <= max_x; p++) {
		slot_w[p] = 0;
	}
	for(short int q = min_y; q <= max_y; q++) {
		slot_h[q] = 0;
	}
	for(int i = 1; i <= blocks; i++) {
		short int sx = block_info[i][0][0];
		short int sy = block_info[i][0][1];
		short int p = block_place_global[i].first;
		short int q = block_place_global[i].second;
		if(sx > slot_w[p]) { slot_w[p] = sx; }
		if(sy > slot_h[q]) { slot_h[q] = sy; }
	}
	wi = INTER_BLOCK_MARGIN;
	for(short int p = min_x; p <= max_x; p++) {
		slot_w_t[p] = wi;
		wi += slot_w[p] + INTER_BLOCK_MARGIN;
	}
	hi = INTER_BLOCK_MARGIN;
	for(short int q = min_y; q <= max_y; q++) {
		slot_h_t[q] = hi;
		hi += slot_h[q] + INTER_BLOCK_MARGIN;
	}
	
	cout << "== Local placement with routing (1) ==" << endl;
	cout << "Board size = " << wi << "X" << hi << endl;
	
	// calculate slack for each block
	for(int i = 1; i <= blocks; i++) {
		short int sx = block_info[i][0][0];
		short int sy = block_info[i][0][1];
		short int p = block_place_global[i].first;
		short int q = block_place_global[i].second;
		block_place_basis[i] = make_pair(slot_w_t[p], slot_h_t[q]);
		block_place_slack[i][0] = slot_w[p] - sx;
		block_place_slack[i][1] = slot_h[q] - sy;
		//cout << "Block#" << i << ": (" << block_place_basis[i].first << "," << block_place_basis[i].second << ")" << endl;
		//cout << "Slack(x)=" << block_place_slack[i][0] << ", Slack(y)=" << block_place_slack[i][1] << endl;
	}
	
	bool find_route = false;
	int min_status = wi * hi + 1;
	for(int counter = 0; counter < LOOP; counter++) {
		cout << "."; fflush(stdout);
		for(int s = 0; s < MAX_CELLS; s++) {
			board_str[s] = 0;
		}
		for(int i = 1; i <= blocks; i++) {
			short int x = block_place_basis[i].first;
			short int y = block_place_basis[i].second;
			short int dx = lfsr_x_random() % (block_place_slack[i][0] + 1);
			short int dy = lfsr_y_random() % (block_place_slack[i][1] + 1);
			block_place_delta[i] = make_pair(dx, dy);
			for(int it = 1; it < 5; it++) {
				short int tx = block_info[i][it][0];
				short int ty = block_info[i][it][1];
				short int tn = block_info[i][it][2];
				if(tn < 0) break;
				short int p = x + dx + tx;
				short int q = y + dy + ty;
				short int idx = q * MAX_SIZE + p;
				if(tn > 0) { board_str[idx] = tn; }
				else { board_str[idx] = -1; }
			}
		}
		
		int status = router(wi, hi, line_num, board_str); // routing
		
		if(status >= 0 && status < min_status) {
			min_status = status;
			find_route = true;
			for(int i = 1; i <= blocks; i++) {
				block_place_opt[i] = add_pair(block_place_basis[i], block_place_delta[i]);
			}
			for(int s = 0; s < MAX_CELLS; s++) {
				opt_result[s] = board_str[s];
			}
		}
	}
	cout << endl;
	
	if(!find_route) return false;
	
	// Update
	for(int i = 1; i <= blocks; i++) {
		block_place_basis[i] = block_place_opt[i];
	}
	
	/// print for debug ///
	cout << "Min-wire status: " << min_status << endl;
	show_result(line_num, blocks, wi, hi, opt_result, block_place_basis);
	/// print for debug ///
	
	//cout << "== Local placement with routing (1+) ==" << endl;
	bool remove_flag;
	short int deleted_line_w = 0, deleted_line_h = 0;
	for(short int q = hi - 1; q >= 0; q--) {
		remove_flag = true;
		for(short int p = 0; p < wi; p++) {
			short int idx = q * MAX_SIZE + p;
			if(opt_result[idx] != 0) {
				remove_flag = false;
				break;
			}
		}
		if(remove_flag) {
			for(int i = 1; i <= blocks; i++) {
				if(block_place_basis[i].second > q) { block_place_basis[i].second -= 1; }
			}
			deleted_line_h++;
		}
	}
	for(short int p = wi - 1; p >= 0; p--) {
		remove_flag = true;
		for(short int q = 0; q < hi; q++) {
			short int idx = q * MAX_SIZE + p;
			if(opt_result[idx] != 0) {
				remove_flag = false;
				break;
			}
		}
		if(remove_flag) {
			for(int i = 1; i <= blocks; i++) {
				if(block_place_basis[i].first > p) { block_place_basis[i].first -= 1; }
			}
			deleted_line_w++;
		}
	}
	hi -= deleted_line_h;
	wi -= deleted_line_w;
	
	for(int s = 0; s < MAX_CELLS; s++) {
		board_str[s] = 0;
	}
	for(int i = 1; i <= blocks; i++) {
		short int x = block_place_basis[i].first;
		short int y = block_place_basis[i].second;
		for(int it = 1; it < 5; it++) {
			short int tx = block_info[i][it][0];
			short int ty = block_info[i][it][1];
			short int tn = block_info[i][it][2];
			if(tn < 0) break;
			short int p = x + tx;
			short int q = y + ty;
			short int idx = q * MAX_SIZE + p;
			if(tn > 0) { board_str[idx] = tn; }
			else { board_str[idx] = -1; }
		}
	}
	
	int counter = 0;
	while(router(wi, hi, line_num, board_str) < 0) {
		if(counter++ >= LOOP) return false;
	}
	for(int s = 0; s < MAX_CELLS; s++) {
		opt_result[s] = board_str[s];
	}
	
	for(int i = 1; i <= blocks; i++) {
		block_info[i][0][0] = block_place_basis[i].first;
		block_info[i][0][1] = block_place_basis[i].second;
	}
	*pwi = wi;
	*phi = hi;
	
	return true;
}

bool local_placement_with_routing_2(short int line_num, short int blocks, short int *pwi, short int *phi, short int block_info[][5][3], short int opt_result[]) {
	
	short int wi = *pwi, hi = *phi;
	
	short int block_place_slack[MAX_BLOCKS+1][2];
	pair<short int,short int> block_place_basis[MAX_BLOCKS+1];
	pair<short int,short int> block_place_delta[MAX_BLOCKS+1];
	pair<short int,short int> block_place_opt[MAX_BLOCKS+1];
	
	short int board_str[MAX_CELLS];
	
	for(int i = 1; i <= blocks; i++) {
		block_place_basis[i] = make_pair(block_info[i][0][0], block_info[i][0][1]);
	}
	
	cout << "== Local placement with routing (2) ==" << endl;
	int direction = 0; // 0: X, 1: Y
	int try_count = 1;
	int no_update = 0;
	
	if(hi > wi) direction = 1;
	while(try_count <= TRY_LIMIT) {
		cout << "Phase " << try_count << ": direction: ";
		switch(direction) {
			case 0: cout << "X" << endl; break;
			case 1: cout << "Y" << endl; break;
		}
		cout << "Board size = " << wi << "X" << hi << endl;
		
		// calculate slack for each block
		for(int t = 1; t <= blocks; t++) {
			block_place_slack[t][0] = 0;
			block_place_slack[t][1] = 0;
			for(int s = 0; s < MAX_CELLS; s++) {
				board_str[s] = 0;
			}
			for(int i = 1; i <= blocks; i++) {
				if(i == t) continue;
				short int x = block_place_basis[i].first;
				short int y = block_place_basis[i].second;
				for(int it = 1; it < 5; it++) {
					short int tx = block_info[i][it][0];
					short int ty = block_info[i][it][1];
					short int tn = block_info[i][it][2];
					if(tn < 0) break;
					short int p = x + tx;
					short int q = y + ty;
					short int idx = q * MAX_SIZE + p;
					if(tn > 0) { board_str[idx] = tn; }
					else { board_str[idx] = -1; }
				}
			}
			bool move_x = true, move_y = true;
			while(1) { // X direction
				short int x = block_place_basis[t].first;
				short int y = block_place_basis[t].second;
				for(int it = 1; it < 5; it++) {
					short int tx = block_info[t][it][0];
					short int ty = block_info[t][it][1];
					short int tn = block_info[t][it][2];
					if(tn < 0) break;
					short int p = x + tx - (block_place_slack[t][0]+1);
					short int q = y + ty;
					short int idx = q * MAX_SIZE + p;
					if(p < 0 || board_str[idx] != 0) {
						move_x = false;
						break;
					}
				}
				if(move_x) {
					if(++block_place_slack[t][0] >= 9) break;
				}
				else {
					break;
				}
			}
			while(1) { // Y direction
				short int x = block_place_basis[t].first;
				short int y = block_place_basis[t].second;
				for(int it = 1; it < 5; it++) {
					short int tx = block_info[t][it][0];
					short int ty = block_info[t][it][1];
					short int tn = block_info[t][it][2];
					if(tn < 0) break;
					short int p = x + tx;
					short int q = y + ty - (block_place_slack[t][1]+1);
					short int idx = q * MAX_SIZE + p;
					if(q < 0 || board_str[idx] != 0) {
						move_y = false;
						break;
					}
				}
				if(move_y) {
					if(++block_place_slack[t][1] >= 9) break;
				}
				else {
					break;
				}
			}
			//cout << "Block#" << t << ": (" << block_place_basis[t].first << "," << block_place_basis[t].second << ")" << endl;
			//cout << "Slack(x)=" << block_place_slack[t][0] << ", Slack(y)=" << block_place_slack[t][1] << endl;
		}
		
		no_update++;
		bool find_route = false;
		int d_sum_max = 0;
		for(int counter = 0; counter < LOOP; counter++) {
			cout << "."; fflush(stdout);
			for(int s = 0; s < MAX_CELLS; s++) {
				board_str[s] = 0;
			}
			int d_sum = 0;
			for(int i = 1; i <= blocks; i++) {
				short int x = block_place_basis[i].first;
				short int y = block_place_basis[i].second;
				short int dx = 0;
				short int dy = 0;
				switch(direction) {
					case 0: dx = lfsr_x_random() % (block_place_slack[i][0] + 1); break;
					case 1: dy = lfsr_y_random() % (block_place_slack[i][1] + 1); break;
				}
				if(lfsr_random() % 16 >= 2) {
					dx = 0;
					dy = 0;
				}
				block_place_delta[i] = make_pair(dx, dy);
				d_sum += dx + dy;
				for(int it = 1; it < 5; it++) {
					short int tx = block_info[i][it][0];
					short int ty = block_info[i][it][1];
					short int tn = block_info[i][it][2];
					if(tn < 0) break;
					short int p = x - dx + tx;
					short int q = y - dy + ty;
					short int idx = q * MAX_SIZE + p;
					if(tn > 0) { board_str[idx] = tn; }
					else{ board_str[idx] = -1; }
				}
			}
			
			int status = router(wi, hi, line_num, board_str);
			
			if(status >= 0 && d_sum > d_sum_max) {
				d_sum_max = d_sum;
				find_route = true;
				for(int i = 1; i <= blocks; i++) {
					block_place_opt[i] = sub_pair(block_place_basis[i], block_place_delta[i]);
				}
				for(int s = 0; s < MAX_CELLS; s++) {
					opt_result[s] = board_str[s];
				}
			}
		}
		cout << endl;
		
		if(find_route) {
			
			no_update = 0; // reset
			
			// Update
			for(int i = 1; i <= blocks; i++) {
				block_place_basis[i] = block_place_opt[i];
			}
			
			/// print for debug ///
			cout << "Max dSum: " << d_sum_max << endl;
			show_result(line_num, blocks, wi, hi, opt_result, block_place_basis);
			/// print for debug ///
			
			bool remove_flag;
			short int deleted_line_w = 0, deleted_line_h = 0;
			for(short int q = hi - 1; q >= 0; q--) {
				remove_flag = true;
				for(short int p = 0; p < wi; p++) {
					short int idx = q * MAX_SIZE + p;
					if(opt_result[idx] != 0) {
						remove_flag = false;
						break;
					}
				}
				if(remove_flag) {
					for(int i = 1; i <= blocks; i++) {
						if(block_place_basis[i].second > q) { block_place_basis[i].second -= 1; }
					}
					deleted_line_h++;
				}
			}
			for(short int p = wi - 1; p >= 0; p--) {
				remove_flag = true;
				for(short int q = 0; q < hi; q++) {
					short int idx = q * MAX_SIZE + p;
					if(opt_result[idx] != 0) {
						remove_flag = false;
						break;
					}
				}
				if(remove_flag) {
					for(int i = 1; i <= blocks; i++) {
						if(block_place_basis[i].first > p) { block_place_basis[i].first -= 1; }
					}
					deleted_line_w++;
				}
			}
			hi -= deleted_line_h;
			wi -= deleted_line_w;
			
			for(int s = 0; s < MAX_CELLS; s++) {
				board_str[s] = 0;
			}
			for(int i = 1; i <= blocks; i++) {
				short int x = block_place_basis[i].first;
				short int y = block_place_basis[i].second;
				for(int it = 1; it < 5; it++) {
					short int tx = block_info[i][it][0];
					short int ty = block_info[i][it][1];
					short int tn = block_info[i][it][2];
					if(tn < 0) break;
					short int p = x + tx;
					short int q = y + ty;
					short int idx = q * MAX_SIZE + p;
					if(tn > 0) { board_str[idx] = tn; }
					else { board_str[idx] = -1; }
				}
			}
			
			int counter = 0;
			while(router(wi, hi, line_num, board_str) < 0) {
				if(counter++ >= LOOP) return false;
			}
			for(int s = 0; s < MAX_CELLS; s++) {
				opt_result[s] = board_str[s];
			}
		}
		
		if(no_update >= NO_MOVE) break;
		
		try_count++;
		switch(direction) {
			case 0: direction = 1; break;
			case 1: direction = 0; break;
		}
	}
	cout << "#Tries: " << try_count << endl;
	*pwi = wi;
	*phi = hi;
	for(int i = 1; i <= blocks; i++) {
		block_info[i][0][0] = block_place_basis[i].first;
		block_info[i][0][1] = block_place_basis[i].second;
	}
	
	return true;
}







unsigned short int new_weight(unsigned short int x) {
	unsigned short int y = (x & 0x00FF) + 1;
	return y;
}

// DATA_BIT: bit length of cell idx
// PRIO_BIT: bit length of cost(priority)
int router(short int size_x, short int size_y, short int line_num, short int board_str[MAX_CELLS]) {
	
	if(line_num == 0) return 0;
	
	short int terminals[MAX_LINES][2];
	short int paths_size[MAX_LINES];
	unsigned short int paths[MAX_LINES][MAX_PATH];
	bool adjacents[MAX_LINES];
	
	unsigned short int weights[MAX_CELLS];
	
	SET_LOOP_1:
	for(int i = 0; i < line_num; i++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=256
		terminals[i][0] = -1;
		terminals[i][1] = -1;
		paths_size[i] = 0;
		adjacents[i] = false;
	}
	SET_LOOP_2:
	for(short int j = 0; j < MAX_CELLS; j++) {
		short int y = j / MAX_SIZE;
		short int x = j - MAX_SIZE * y;
		if(x >= size_x || y >= size_y) continue;
		short int num = board_str[j];
		if(num > 0) {
			if(terminals[num-1][0] < 0) { terminals[num-1][0] = j; }
			else if(terminals[num-1][1] < 0) { terminals[num-1][1] = j; }
			else { return -2; }
			weights[j] = PRIO_MAX;
		}
		else if(num < 0) {
			weights[j] = PRIO_MAX;
		}
		else {
			weights[j] = 1;
		}
	}
	
#ifdef PRINT_BOARD
	cout << "== Print board ==" << endl;
	for(short int y = 0; y < size_y; y++) {
		for(short int x = 0; x < size_x; x++) {
			short int j = y * MAX_SIZE + x;
			short int num = board_str[j];
			if(num == 0) { cout << "--"; }
			else if(num == -1) { cout << "XX"; }
			else{ cout << setfill('0') << setw(2) << hex << num; }
			if(x != size_x-1) { cout << " "; }
		}
		cout << endl;
	}
	cout << setfill(' ');
	cout << dec;
#endif
	
	SET_LOOP_3:
	for(int i = 0; i < line_num; i++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=256
		short int t1 = terminals[i][0];
		short int t2 = terminals[i][1];
		short int t1_y = t1 / MAX_SIZE;
		short int t1_x = t1 - MAX_SIZE * t1_y;
		short int t2_y = t2 / MAX_SIZE;
		short int t2_x = t2 - MAX_SIZE * t2_y;
		short int dist = abs(t1_x - t2_x) + abs(t1_y - t2_y);
		if(dist == 1) {
			adjacents[i] = true;
#ifdef PRINT_BOARD
			cout << "Line #" << i + 1 << " needs no routing" << endl;
#endif
		}
	}
	
	// Step 1 //
	ROUTING_1:
	for(int i = 0; i < line_num; i++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=256
		if(adjacents[i]) continue;
#ifdef PRINT_SEARCH
		cout << "Line #" << i + 1 << endl;
#endif
		short int t1 = terminals[i][0];
		short int t2 = terminals[i][1];
		weights[t1] = 1;
		if(search(size_x, size_y, &paths_size[i], paths[i], t1, t2, weights) < 0) { return -1; }
		weights[t1] = PRIO_MAX;
	}
	
	bool has_overlap;
	short int overlap_checks[MAX_CELLS];
	short int last_target = -1;
	
	// Step 2 //
	ROUTING_2:
	for(int round = 1; round <= ROUND_LIMIT; round++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=4096
		short int target = lfsr_random() % line_num;
		if(adjacents[target]) continue;
		if(target == last_target) continue;
		last_target = target;
		unsigned short int round_weight = new_weight(round);

#ifdef PRINT_SEARCH		
		cout << "Line #" << target + 1 << "(round: " << round << ", weight: " << round_weight << ")" << endl;
#endif
		
		short int t1 = terminals[target][0];
		short int t2 = terminals[target][1];
		
		WEIGHT_UPDATE_1:
		for(short int p = 0; p < paths_size[target]; p++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=128
			weights[paths[target][p]] = 1;
		}
		WEIGHT_UPDATE_2:
		for(int i = 0; i < line_num; i++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=256
			if(i == target) continue;
			WEIGHT_UPDATE_2_1:
			for(short int p = 0; p < paths_size[i]; p++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=128
				weights[paths[i][p]] = round_weight;
			}
		}
		weights[t1] = 1;
		search(size_x, size_y, &paths_size[target], paths[target], t1, t2, weights);
		weights[t1] = PRIO_MAX;
		
		has_overlap = false;
		OVERLAP_CHECK_1:
		for(short int j = 0; j < MAX_CELLS; j++) {
			short int y = j / MAX_SIZE;
			short int x = j - MAX_SIZE * y;
			if(x >= size_x || y >= size_y) continue;
			short int num = board_str[j];
			if(num > 0 || num < 0) { overlap_checks[j] = 1; }
			else { overlap_checks[j] = 0; }
		}
		OVERLAP_CHECK_2:
		for(int i = 0; i < line_num; i++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=256
			OVERLAP_CHECK_2_1:
			for(short int p = 0; p < paths_size[i]; p++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=128
				short int cell_id = paths[i][p];
				if(overlap_checks[cell_id] == 1) {
					has_overlap = true;
					break;
				}
				overlap_checks[cell_id] = 1;
			}
			if(has_overlap) break;
		}
		if(!has_overlap) break;
	}
	
	if(has_overlap) { // Cannot solve
		return -1;
	}
	
	int total_wire_length = 0;
	OUTPUT_1:
	for(short int i = 0; i < line_num; i++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=256
		total_wire_length += paths_size[i];
		OUTPUT_1_1:
		for(short int p = 0; p < paths_size[i]; p++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=128
			short int cell_id = paths[i][p];
			board_str[cell_id] = i + 1;
		}
	}
	
#ifdef PRINT_BOARD
	cout << "== Print answer ==" << endl;
	for(short int y = 0; y < size_y; y++) {
		for(short int x = 0; x < size_x; x++) {
			short int j = y * MAX_SIZE + x;
			short int num = board_str[j];
			if(num == 0) { cout << "--"; }
			else if(num == -1) { cout << "XX"; }
			else{ cout << setfill('0') << setw(2) << hex << num; }
			if(x != size_x-1) { cout << " "; }
		}
		cout << endl;
	}
	cout << setfill(' ');
	cout << dec;
#endif
	
	return total_wire_length;
}

int search(short int size_x, short int size_y, short int *path_size, unsigned short int path[MAX_PATH], unsigned short int start, unsigned short int goal, unsigned short int w[MAX_CELLS]) {
	
	unsigned short int dist[MAX_CELLS];
	unsigned short int prev[MAX_CELLS];
	
	SEARCH_INIT_DIST:
	for(int j = 0; j < MAX_CELLS; j++) {
//#pragma HLS UNROLL factor=2
		dist[j] = PRIO_MAX;
	}
	
	unsigned short int pq_len = 0;
    bool is_empty = true;
    unsigned int pq_nodes[MAX_PQ];
	
	short int goal_y = goal / MAX_SIZE;
	short int goal_x = goal - MAX_SIZE * goal_y;
	
	dist[start] = 0;
	enqueue(pq_nodes, 0, start, &pq_len, &is_empty);
	
	bool find_path = false;
	SEARCH_BODY:
	while(!is_empty) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=1000
		
		unsigned short int prev_cost;
		unsigned short int s;
		dequeue(pq_nodes, &prev_cost, &s, &pq_len, &is_empty);
		
		unsigned short int dist_s = dist[s];
		
		if(s == goal) {
			find_path = true;
			break;
		}
		
		unsigned short int cost = w[s];
		short int s_y = s / MAX_SIZE;
		short int s_x = s - MAX_SIZE * s_y;
		
		SEARCH_ADJACENTS:
		for(int a = 0; a < 4; a++) {
			short int d_x = s_x;
			short int d_y = s_y;
			switch(a) {
				case 0: d_x -= 1; break;
				case 1: d_x += 1; break;
				case 2: d_y -= 1; break;
				case 3: d_y += 1; break;
			}
			if(d_x >= 0 && d_x < size_x && d_y >= 0 && d_y < size_y) {
				unsigned short int d = d_y * MAX_SIZE + d_x;
				if(w[d] == PRIO_MAX && d != goal) continue;
				
				unsigned short int dist_d = dist_s + cost;
				if(dist_d < dist[d]) {
					dist[d] = dist_d;
					prev[d] = s;
					dist_d += abs(d_x - goal_x) + abs(d_y - goal_y);
					enqueue(pq_nodes, dist_d, d, &pq_len, &is_empty);
				}
			}
		}
	}
	if(!find_path){ return -1; }
	
	unsigned short int t = prev[goal];
	short int p = 0;
#ifdef PRINT_SEARCH
	cout << "Path: ";
#endif
	SEARCH_BACKTRACK:
	while(t != start) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=128
#ifdef PRINT_SEARCH
		cout << "(" << t % MAX_SIZE << ", " << t / MAX_SIZE << ")";
#endif
		path[p] = t;
		p++;
		t = prev[t];
	}
#ifdef PRINT_SEARCH
	cout << endl;
#endif
	*path_size = p;
	
	return 0;
}

// Enqueue (Insert an element)
void enqueue(unsigned int pq_nodes[MAX_PQ], unsigned short int priority, unsigned short int data, unsigned short int *pq_len, bool *is_empty) {
//#pragma HLS INLINE

	(*pq_len)++;
	if ((*pq_len) == 0) { (*pq_len)--; } // Queue is full -> Last element is automatically removed
	// Note that last element is not always the lowest priority one. //

	unsigned short int i = (*pq_len);
	unsigned short int p = (*pq_len) >> 1; // parent node
	
	ENQUEUE_LOOP:
	while (i > 1 && (unsigned short int)(pq_nodes[p] >> DATA_BIT) >= priority) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=12
		pq_nodes[i] = pq_nodes[p];
		i = p;
		p = p >> 1; // parent node
	}
	pq_nodes[i] = ((unsigned int)priority << DATA_BIT) | (unsigned int)data;
	*is_empty = false;
}

// Dequeue (Extract and remove the top element)
void dequeue(unsigned int pq_nodes[MAX_PQ], unsigned short int *ret_priority, unsigned short int *ret_data, unsigned short int *pq_len, bool *is_empty) {
//#pragma HLS INLINE

	*ret_priority = (unsigned short int)(pq_nodes[1] >> DATA_BIT);
	*ret_data     = (unsigned short int)(pq_nodes[1] & DATA_MASK);
	
	unsigned short int i = 1; // root node
	unsigned short int last_priority = (unsigned short int)(pq_nodes[*pq_len] >> DATA_BIT); // Priority of last element

	DEQUEUE_LOOP:
	while (!(i >> (PQ_BIT-1))) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=12
		unsigned short int c1 = i << 1; // child node (L)
		unsigned short int c2 = c1 + 1; // child node (R)
		if (c1 < *pq_len && (unsigned short int)(pq_nodes[c1] >> DATA_BIT) <= last_priority) {
			if (c2 < *pq_len && (unsigned short int)(pq_nodes[c2] >> DATA_BIT) <= (unsigned short int)(pq_nodes[c1] >> DATA_BIT)) {
				pq_nodes[i] = pq_nodes[c2];
				i = c2;
			}
			else {
				pq_nodes[i] = pq_nodes[c1];
				i = c1;
			}
		}
		else {
			if (c2 < *pq_len && (unsigned short int)(pq_nodes[c2] >> DATA_BIT) <= last_priority) {
				pq_nodes[i] = pq_nodes[c2];
				i = c2;
			}
			else {
				break;
			}
		}
	}
	pq_nodes[i] = pq_nodes[*pq_len];
	(*pq_len)--;
	if ((*pq_len) == 0) { *is_empty = true; }
}
