/*-------------------------------------------------------------------------
 * (function: split_multiplier_a)
 *
 * Splits only the "a" operand of a multiplier. The caller supplies the
 * widths of the two pieces (a0 and a1) together with the untouched width
 * of b; this routine simply carries that split out:
 *
 *	a1a0 * b => a0 * b + a1 * b => c
 *
 * Two partial multipliers ("<name>-0" and "<name>-1") are created and
 * appended to mult_list, and one adder ("<name>-add0") is set up through
 * init_cascade_adder(). The output pins of the original node are remapped
 * onto the new nodes and the original node is freed afterwards, so the
 * caller must not touch it again.
 *
 * The a0*b partial product is sign extended on its way into the adder;
 * that extension only ever widens the value.
 *-----------------------------------------------------------------------*/
void split_multiplier_a(nnode_t *node, int a0, int a1, int b)
{
	nnode_t *lo_mult;	/* a0 * b */
	nnode_t *hi_mult;	/* a1 * b */
	nnode_t *adder;
	int pin;

	/* The requested pieces must add up to the real operand widths. */
	oassert(node->input_port_sizes[0] == (a0 + a1));
	oassert(node->input_port_sizes[1] == b);

	/* Partial product a0 * b, named "<name>-0". */
	lo_mult = allocate_nnode();
	lo_mult->name = (char *)malloc(strlen(node->name) + 3);
	strcpy(lo_mult->name, node->name);
	strcat(lo_mult->name, "-0");
	init_split_multiplier(node, lo_mult, 0, a0, 0, b);
	mult_list = insert_in_vptr_list(mult_list, lo_mult);

	/* Partial product a1 * b, named "<name>-1". */
	hi_mult = allocate_nnode();
	hi_mult->name = (char *)malloc(strlen(node->name) + 3);
	strcpy(hi_mult->name, node->name);
	strcat(hi_mult->name, "-1");
	init_split_multiplier(node, hi_mult, a0, a1, 0, b);
	mult_list = insert_in_vptr_list(mult_list, hi_mult);

	/* Adder that recombines the two products, named "<name>-add0". */
	adder = allocate_nnode();
	adder->name = (char *)malloc(strlen(node->name) + 6);
	strcpy(adder->name, node->name);
	strcat(adder->name, "-add0");
	init_cascade_adder(adder, hi_mult, a1 + b);

	/* Bits a0 and up of the a0*b product feed the adder from pin 0. */
	pin = a0;
	while (pin < lo_mult->output_port_sizes[0])
	{
		connect_nodes(lo_mult, pin, adder, pin - a0);
		pin++;
	}

	/* Sign extend: repeat the top product bit up to adder pin a1+b-1. */
	pin = lo_mult->output_port_sizes[0] - a0;
	while (pin < a1 + b)
	{
		connect_nodes(lo_mult, lo_mult->output_port_sizes[0] - 1, adder, pin);
		pin++;
	}

	/* The a1*b product occupies adder pins a1+b .. 2*(a1+b)-1. */
	for (pin = 0; pin < (a1 + b); pin++)
		connect_nodes(hi_mult, pin, adder, pin + (b + a1));

	/* The lowest a0 result bits come straight from the a0*b product ... */
	for (pin = 0; pin < a0; pin++)
		remap_pin_to_new_node(node->output_pins[pin], lo_mult, pin);

	/* ... and every bit above that is driven by the adder. */
	pin = a0;
	while (pin < node->num_output_pins)
	{
		remap_pin_to_new_node(node->output_pins[pin], adder, pin - a0);
		pin++;
	}

	/* Probably more to do here in freeing the old node! */
	free(node->name);
	free(node->input_port_sizes);
	free(node->output_port_sizes);

	/* Only the pin arrays go away; the pins themselves were relocated. */
	free(node->input_pins);
	free(node->output_pins);
	free(node);
}
/*-------------------------------------------------------------------------
 * (function: split_multiplier_b)
 *
 * Splits only the "b" operand of a multiplier. The caller supplies the
 * untouched width of a together with the widths of the two pieces of b
 * (b1 and b0); this routine simply carries that split out:
 *
 *	a * b1b0 => a * b1 + a * b0 => c
 *
 * Two partial multipliers ("<name>-0" and "<name>-1") are created and
 * appended to mult_list, and one adder ("<name>-add0") is set up through
 * init_cascade_adder(). The output pins of the original node are remapped
 * onto the new nodes and the original node is freed afterwards, so the
 * caller must not touch it again.
 *
 * The a*b0 partial product is sign extended on its way into the adder;
 * that extension only ever widens the value.
 *-----------------------------------------------------------------------*/
void split_multiplier_b(nnode_t *node, int a, int b1, int b0)
{
	nnode_t *lo_mult;	/* a * b0 */
	nnode_t *hi_mult;	/* a * b1 */
	nnode_t *adder;
	int pin;

	/* The requested pieces must add up to the real operand widths. */
	oassert(node->input_port_sizes[0] == a);
	oassert(node->input_port_sizes[1] == (b0 + b1));

	/* Partial product a * b0, named "<name>-0". */
	lo_mult = allocate_nnode();
	lo_mult->name = (char *)malloc(strlen(node->name) + 3);
	strcpy(lo_mult->name, node->name);
	strcat(lo_mult->name, "-0");
	init_split_multiplier(node, lo_mult, 0, a, 0, b0);
	mult_list = insert_in_vptr_list(mult_list, lo_mult);

	/* Partial product a * b1, named "<name>-1". */
	hi_mult = allocate_nnode();
	hi_mult->name = (char *)malloc(strlen(node->name) + 3);
	strcpy(hi_mult->name, node->name);
	strcat(hi_mult->name, "-1");
	init_split_multiplier(node, hi_mult, 0, a, b0, b1);
	mult_list = insert_in_vptr_list(mult_list, hi_mult);

	/* Adder that recombines the two products, named "<name>-add0". */
	adder = allocate_nnode();
	adder->name = (char *)malloc(strlen(node->name) + 6);
	strcpy(adder->name, node->name);
	strcat(adder->name, "-add0");
	init_cascade_adder(adder, hi_mult, a + b1);

	/* Bits b0 and up of the a*b0 product feed the adder from pin 0. */
	pin = b0;
	while (pin < lo_mult->output_port_sizes[0])
	{
		connect_nodes(lo_mult, pin, adder, pin - b0);
		pin++;
	}

	/* Sign extend: repeat the top product bit up to adder pin a+b1-1. */
	pin = lo_mult->output_port_sizes[0] - b0;
	while (pin < a + b1)
	{
		connect_nodes(lo_mult, lo_mult->output_port_sizes[0] - 1, adder, pin);
		pin++;
	}

	/* The a*b1 product occupies adder pins a+b1 .. 2*(a+b1)-1. */
	for (pin = 0; pin < (a + b1); pin++)
		connect_nodes(hi_mult, pin, adder, pin + (b1 + a));

	/* The lowest b0 result bits come straight from the a*b0 product ... */
	for (pin = 0; pin < b0; pin++)
		remap_pin_to_new_node(node->output_pins[pin], lo_mult, pin);

	/* ... and every bit above that is driven by the adder. */
	pin = b0;
	while (pin < node->num_output_pins)
	{
		remap_pin_to_new_node(node->output_pins[pin], adder, pin - b0);
		pin++;
	}

	/* Probably more to do here in freeing the old node! */
	free(node->name);
	free(node->input_port_sizes);
	free(node->output_port_sizes);

	/* Only the pin arrays go away; the pins themselves were relocated. */
	free(node->input_pins);
	free(node->output_pins);
	free(node);
}
/*
 * Pads a single port memory out to the width given in the arch file.
 *
 * The "data" input port and the "out" output port are padded against the
 * single_port_rams model, after which the node is added to sp_memory_list.
 * The node must be a MEMORY and the single port model must be present.
 */
void pad_sp_memory_width(nnode_t *node, netlist_t *netlist)
{
	char *data_port = "data";
	char *out_port = "out";

	oassert(node->type == MEMORY);
	oassert(single_port_rams != NULL);

	pad_memory_input_port(node, netlist, single_port_rams, data_port);
	pad_memory_output_port(node, netlist, single_port_rams, out_port);

	sp_memory_list = insert_in_vptr_list(sp_memory_list, node);
}
/*
 * Pads a dual port memory out to the width given in the arch file.
 *
 * Both data input ports ("data1", "data2") and then both output ports
 * ("out1", "out2") are padded against the dual_port_rams model, after
 * which the node is added to dp_memory_list. The node must be a MEMORY
 * and the dual port model must be present.
 */
void pad_dp_memory_width(nnode_t *node, netlist_t *netlist)
{
	char *input_ports[] = { "data1", "data2" };
	char *output_ports[] = { "out1", "out2" };
	int p;

	oassert(node->type == MEMORY);
	oassert(dual_port_rams != NULL);

	/* Inputs first, then outputs, each in port-number order. */
	for (p = 0; p < 2; p++)
		pad_memory_input_port(node, netlist, dual_port_rams, input_ports[p]);

	for (p = 0; p < 2; p++)
		pad_memory_output_port(node, netlist, dual_port_rams, output_ports[p]);

	dp_memory_list = insert_in_vptr_list(dp_memory_list, node);
}
/*-------------------------------------------------------------------------
 * (function: init_cascade_adder)
 *
 * Sets up "node" as the ADD node used inside a split multiplier.
 *
 *	node - freshly allocated node to initialise
 *	a    - node feeding the first operand; its first output port width
 *	       becomes the width of input port 0, and its related_ast_node
 *	       and traverse_visited values are copied over
 *	b    - width of input port 1
 *
 * The single output port is as wide as the wider operand. All pin slots
 * are allocated and cleared to NULL, and the node is added to add_list.
 *-----------------------------------------------------------------------*/
void init_cascade_adder(nnode_t *node, nnode_t *a, int b)
{
	int width;
	int total_inputs;
	int k;

	node->type = ADD;
	node->related_ast_node = a->related_ast_node;
	node->traverse_visited = a->traverse_visited;
	node->node_data = NULL;

	/* The result is as wide as the wider of the two operands. */
	width = a->output_port_sizes[0];
	if (width < b)
		width = b;

	/* Two input ports: operand "a" followed by operand "b". */
	node->num_input_port_sizes = 2;
	node->input_port_sizes = (int *)malloc(2 * sizeof(int));
	node->input_port_sizes[0] = a->output_port_sizes[0];
	node->input_port_sizes[1] = b;

	/* One output port carrying the sum. */
	node->num_output_port_sizes = 1;
	node->output_port_sizes = (int *)malloc(sizeof(int));
	node->output_port_sizes[0] = width;

	/* Input pin slots, all empty to begin with. */
	total_inputs = a->output_port_sizes[0] + b;
	node->num_input_pins = total_inputs;
	node->input_pins = (npin_t**)malloc(sizeof(void *) * total_inputs);
	k = 0;
	while (k < total_inputs)
	{
		node->input_pins[k] = NULL;
		k++;
	}

	/* Output pin slots, all empty to begin with. */
	node->num_output_pins = width;
	node->output_pins = (npin_t**)malloc(sizeof(void *) * width);
	k = 0;
	while (k < width)
	{
		node->output_pins[k] = NULL;
		k++;
	}

	add_list = insert_in_vptr_list(add_list, node);
}
/*-------------------------------------------------------------------------
 * (function: split_multiplier)
 *
 * Splits both operands of a multiplier so the pieces fit the multiplier
 * resources of the targeted FPGA architecture better. The caller decides
 * the split widths; this routine only carries the split out:
 *
 *	a1a0 * b1b0 => a0 * b0 + a0 * b1 + a1 * b0 + a1 * b1 => c1c0 => c
 *
 * Because a0 * b0 and a1 * b1 do not overlap in bits, they can be
 * concatenated rather than added, which saves one adder:
 *
 *	((a1 * b1) . (a0 * b0)) + ((a0 * b1) + (a1 * b0)) ==> Result
 *
 * Four partial multipliers ("<name>-0" .. "<name>-3") are created and
 * appended to mult_list, plus two adders ("<name>-add0", "<name>-add1")
 * set up through init_cascade_adder(). The original output pins are
 * remapped onto the second adder and the original node is freed, so the
 * caller must not touch it again.
 *
 * Some of the additions need sign extension, which only ever widens.
 *-----------------------------------------------------------------------*/
void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1)
{
	nnode_t *lo_lo;		/* a0 * b0 */
	nnode_t *lo_hi;		/* a0 * b1 */
	nnode_t *hi_lo;		/* a1 * b0 */
	nnode_t *hi_hi;		/* a1 * b1 */
	nnode_t *inner_add;
	nnode_t *outer_add;
	int pin;
	int offset;

	/* The requested pieces must add up to the real operand widths. */
	oassert(node->input_port_sizes[0] == (a0 + a1));
	oassert(node->input_port_sizes[1] == (b0 + b1));

	/* Small multiply a0 * b0, named "<name>-0". */
	lo_lo = allocate_nnode();
	lo_lo->name = (char *)malloc(strlen(node->name) + 3);
	strcpy(lo_lo->name, node->name);
	strcat(lo_lo->name, "-0");
	init_split_multiplier(node, lo_lo, 0, a0, 0, b0);
	mult_list = insert_in_vptr_list(mult_list, lo_lo);

	/* Big multiply a1 * b1, named "<name>-3". */
	hi_hi = allocate_nnode();
	hi_hi->name = (char *)malloc(strlen(node->name) + 3);
	strcpy(hi_hi->name, node->name);
	strcat(hi_hi->name, "-3");
	init_split_multiplier(node, hi_hi, a0, a1, b0, b1);
	mult_list = insert_in_vptr_list(mult_list, hi_hi);

	/* Cross multiply a0 * b1, named "<name>-1". */
	lo_hi = allocate_nnode();
	lo_hi->name = (char *)malloc(strlen(node->name) + 3);
	strcpy(lo_hi->name, node->name);
	strcat(lo_hi->name, "-1");
	init_split_multiplier(node, lo_hi, 0, a0, b0, b1);
	mult_list = insert_in_vptr_list(mult_list, lo_hi);

	/* Cross multiply a1 * b0, named "<name>-2". */
	hi_lo = allocate_nnode();
	hi_lo->name = (char *)malloc(strlen(node->name) + 3);
	strcpy(hi_lo->name, node->name);
	strcat(hi_lo->name, "-2");
	init_split_multiplier(node, hi_lo, a0, a1, 0, b0);
	mult_list = insert_in_vptr_list(mult_list, hi_lo);

	/* First adder: sums the two cross products, named "<name>-add0". */
	inner_add = allocate_nnode();
	inner_add->name = (char *)malloc(strlen(node->name) + 6);
	strcpy(inner_add->name, node->name);
	strcat(inner_add->name, "-add0");
	init_cascade_adder(inner_add, hi_lo, lo_hi->output_port_sizes[0]);

	/* Second, wider adder that yields the result, named "<name>-add1". */
	outer_add = allocate_nnode();
	outer_add->name = (char *)malloc(strlen(node->name) + 6);
	strcpy(outer_add->name, node->name);
	strcat(outer_add->name, "-add1");
	init_cascade_adder(outer_add, inner_add,
			lo_lo->output_port_sizes[0] + hi_hi->output_port_sizes[0]);

	/* Temporary pins for the first adder: a0*b1 outputs, then a1*b0. */
	pin = 0;
	while (pin < lo_hi->output_port_sizes[0])
	{
		connect_nodes(lo_hi, pin, inner_add, pin);
		pin++;
	}
	pin = 0;
	while (pin < hi_lo->output_port_sizes[0])
	{
		connect_nodes(hi_lo, pin, inner_add, pin + lo_hi->output_port_sizes[0]);
		pin++;
	}

	/*
	 * Temporary pins for the second adder, in this order: the first
	 * adder's outputs, then the a1*b1 outputs, then the a0*b0 outputs.
	 */
	offset = inner_add->output_port_sizes[0];
	for (pin = 0; pin < offset; pin++)
		connect_nodes(inner_add, pin, outer_add, pin);

	pin = 0;
	while (pin < hi_hi->output_port_sizes[0])
	{
		connect_nodes(hi_hi, pin, outer_add, pin + offset);
		pin++;
	}

	offset = offset + hi_hi->output_port_sizes[0];
	pin = 0;
	while (pin < lo_lo->output_port_sizes[0])
	{
		connect_nodes(lo_lo, pin, outer_add, pin + offset);
		pin++;
	}

	/* The second adder now drives everything the multiplier used to. */
	pin = 0;
	while (pin < outer_add->num_output_pins)
	{
		remap_pin_to_new_node(node->output_pins[pin], outer_add, pin);
		pin++;
	}

	/* Probably more to do here in freeing the old node! */
	free(node->name);
	free(node->input_port_sizes);
	free(node->output_port_sizes);

	/* Only the pin arrays go away; the pins themselves were relocated. */
	free(node->input_pins);
	free(node->output_pins);
	free(node);
}
void *my_chunk_malloc(size_t size, t_chunk *chunk_info) {

	/* This routine should be used for allocating fairly small data        *
	 * structures where memory-efficiency is crucial. It obtains large     *
	 * "chunks" from my_malloc and parcels them out as requested. Every    *
	 * block obtained from my_malloc is added to the linked list headed by *
	 * chunk_info->chunk_ptr_head, which can be used to free the memory.   *
	 *                                                                     *
	 * The state of the currently open chunk lives in chunk_info:          *
	 * chunk_info->mem_avail is the number of bytes left in the chunk and  *
	 * chunk_info->next_mem_loc_ptr points at its next free byte. To start *
	 * a new chunk, simply set chunk_info->mem_avail = 0. Each independent *
	 * set of data structures should use its own chunk_info.               *
	 *                                                                     *
	 * A request is served in one of three ways:                           *
	 *  - it fits in the open chunk: carved out of that chunk;             *
	 *  - it does not fit and is larger than CHUNK_SIZE, or at least       *
	 *    FRAGMENT_THRESHOLD bytes are still free in the open chunk (so    *
	 *    abandoning it would waste too much): allocated on its own with   *
	 *    my_malloc, leaving the open chunk untouched;                     *
	 *  - otherwise: a fresh chunk is opened and the request carved out.   *
	 *                                                                     *
	 * chunk_info must not be NULL; this is asserted before first use.     */

	/* To make sure the memory passed back is properly aligned, I must *
	 * only send back chunks in multiples of the worst-case alignment  *
	 * restriction of the machine.  On most machines this should be    *
	 * a long, but on 64-bit machines it might be a long long or a     *
	 * double.  Change the typedef below if this is the case.          */

	typedef long Align;

#define CHUNK_SIZE 32768
#define FRAGMENT_THRESHOLD 100

	char *tmp_ptr;
	int aligned_size;

	/* Validate the handle before it is dereferenced for the first time. */
	assert(chunk_info != NULL);
	assert(chunk_info->mem_avail >= 0);

	if ((size_t) (chunk_info->mem_avail) < size) { /* Need to malloc more memory. */

		/* Either the request is too big for any chunk, or the open chunk  *
		 * still holds a fragment worth keeping: allocate normally.        */
		if (size > CHUNK_SIZE || chunk_info->mem_avail >= FRAGMENT_THRESHOLD) {
			tmp_ptr = (char *) my_malloc(size);

			/* When debugging, uncomment the code below to see if memory allocation size */
			/* makes sense */
			/*#ifdef DEBUG
			 vpr_printf("NB: my_chunk_malloc got a request for %d bytes.\n", size);
			 vpr_printf("You should consider using my_malloc for such big requests.\n");
			 #endif */

			chunk_info->chunk_ptr_head = insert_in_vptr_list(
					chunk_info->chunk_ptr_head, tmp_ptr);
			return (tmp_ptr);
		}

		/* Only a small scrap left: abandon it and open a fresh chunk. */
		chunk_info->next_mem_loc_ptr = (char *) my_malloc(CHUNK_SIZE);
		chunk_info->mem_avail = CHUNK_SIZE;
		chunk_info->chunk_ptr_head = insert_in_vptr_list(
				chunk_info->chunk_ptr_head, chunk_info->next_mem_loc_ptr);
	}

	/* Find the smallest distance to advance the memory pointer and keep *
	 * everything aligned.                                               */

	if (size % sizeof(Align) == 0) {
		aligned_size = (int) size;
	} else {
		aligned_size = (int) (size + sizeof(Align) - size % sizeof(Align));
	}

	tmp_ptr = chunk_info->next_mem_loc_ptr;
	chunk_info->next_mem_loc_ptr += aligned_size;
	chunk_info->mem_avail -= aligned_size;
	return (tmp_ptr);
}
void *my_chunk_malloc (size_t size, struct s_linked_vptr **chunk_ptr_head,
		int *mem_avail_ptr, char **next_mem_loc_ptr) {

	/* This routine should be used for allocating fairly small data        *
	 * structures where memory-efficiency is crucial. It obtains large     *
	 * "chunks" from my_malloc and parcels them out as requested. Whenever *
	 * it gets a new block it adds it to the linked list pointed to by     *
	 * chunk_ptr_head, which can be used to free the chunked memory. If    *
	 * chunk_ptr_head is NULL, no list of blocks is kept -- useful for     *
	 * data structures that you never intend to free.                      *
	 *                                                                     *
	 * Information about the currently open "chunk" must be stored by the  *
	 * user program: mem_avail_ptr points to an int holding how many bytes *
	 * are left in the current chunk, and next_mem_loc_ptr is the address  *
	 * of a pointer to the next free byte in the chunk. To start a new     *
	 * chunk, simply set *mem_avail_ptr = 0. Each independent set of data  *
	 * structures should use a new chunk.                                  *
	 *                                                                     *
	 * A negative *mem_avail_ptr is treated as "nothing left" rather than  *
	 * being converted to a huge unsigned value by the comparison below.   */

	/* To make sure the memory passed back is properly aligned, I must *
	 * only send back chunks in multiples of the worst-case alignment  *
	 * restriction of the machine.  On most machines this should be    *
	 * a long, but on 64-bit machines it might be a long long or a     *
	 * double.  Change the typedef below if this is the case.          */

	typedef size_t Align;

#define CHUNK_SIZE 32768
#define FRAGMENT_THRESHOLD 100

	char *tmp_ptr;
	int aligned_size;

	/* Compare in the signed domain first so a negative count cannot pass *
	 * for a large amount of free space after conversion to size_t.       */
	if (*mem_avail_ptr < 0 || (size_t) *mem_avail_ptr < size) { /* Need to malloc more memory. */

		if (size > CHUNK_SIZE) { /* Too big, use standard routine. */
			tmp_ptr = my_malloc (size);

#ifdef DEBUG
			/* size is a size_t: cast it so the argument matches the format. */
			printf("NB: my_chunk_malloc got a request for %lu bytes.\n",
					(unsigned long) size);
			printf("You should consider using my_malloc for such big requests.\n");
#endif

			if (chunk_ptr_head != NULL)
				*chunk_ptr_head = insert_in_vptr_list (*chunk_ptr_head, tmp_ptr);
			return (tmp_ptr);
		}

		if (*mem_avail_ptr < FRAGMENT_THRESHOLD) { /* Only a small scrap left. */
			*next_mem_loc_ptr = my_malloc (CHUNK_SIZE);
			*mem_avail_ptr = CHUNK_SIZE;
			if (chunk_ptr_head != NULL)
				*chunk_ptr_head = insert_in_vptr_list (*chunk_ptr_head,
						*next_mem_loc_ptr);
		}

		/* Execute else clause only when the chunk we want is pretty big,  *
		 * and would leave too big an unused fragment.  Then we use malloc *
		 * to allocate normally.                                           */

		else {
			tmp_ptr = my_malloc (size);
			if (chunk_ptr_head != NULL)
				*chunk_ptr_head = insert_in_vptr_list (*chunk_ptr_head, tmp_ptr);
			return (tmp_ptr);
		}
	}

	/* Find the smallest distance to advance the memory pointer and keep *
	 * everything aligned.                                               */

	if (size % sizeof (Align) == 0) {
		aligned_size = (int) size;
	} else {
		aligned_size = (int) (size + sizeof(Align) - size % sizeof(Align));
	}

	tmp_ptr = *next_mem_loc_ptr;
	*next_mem_loc_ptr += aligned_size;
	*mem_avail_ptr -= aligned_size;
	return (tmp_ptr);
}
/*
 * Width-splits the given single port memory into pieces of the width
 * specified for the "data" port of the single_port_rams model.
 *
 * If the memory's "data" port is no wider than the target width, the node
 * is simply added to sp_memory_list. Otherwise new nodes named
 * "<name>-<i>" are created and added to sp_memory_list. Each takes up to
 * target-width data input pins and as many output pins from the original.
 * Every other input port is copied onto each new node, except for the
 * last new node, which has the original's pins moved onto it. The
 * original node is then freed, so the caller must not use it afterwards.
 *
 * The model, its "data" port, and (when a split is needed) a positive
 * target width are required; each is checked with oassert.
 */
void split_sp_memory_to_arch_width(nnode_t *node)
{
	char *port_name = "data";
	t_model *model = single_port_rams;

	oassert(model != NULL);

	int data_port_number = get_input_port_index_from_mapping(node, port_name);

	oassert(data_port_number != -1);

	int data_port_size = node->input_port_sizes[data_port_number];

	// Get the target width from the arch.
	t_model_ports *ports = get_model_port(model->inputs, port_name);
	oassert(ports != NULL);
	int target_size = ports->size;

	if (data_port_size > target_size)
	{
		// A non-positive target width could never consume the data pins.
		oassert(target_size > 0);

		// Number of pieces, rounded up, computed without floating point.
		int num_memories = data_port_size / target_size
				+ ((data_port_size % target_size) != 0);

		int i;
		int data_pins_moved = 0;
		int output_pins_moved = 0;
		for (i = 0; i < num_memories; i++)
		{
			nnode_t *new_node = allocate_nnode();
			new_node->name = append_string(node->name, "-%d",i);
			sp_memory_list = insert_in_vptr_list(sp_memory_list, new_node);

			/* Copy properties from the original node */
			new_node->type = node->type;
			new_node->related_ast_node = node->related_ast_node;
			new_node->traverse_visited = node->traverse_visited;
			new_node->node_data = NULL;

			// Start every port at size zero; sizes grow as pins are added.
			int j;
			for (j = 0; j < node->num_input_port_sizes; j++)
				add_input_port_information(new_node, 0);

			add_output_port_information(new_node, 0);

			int index = 0;
			int old_index = 0;
			for (j = 0; j < node->num_input_port_sizes; j++)
			{
				// Move this node's share of data pins out of the data port of the original node.
				if (j == data_port_number)
				{
					// Skip over data pins we've already moved.
					old_index += data_pins_moved;
					int k;
					for (k = 0; k < target_size && data_pins_moved < data_port_size; k++)
					{
						allocate_more_node_input_pins(new_node, 1);
						new_node->input_port_sizes[j]++;
						remap_pin_to_new_node(node->input_pins[old_index], new_node, index);
						index++;
						old_index++;
						data_pins_moved++;
					}
					int remaining_data_pins = data_port_size - data_pins_moved;
					// Skip over pins we have yet to copy.
					old_index += remaining_data_pins;
				}
				else
				{
					int k;
					for (k = 0; k < node->input_port_sizes[j]; k++)
					{
						allocate_more_node_input_pins(new_node, 1);
						new_node->input_port_sizes[j]++;
						// Copy pins for all but the last memory. The last one gets the original pins moved to it.
						if (i < num_memories - 1)
							add_a_input_pin_to_node_spot_idx(new_node, copy_input_npin(node->input_pins[old_index]), index);
						else
							remap_pin_to_new_node(node->input_pins[old_index], new_node, index);
						index++;
						old_index++;
					}
				}
			}

			// Move this node's share of the output pins, resuming where the previous node stopped.
			index = 0;
			old_index = 0;
			old_index += output_pins_moved;

			int k;
			for (k = 0; k < target_size && output_pins_moved < data_port_size; k++)
			{
				allocate_more_node_output_pins(new_node, 1);
				new_node->output_port_sizes[0]++;
				remap_pin_to_new_node(node->output_pins[old_index], new_node, index);
				index++;
				old_index++;
				output_pins_moved++;
			}
		}
		// Free the original node.
		free_nnode(node);
	}
	else
	{
		sp_memory_list = insert_in_vptr_list(sp_memory_list, node);
	}
}
/*-------------------------------------------------------------------------
 * (function: split_dp_memory_width)
 *
 * Splits a dual port memory into memories that are one data bit wide.
 *
 * For every bit i >= 1 of the "data1" port a new node "<name>-<i>" is
 * created and added to dp_memory_list. It takes over data pin i of
 * "data1" (and of "data2" when that port exists), gets copies of all
 * other input pins, and takes over the matching output pin(s). The
 * original node is renamed "<name>-0", trimmed down to bit 0 and added to
 * dp_memory_list as well.
 *
 * If the node has no "data1" port an error is reported and the node is
 * left unchanged.
 *
 * NOTE(review): the output handling assumes the second output port starts
 * data_diff1 + 1 pins after the first, i.e. that the first output port is
 * as wide as "data1" -- confirm against the callers.
 *------------------------------------------------------------------------ */
void split_dp_memory_width(nnode_t *node)
{
	int data_port1, data_port2;
	int i, j, k, idx, old_idx, data_diff1, data_diff2;
	nnode_t *new_node;
	char *tmp_name;

	oassert(node->type == MEMORY);

	/* Find which port is the data port on the input! */
	idx = 0;
	data_port1 = -1;
	data_port2 = -1;
	data_diff1 = 0;
	data_diff2 = 0;

	for (i = 0; i < node->num_input_port_sizes; i++)
	{
		/* idx is the first pin of port i; its mapping names the port. */
		if (strcmp("data1", node->input_pins[idx]->mapping) == 0)
		{
			data_port1 = i;
			/* Number of data1 pins that leave the original node. */
			data_diff1 = node->input_port_sizes[data_port1] - 1;
		}
		if (strcmp("data2", node->input_pins[idx]->mapping) == 0)
		{
			data_port2 = i;
			/* Number of data2 pins that leave the original node. */
			data_diff2 = node->input_port_sizes[data_port2] - 1;
		}
		idx += node->input_port_sizes[i];
	}

	if (data_port1 == -1)
	{
		error_message(1, 0, -1, "No \"data1\" port on dual port RAM");
		return;
	}

	/* Need to create a new node for every data bit */
	for (i = 1; i < node->input_port_sizes[data_port1]; i++)
	{
		/* Large enough for the decimal form of any int, sign included. */
		char BUF[3 * sizeof(int) + 2];

		new_node = allocate_nnode();
		dp_memory_list = insert_in_vptr_list(dp_memory_list, new_node);

		/* Name is "<name>-<i>": room for the dash, the digits and the NUL. */
		sprintf(BUF, "%d", i);
		new_node->name = (char *)malloc(strlen(node->name) + strlen(BUF) + 2);
		strcpy(new_node->name, node->name);
		strcat(new_node->name, "-");
		strcat(new_node->name, BUF);

		/* Copy properties from the original node */
		new_node->type = node->type;
		new_node->related_ast_node = node->related_ast_node;
		new_node->traverse_visited = node->traverse_visited;
		new_node->node_data = NULL;

		/* Same input ports as the original, but the data ports are 1 bit. */
		new_node->num_input_port_sizes = node->num_input_port_sizes;
		new_node->input_port_sizes = (int *)malloc(node->num_input_port_sizes * sizeof(int));
		for (j = 0; j < node->num_input_port_sizes; j++)
			new_node->input_port_sizes[j] = node->input_port_sizes[j];
		new_node->input_port_sizes[data_port1] = 1;
		if (data_port2 != -1)
			new_node->input_port_sizes[data_port2] = 1;

		/* One 1-bit output port per data port. */
		if (data_port2 == -1)
		{
			new_node->num_output_port_sizes = 1;
			new_node->output_port_sizes = (int *)malloc(sizeof(int));
		}
		else
		{
			new_node->num_output_port_sizes = 2;
			new_node->output_port_sizes = (int *)malloc(sizeof(int)*2);
			new_node->output_port_sizes[1] = 1;
		}
		new_node->output_port_sizes[0] = 1;

		/* Set the number of input pins and pin entries */
		new_node->num_input_pins = node->num_input_pins - data_diff1 - data_diff2;
		new_node->input_pins = (npin_t**)malloc(sizeof(void *) * new_node->num_input_pins);

		/* idx walks the new node's pins, old_idx the original's. */
		idx = 0;
		old_idx = 0;
		for (j = 0; j < new_node->num_input_port_sizes; j++)
		{
			if (j == data_port1)
			{
				/* Data bit i is moved, not copied; the old slot is cleared. */
				new_node->input_pins[idx] = node->input_pins[old_idx + i];
				node->input_pins[old_idx+i] = NULL;
				new_node->input_pins[idx]->node = new_node;
				new_node->input_pins[idx]->pin_node_idx = idx;
				old_idx = old_idx + node->input_port_sizes[data_port1];
				idx++;
			}
			else if (j == data_port2)
			{
				/* Same treatment for bit i of the second data port. */
				new_node->input_pins[idx] = node->input_pins[old_idx + i];
				node->input_pins[old_idx+i] = NULL;
				new_node->input_pins[idx]->node = new_node;
				new_node->input_pins[idx]->pin_node_idx = idx;
				old_idx = old_idx + node->input_port_sizes[data_port2];
				idx++;
			}
			else
			{
				/* All other ports are shared: give the new node copies. */
				for (k = 0; k < new_node->input_port_sizes[j]; k++)
				{
					new_node->input_pins[idx] = copy_input_npin(node->input_pins[old_idx]);
					new_node->input_pins[idx]->pin_node_idx = idx;
					new_node->input_pins[idx]->node = new_node;
					idx++;
					old_idx++;
				}
			}
		}

		/* Set the number of output pins and pin entry */
		if (data_port2 == -1)
		{
			new_node->num_output_pins = 1;
			new_node->output_pins = (npin_t **)malloc(sizeof(void*));
			new_node->output_pins[0] = node->output_pins[i];
			node->output_pins[i] = NULL;
			new_node->output_pins[0]->pin_node_idx = 0;
			new_node->output_pins[0]->node = new_node;
		}
		else
		{
			new_node->num_output_pins = 2;
			new_node->output_pins = (npin_t **)malloc(sizeof(void*)*2);
			new_node->output_pins[0] = node->output_pins[i];
			node->output_pins[i] = NULL;
			new_node->output_pins[0]->pin_node_idx = 0;
			new_node->output_pins[0]->node = new_node;

			/* Bit i of the second output port sits data_diff1 + 1 pins on. */
			new_node->output_pins[1] = node->output_pins[i+data_diff1+1];
			node->output_pins[i+data_diff1+1] = NULL;
			new_node->output_pins[1]->pin_node_idx = 1;
			new_node->output_pins[1]->node = new_node;
		}
	}

	/* Now need to clean up the original to do 1 bit output - first bit */
	/* Name the node to show first bit! */
	tmp_name = (char *)malloc(strlen(node->name) + 3);
	strcpy(tmp_name, node->name);
	strcat(tmp_name, "-0");
	free(node->name);
	node->name = tmp_name;

	/* free the additional output pins */
	/* NOTE(review): slots emptied by the loop above hold NULL, so       */
	/* free_npin is presumably expected to tolerate NULL -- confirm.     */
	if (data_port2 == -1)
	{
		for (i = 1; i < node->num_output_pins; i++)
			free_npin(node->output_pins[i]);

		node->num_output_pins = 1;
		node->output_pins = realloc(node->output_pins, sizeof(npin_t *) * node->num_output_pins);
		node->output_port_sizes[0] = 1;
	}
	else
	{
		for (i = 1; i < (node->num_output_pins/2); i++)
			free_npin(node->output_pins[i]);

		/* Bring bit 0 of the second output port down to slot 1. When the */
		/* data port is a single bit wide it is already there; moving it  */
		/* onto itself and clearing the source would wipe the slot.       */
		if (data_diff1 > 0)
		{
			node->output_pins[1] = node->output_pins[data_diff1 + 1];
			node->output_pins[data_diff1 + 1] = NULL;
		}
		node->output_pins[1]->pin_node_idx = 1;

		/* Slots 0 and 1 hold the two pins that stay; never free them. */
		if (i < 2)
			i = 2;
		for (; i < node->num_output_pins; i++)
			free_npin(node->output_pins[i]);

		node->num_output_pins = 2;
		node->output_pins = realloc(node->output_pins, sizeof(npin_t *) * node->num_output_pins);
		node->output_port_sizes[0] = 1;
		node->output_port_sizes[1] = 1;
	}

	/* Shuffle the input pins on account of removed input pins */
	idx = old_idx = 0;
	node->input_port_sizes[data_port1] = 1;
	if (data_port2 != -1)
		node->input_port_sizes[data_port2] = 1;
	for (i = 0; i < node->num_input_port_sizes; i++)
	{
		for (j = 0; j < node->input_port_sizes[i]; j++)
		{
			node->input_pins[idx] = node->input_pins[old_idx];
			node->input_pins[idx]->pin_node_idx = idx;
			idx++;
			old_idx++;
		}
		/* Step over the data pins that were handed to the new nodes. */
		if (i == data_port1)
			old_idx = old_idx + data_diff1;
		if (i == data_port2)
			old_idx = old_idx + data_diff2;
	}
	node->num_input_pins = node->num_input_pins - data_diff1;
	if (data_port2 != -1)
		node->num_input_pins = node->num_input_pins - data_diff2;

	node->input_pins = realloc(node->input_pins, sizeof(npin_t *) * node->num_input_pins);
	dp_memory_list = insert_in_vptr_list(dp_memory_list, node);

	return;
}
/*-------------------------------------------------------------------------
 * (function: split_sp_memory_width)
 *
 * Splits a single port memory into memories that are one data bit wide.
 *
 * For every bit i >= 1 of the "data" port a new node "<name>-<i>" is
 * created and added to sp_memory_list. It takes over data pin i, gets
 * copies of all other input pins, and gets a copy of output pin i that is
 * registered as a driver of the same net (the original pin is freed).
 * The original node is renamed "<name>-0", trimmed down to bit 0 and
 * added to sp_memory_list as well.
 *
 * If the node has no "data" port an error is reported and the node is
 * left unchanged.
 *------------------------------------------------------------------------ */
void split_sp_memory_width(nnode_t *node)
{
	int data_port;
	int i, j, k, idx, old_idx, diff;
	nnode_t *new_node;

	oassert(node->type == MEMORY);

	/* Find which port is the data port on the input! */
	idx = 0;
	data_port = -1;
	for (i = 0; i < node->num_input_port_sizes; i++)
	{
		/* idx is the first pin of port i; its mapping names the port. */
		if (strcmp("data", node->input_pins[idx]->mapping) == 0)
			data_port = i;
		idx += node->input_port_sizes[i];
	}
	if (data_port == -1)
	{
		error_message(1, 0, -1, "No \"data\" port on single port RAM");
		/* Nothing to split; going on would index input_port_sizes[-1]. */
		return;
	}
	diff = node->input_port_sizes[data_port];

	/* Need to create a new node for every data bit */
	for (i = 1; i < node->input_port_sizes[data_port]; i++)
	{
		/* Large enough for the decimal form of any int, sign included. */
		char BUF[3 * sizeof(int) + 2];

		new_node = allocate_nnode();
		sp_memory_list = insert_in_vptr_list(sp_memory_list, new_node);

		/* Name is "<name>-<i>": room for the dash, the digits and the NUL. */
		sprintf(BUF, "%d", i);
		new_node->name = (char *)malloc(strlen(node->name) + strlen(BUF) + 2);
		strcpy(new_node->name, node->name);
		strcat(new_node->name, "-");
		strcat(new_node->name, BUF);

		/* Copy properties from the original node */
		new_node->type = node->type;
		new_node->related_ast_node = node->related_ast_node;
		new_node->traverse_visited = node->traverse_visited;
		new_node->node_data = NULL;

		/* Same input ports as the original, but the data port is 1 bit. */
		new_node->num_input_port_sizes = node->num_input_port_sizes;
		new_node->input_port_sizes = (int *)malloc(node->num_input_port_sizes * sizeof(int));
		for (j = 0; j < node->num_input_port_sizes; j++)
			new_node->input_port_sizes[j] = node->input_port_sizes[j];
		new_node->input_port_sizes[data_port] = 1;
		new_node->num_output_port_sizes = 1;
		new_node->output_port_sizes = (int *)malloc(sizeof(int));
		new_node->output_port_sizes[0] = 1;

		/* Set the number of input pins and pin entries */
		new_node->num_input_pins = node->num_input_pins - diff + 1;
		new_node->input_pins = (npin_t**)malloc(sizeof(void *) * new_node->num_input_pins);

		/* idx walks the new node's pins, old_idx the original's. */
		idx = 0;
		old_idx = 0;
		for (j = 0; j < new_node->num_input_port_sizes; j++)
		{
			if (j == data_port)
			{
				/* Data bit i is moved, not copied; the old slot is cleared. */
				new_node->input_pins[idx] = node->input_pins[idx + i];
				node->input_pins[idx+i] = NULL;
				new_node->input_pins[idx]->node = new_node;
				new_node->input_pins[idx]->pin_node_idx = idx;
				old_idx = old_idx + node->input_port_sizes[data_port];
				idx++;
			}
			else
			{
				/* All other ports are shared: give the new node copies. */
				for (k = 0; k < new_node->input_port_sizes[j]; k++)
				{
					new_node->input_pins[idx] = copy_input_npin(node->input_pins[old_idx]);
					new_node->input_pins[idx]->pin_node_idx = idx;
					new_node->input_pins[idx]->node = new_node;
					idx++;
					old_idx++;
				}
			}
		}

		/* Set the number of output pins and pin entry */
		new_node->num_output_pins = 1;
		new_node->output_pins = (npin_t **)malloc(sizeof(void*));
		new_node->output_pins[0] = copy_output_npin(node->output_pins[i]);
		add_a_driver_pin_to_net(node->output_pins[i]->net, new_node->output_pins[0]);
		free_npin(node->output_pins[i]);
		node->output_pins[i] = NULL;
		new_node->output_pins[0]->pin_node_idx = 0;
		new_node->output_pins[0]->node = new_node;
	}

	/* Now need to clean up the original to do 1 bit output - first bit */
	/* Name the node to show first bit! */
	{
		char *new_name = append_string(node->name, "-0");
		free(node->name);
		node->name = new_name;
	}

	/* free the additional output pins */
	/* NOTE(review): slots emptied by the loop above hold NULL, so       */
	/* free_npin is presumably expected to tolerate NULL -- confirm.     */
	for (i = 1; i < node->num_output_pins; i++)
		free_npin(node->output_pins[i]);
	node->num_output_pins = 1;
	node->output_pins = realloc(node->output_pins, sizeof(npin_t *) * 1);
	node->output_port_sizes[0] = 1;

	/* Shuffle the input pins on account of removed input pins */
	idx = old_idx = 0;
	node->input_port_sizes[data_port] = 1;
	for (i = 0; i < node->num_input_port_sizes; i++)
	{
		for (j = 0; j < node->input_port_sizes[i]; j++)
		{
			node->input_pins[idx] = node->input_pins[old_idx];
			node->input_pins[idx]->pin_node_idx = idx;
			idx++;
			old_idx++;
		}
		/* Step over the data pins that were handed to the new nodes. */
		if (i == data_port)
			old_idx = old_idx + diff - 1;
	}
	node->num_input_pins = node->num_input_pins - diff + 1;
	sp_memory_list = insert_in_vptr_list(sp_memory_list, node);

	return;
}
/*------------------------------------------------------------------------- * (function: split_dp_memory_depth) * * This function works to split the depth of a dual port memory into * several smaller memories. *------------------------------------------------------------------------ */ void split_dp_memory_depth(nnode_t *node) { int addr1_port = -1; int addr2_port = -1; int we1_port = -1; int we2_port = -1; int logical_size; int i, j; int idx; int addr1_pin_idx = 0; int we1_pin_idx = 0; int addr2_pin_idx = 0; int we2_pin_idx = 0; nnode_t *new_mem_node; nnode_t *and1_node, *not1_node, *ff1_node, *mux1_node; nnode_t *and2_node, *not2_node, *ff2_node, *mux2_node; npin_t *addr1_pin = NULL; npin_t *addr2_pin = NULL; npin_t *we1_pin = NULL; npin_t *we2_pin = NULL; npin_t *twe_pin, *taddr_pin; npin_t *clk_pin = NULL; npin_t *tdout_pin; oassert(node->type == MEMORY); /* Find which ports are the addr1 and addr2 ports */ idx = 0; for (i = 0; i < node->num_input_port_sizes; i++) { if (strcmp("addr1", node->input_pins[idx]->mapping) == 0) { addr1_port = i; addr1_pin_idx = idx; addr1_pin = node->input_pins[idx]; } else if (strcmp("addr2", node->input_pins[idx]->mapping) == 0) { addr2_port = i; addr2_pin_idx = idx; addr2_pin = node->input_pins[idx]; } else if (strcmp("we1", node->input_pins[idx]->mapping) == 0) { we1_port = i; we1_pin = node->input_pins[idx]; we1_pin_idx = idx; } else if (strcmp("we2", node->input_pins[idx]->mapping) == 0) { we2_port = i; we2_pin = node->input_pins[idx]; we2_pin_idx = idx; } else if (strcmp("clk", node->input_pins[idx]->mapping) == 0) { clk_pin = node->input_pins[idx]; } idx += node->input_port_sizes[i]; } if (addr1_port == -1) { error_message(1, 0, -1, "No \"addr1\" port on dual port RAM"); } /* Jason Luu HACK: Logical memory depth determination messed up, forced to use this method */ for(i = 0; i < node->input_port_sizes[addr1_port]; i++) { if(strcmp(node->input_pins[addr1_pin_idx + i]->name, "top^ZERO_PAD_ZERO") == 0) break; } logical_size = 
i; /* Check that the memory needs to be split */ if (logical_size <= split_size) { dp_memory_list = insert_in_vptr_list(dp_memory_list, node); return; } /* Let's remove the address1 line from the memory */ for (i = addr1_pin_idx; i < node->num_input_pins - 1; i++) { node->input_pins[i] = node->input_pins[i+1]; node->input_pins[i]->pin_node_idx--; } node->input_port_sizes[addr1_port]--; node->input_pins = realloc(node->input_pins, sizeof(npin_t *) * --node->num_input_pins); if ((we1_port != -1) && (we1_pin_idx >= addr1_pin_idx)) we1_pin_idx--; if ((we2_port != -1) && (we2_pin_idx >= addr1_pin_idx)) we2_pin_idx--; if ((addr2_port != -1) && (addr2_pin_idx >= addr1_pin_idx)) addr2_pin_idx--; /* Let's remove the address2 line from the memory */ if (addr2_port != -1) { for (i = addr2_pin_idx; i < node->num_input_pins - 1; i++) { node->input_pins[i] = node->input_pins[i+1]; node->input_pins[i]->pin_node_idx--; } node->input_port_sizes[addr2_port]--; node->input_pins = realloc(node->input_pins, sizeof(npin_t *) * --node->num_input_pins); if ((we1_port != -1) && (we1_pin_idx >= addr2_pin_idx)) we1_pin_idx--; if ((we2_port != -1) && (we2_pin_idx >= addr2_pin_idx)) we2_pin_idx--; if (addr1_pin_idx >= addr2_pin_idx) addr1_pin_idx--; } /* Create the new memory node */ new_mem_node = allocate_nnode(); // Append the new name with an __H new_mem_node->name = append_string(node->name, "__H"); { // Append the old name with an __S char *new_name = append_string(node->name, "__S"); free(node->name); node->name = new_name; } /* Copy properties from the original memory node */ new_mem_node->type = node->type; new_mem_node->related_ast_node = node->related_ast_node; new_mem_node->traverse_visited = node->traverse_visited; // Copy over the port sizes for the new memory for (j = 0; j < node->num_output_port_sizes; j++) add_output_port_information(new_mem_node, node->output_port_sizes[j]); for (j = 0; j < node->num_input_port_sizes; j++) add_input_port_information (new_mem_node, 
node->input_port_sizes[j]); // allocate space for pins. allocate_more_node_output_pins (new_mem_node, node->num_output_pins); allocate_more_node_input_pins (new_mem_node, node->num_input_pins); // Copy over the pins for the new memory for (j = 0; j < node->num_input_pins; j++) add_a_input_pin_to_node_spot_idx(new_mem_node, copy_input_npin(node->input_pins[j]), j); if (we1_pin != NULL) { and1_node = make_2port_gate(LOGICAL_AND, 1, 1, 1, node, node->traverse_visited); twe_pin = copy_input_npin(we1_pin); add_a_input_pin_to_node_spot_idx(and1_node, twe_pin, 1); taddr_pin = copy_input_npin(addr1_pin); add_a_input_pin_to_node_spot_idx(and1_node, taddr_pin, 0); connect_nodes(and1_node, 0, node, we1_pin_idx); node->input_pins[we1_pin_idx]->mapping = we1_pin->mapping; } if (we2_pin != NULL) { and2_node = make_2port_gate(LOGICAL_AND, 1, 1, 1, node, node->traverse_visited); twe_pin = copy_input_npin(we2_pin); add_a_input_pin_to_node_spot_idx(and2_node, twe_pin, 1); taddr_pin = copy_input_npin(addr2_pin); add_a_input_pin_to_node_spot_idx(and2_node, taddr_pin, 0); connect_nodes(and2_node, 0, node, we2_pin_idx); node->input_pins[we2_pin_idx]->mapping = we2_pin->mapping; } if (we1_pin != NULL) { taddr_pin = copy_input_npin(addr1_pin); not1_node = make_not_gate_with_input(taddr_pin, new_mem_node, new_mem_node->traverse_visited); and1_node = make_2port_gate(LOGICAL_AND, 1, 1, 1, new_mem_node, new_mem_node->traverse_visited); connect_nodes(not1_node, 0, and1_node, 0); add_a_input_pin_to_node_spot_idx(and1_node, we1_pin, 1); connect_nodes(and1_node, 0, new_mem_node, we1_pin_idx); new_mem_node->input_pins[we1_pin_idx]->mapping = we1_pin->mapping; } if (we2_pin != NULL) { taddr_pin = copy_input_npin(addr2_pin); not2_node = make_not_gate_with_input(taddr_pin, new_mem_node, new_mem_node->traverse_visited); and2_node = make_2port_gate(LOGICAL_AND, 1, 1, 1, new_mem_node, new_mem_node->traverse_visited); connect_nodes(not2_node, 0, and2_node, 0); add_a_input_pin_to_node_spot_idx(and2_node, 
we2_pin, 1); connect_nodes(and2_node, 0, new_mem_node, we2_pin_idx); new_mem_node->input_pins[we2_pin_idx]->mapping = we2_pin->mapping; } if (node->num_output_pins > 0) /* There is an "out1" output */ { ff1_node = make_2port_gate(FF_NODE, 1, 1, 1, node, node->traverse_visited); add_a_input_pin_to_node_spot_idx(ff1_node, addr1_pin, 0); add_a_input_pin_to_node_spot_idx(ff1_node, copy_input_npin(clk_pin), 1); /* Copy over the output pins for the new memory */ for (j = 0; j < node->output_port_sizes[0]; j++) { mux1_node = make_2port_gate(MUX_2, 2, 2, 1, node, node->traverse_visited); connect_nodes(ff1_node, 0, mux1_node, 0); not1_node = make_not_gate(node, node->traverse_visited); connect_nodes(ff1_node, 0, not1_node, 0); connect_nodes(not1_node, 0, mux1_node, 1); tdout_pin = node->output_pins[j]; remap_pin_to_new_node(tdout_pin, mux1_node, 0); connect_nodes(node, j, mux1_node, 2); node->output_pins[j]->mapping = tdout_pin->mapping; connect_nodes(new_mem_node, j, mux1_node, 3); new_mem_node->output_pins[j]->mapping = tdout_pin->mapping; tdout_pin->mapping = NULL; mux1_node->output_pins[0]->name = mux1_node->name; } } if (node->num_output_pins > node->output_port_sizes[0]) /* There is an "out2" output */ { ff2_node = make_2port_gate(FF_NODE, 1, 1, 1, node, node->traverse_visited); add_a_input_pin_to_node_spot_idx(ff2_node, addr2_pin, 0); add_a_input_pin_to_node_spot_idx(ff2_node, copy_input_npin(clk_pin), 1); /* Copy over the output pins for the new memory */ for (j = 0; j < node->output_port_sizes[0]; j++) { mux2_node = make_2port_gate(MUX_2, 2, 2, 1, node, node->traverse_visited); connect_nodes(ff2_node, 0, mux2_node, 0); not2_node = make_not_gate(node, node->traverse_visited); connect_nodes(ff2_node, 0, not2_node, 0); connect_nodes(not2_node, 0, mux2_node, 1); tdout_pin = node->output_pins[node->output_port_sizes[0] + j]; remap_pin_to_new_node(tdout_pin, mux2_node, 0); connect_nodes(node, node->output_port_sizes[0] + j, mux2_node, 2); 
node->output_pins[node->output_port_sizes[0] + j]->mapping = tdout_pin->mapping; connect_nodes(new_mem_node, node->output_port_sizes[0] + j, mux2_node, 3); new_mem_node->output_pins[node->output_port_sizes[0] + j]->mapping = tdout_pin->mapping; tdout_pin->mapping = NULL; mux2_node->output_pins[0]->name = mux2_node->name; } } /* must recurse on new memory if it's too small */ if (logical_size <= split_size) { dp_memory_list = insert_in_vptr_list(dp_memory_list, new_mem_node); dp_memory_list = insert_in_vptr_list(dp_memory_list, node); } else { split_dp_memory_depth(node); split_dp_memory_depth(new_mem_node); } return; }
/*-------------------------------------------------------------------------
 * (function: split_sp_memory_depth)
 *
 * This function works to split the depth of a single port memory into
 * several smaller memories.
 *
 * The number of address bits in use ("logical size") is the count of
 * leading "addr" pins whose name is not "top^ZERO_PAD_ZERO".  When that
 * count is no larger than the global split_size, the node is appended to
 * sp_memory_list unchanged.  Otherwise:
 *
 *   - the first pin of the "addr" port is removed from the memory,
 *   - a second memory node named "<name>__H" is created and the original
 *     is renamed "<name>__S",
 *   - the write enable is ANDed with the removed address pin for the
 *     original node and with its inverse for the new node,
 *   - every output is taken from a mux fed by both memories and selected
 *     by a flip-flop clocked copy of the removed address pin,
 *   - both memories are split again recursively.
 *
 * A non-positive split_size is reported on stdout and terminates the
 * program with exit(1).
 *
 * node: memory node to split; must have type MEMORY and "data", "addr",
 *       "we" and "clk" input ports.
 *-----------------------------------------------------------------------*/
void split_sp_memory_depth(nnode_t *node)
{
	int data_port = -1;
	int clk_port = -1;
	int addr_port = -1;
	int we_port = -1;
	int addr_pin_idx = 0;
	int we_pin_idx = 0;
	int logical_size;
	int i, j;
	int idx;
	nnode_t *new_mem_node;
	nnode_t *and_node, *not_node, *mux_node, *ff_node;
	npin_t *addr_pin = NULL;
	npin_t *we_pin = NULL;
	npin_t *clk_pin = NULL;
	npin_t *tdout_pin;
	npin_t **shrunk_pins;
	char *new_name;

	oassert(node->type == MEMORY);

	/* Locate the ports of interest.  A port is identified by the mapping
	 * string of its first pin; idx tracks that first pin's position. */
	idx = 0;
	for (i = 0; i < node->num_input_port_sizes; i++)
	{
		npin_t *first_pin = node->input_pins[idx];

		if (strcmp("addr", first_pin->mapping) == 0)
		{
			addr_port = i;
			addr_pin_idx = idx;
			addr_pin = first_pin;
		}
		else if (strcmp("data", first_pin->mapping) == 0)
		{
			data_port = i;
		}
		else if (strcmp("we", first_pin->mapping) == 0)
		{
			we_port = i;
			we_pin = first_pin;
			we_pin_idx = idx;
		}
		else if (strcmp("clk", first_pin->mapping) == 0)
		{
			clk_port = i;
			clk_pin = first_pin;
		}
		idx += node->input_port_sizes[i];
	}

	/* NOTE(review): the code below uses addr_port as an index and
	 * dereferences we_pin/clk_pin, so error_message is presumably fatal --
	 * confirm. */
	if (data_port == -1)
	{
		error_message(1, 0, -1, "No \"data\" port on single port RAM");
	}
	if (addr_port == -1)
	{
		error_message(1, 0, -1, "No \"addr\" port on single port RAM");
	}
	if (we_port == -1)
	{
		error_message(1, 0, -1, "No \"we\" port on single port RAM");
	}
	if (clk_port == -1)
	{
		error_message(1, 0, -1, "No \"clk\" port on single port RAM");
	}

	/* Check that the memory needs to be split */
	/* Jason Luu HACK: Logical memory depth determination messed up, forced to use this method */
	for (i = 0; i < node->input_port_sizes[addr_port]; i++)
	{
		if (strcmp(node->input_pins[addr_pin_idx + i]->name, "top^ZERO_PAD_ZERO") == 0)
			break;
	}
	logical_size = i;

	if (split_size <= 0)
	{
		printf("Unsupported feature! Split size must be a positive number\n");
		exit(1);
	}

	/* split_size is known to be positive from here on. */
	if (logical_size <= split_size)
	{
		sp_memory_list = insert_in_vptr_list(sp_memory_list, node);
		return;
	}

	/* Let's remove the address line from the memory: shift every later
	 * pin down one slot and shrink the array.  The removed pin (addr_pin)
	 * is not freed; it is reused below. */
	for (i = addr_pin_idx; i < node->num_input_pins - 1; i++)
	{
		node->input_pins[i] = node->input_pins[i + 1];
		node->input_pins[i]->pin_node_idx--;
	}
	node->input_port_sizes[addr_port]--;
	node->num_input_pins--;

	/* Never assign realloc's result straight back: if the shrink fails the
	 * old (larger) array is still valid and must be kept.  A zero-sized
	 * request may legitimately return NULL, so accept the result then. */
	shrunk_pins = realloc(node->input_pins, sizeof(npin_t *) * node->num_input_pins);
	if (shrunk_pins != NULL || node->num_input_pins == 0)
		node->input_pins = shrunk_pins;

	if (we_pin_idx >= addr_pin_idx)
		we_pin_idx--;

	/* Create the new memory node */
	new_mem_node = allocate_nnode();

	/* Append the new name with an __H, the old name with an __S */
	new_mem_node->name = append_string(node->name, "__H");
	new_name = append_string(node->name, "__S");
	free(node->name);
	node->name = new_name;

	/* Copy properties from the original memory node */
	new_mem_node->type = node->type;
	new_mem_node->related_ast_node = node->related_ast_node;
	new_mem_node->traverse_visited = node->traverse_visited;

	/* All output pins are registered as a single output port */
	add_output_port_information(new_mem_node, node->num_output_pins);
	allocate_more_node_output_pins(new_mem_node, node->num_output_pins);

	for (j = 0; j < node->num_input_port_sizes; j++)
		add_input_port_information(new_mem_node, node->input_port_sizes[j]);

	/* Copy over the input pins for the new memory, excluding we (that
	 * slot is filled by connect_nodes below) */
	allocate_more_node_input_pins(new_mem_node, node->num_input_pins);
	for (j = 0; j < node->num_input_pins; j++)
	{
		if (j == we_pin_idx)
			continue;
		add_a_input_pin_to_node_spot_idx(new_mem_node, copy_input_npin(node->input_pins[j]), j);
	}

	/* Original memory: we <- AND(addr, we).  The original addr and we
	 * pins are moved onto the AND gate; later users take copies. */
	and_node = make_2port_gate(LOGICAL_AND, 1, 1, 1, node, node->traverse_visited);
	add_a_input_pin_to_node_spot_idx(and_node, we_pin, 1);
	add_a_input_pin_to_node_spot_idx(and_node, addr_pin, 0);
	connect_nodes(and_node, 0, node, we_pin_idx);
	node->input_pins[we_pin_idx]->mapping = we_pin->mapping;

	/* New memory: we <- AND(NOT(addr), we) */
	not_node = make_not_gate_with_input(copy_input_npin(addr_pin), new_mem_node, new_mem_node->traverse_visited);
	and_node = make_2port_gate(LOGICAL_AND, 1, 1, 1, new_mem_node, new_mem_node->traverse_visited);
	connect_nodes(not_node, 0, and_node, 0);
	add_a_input_pin_to_node_spot_idx(and_node, copy_input_npin(we_pin), 1);
	connect_nodes(and_node, 0, new_mem_node, we_pin_idx);
	new_mem_node->input_pins[we_pin_idx]->mapping = we_pin->mapping;

	/* Flip-flop fed by the removed address pin and the clock; its output
	 * selects between the two memories on the read side. */
	ff_node = make_2port_gate(FF_NODE, 1, 1, 1, node, node->traverse_visited);
	add_a_input_pin_to_node_spot_idx(ff_node, copy_input_npin(addr_pin), 0);
	add_a_input_pin_to_node_spot_idx(ff_node, copy_input_npin(clk_pin), 1);

	/* Copy over the output pins for the new memory.  Per output pin a
	 * MUX_2 is wired as:
	 *    input 0 <- flip-flop output
	 *    input 1 <- NOT of the flip-flop output
	 *    input 2 <- output of node
	 *    input 3 <- output of new_mem_node */
	for (j = 0; j < node->num_output_pins; j++)
	{
		mux_node = make_2port_gate(MUX_2, 2, 2, 1, node, node->traverse_visited);
		connect_nodes(ff_node, 0, mux_node, 0);
		not_node = make_not_gate(node, node->traverse_visited);
		connect_nodes(ff_node, 0, not_node, 0);
		connect_nodes(not_node, 0, mux_node, 1);

		/* The old data-out pin now hangs off the mux ... */
		tdout_pin = node->output_pins[j];
		remap_pin_to_new_node(tdout_pin, mux_node, 0);

		/* ... and both memories feed the mux through fresh pins. */
		connect_nodes(node, j, mux_node, 2);
		node->output_pins[j]->mapping = tdout_pin->mapping;
		connect_nodes(new_mem_node, j, mux_node, 3);
		new_mem_node->output_pins[j]->mapping = tdout_pin->mapping;
		tdout_pin->mapping = NULL;

		mux_node->output_pins[0]->name = mux_node->name;
	}

	/* Both halves may still be too deep.  Each recursive call re-derives
	 * the logical size and either registers the memory in sp_memory_list
	 * or splits it again.  (logical_size is unchanged since the check
	 * above, so a second "small enough" test here could never succeed.) */
	split_sp_memory_depth(node);
	split_sp_memory_depth(new_mem_node);

	return;
}