SENNA_CHK *SENNA_CHK_new(const char *path, const char *subpath) { SENNA_CHK *chk = SENNA_malloc(sizeof(SENNA_CHK), 1); FILE *f; float dummy; memset(chk, 0, sizeof(SENNA_CHK)); f = SENNA_fopen(path, subpath, "rb"); SENNA_fread(&chk->window_size, sizeof(int), 1, f); SENNA_fread_tensor_2d(&chk->ll_word_weight, &chk->ll_word_size, &chk->ll_word_max_idx, f); SENNA_fread_tensor_2d(&chk->ll_caps_weight, &chk->ll_caps_size, &chk->ll_caps_max_idx, f); SENNA_fread_tensor_2d(&chk->ll_posl_weight, &chk->ll_posl_size, &chk->ll_posl_max_idx, f); SENNA_fread_tensor_2d(&chk->l1_weight, &chk->input_state_size, &chk->hidden_state_size, f); SENNA_fread_tensor_1d(&chk->l1_bias, &chk->hidden_state_size, f); SENNA_fread_tensor_2d(&chk->l2_weight, &chk->hidden_state_size, &chk->output_state_size, f); SENNA_fread_tensor_1d(&chk->l2_bias, &chk->output_state_size, f); SENNA_fread_tensor_1d(&chk->viterbi_score_init, &chk->output_state_size, f); SENNA_fread_tensor_2d(&chk->viterbi_score_trans, &chk->output_state_size, &chk->output_state_size, f); SENNA_fread(&chk->ll_word_padding_idx, sizeof(int), 1, f); SENNA_fread(&chk->ll_caps_padding_idx, sizeof(int), 1, f); SENNA_fread(&chk->ll_posl_padding_idx, sizeof(int), 1, f); SENNA_fread(&dummy, sizeof(float), 1, f); SENNA_fclose(f); if ((int)dummy != 777) SENNA_error("chk: data corrupted (or not IEEE floating computer)"); chk->input_state = NULL; chk->hidden_state = SENNA_malloc(sizeof(float), chk->hidden_state_size); chk->output_state = NULL; chk->labels = NULL; /* some info if you want verbose */ SENNA_message("chk: window size: %d", chk->window_size); SENNA_message("chk: vector size in word lookup table: %d", chk->ll_word_size); SENNA_message("chk: word lookup table size: %d", chk->ll_word_max_idx); SENNA_message("chk: vector size in caps lookup table: %d", chk->ll_caps_size); SENNA_message("chk: caps lookup table size: %d", chk->ll_caps_max_idx); SENNA_message("chk: vector size in pos lookup table: %d", chk->ll_posl_size); SENNA_message("chk: 
pos lookup table size: %d", chk->ll_posl_max_idx); SENNA_message("chk: number of hidden units: %d", chk->hidden_state_size); SENNA_message("chk: number of classes: %d", chk->output_state_size); return chk; }
/*
 * Viterbi decoding: find the best-scoring state sequence over T frames
 * with N possible states per frame.
 *
 * path:       output array of T entries; path[t] receives the chosen state
 *             for frame t.
 * init:       N scores added to the first frame.
 * transition: N*N scores; transition[i + j*N] is the score of going from
 *             state i at the previous frame to state j at the current one.
 * emission:   N*T scores; emission[j + t*N] is the score of state j at
 *             frame t.
 * N, T:       number of states and number of frames.
 *
 * When N or T is not positive there is nothing to decode: the function
 * returns immediately and path is left untouched.
 */
void SENNA_nn_viterbi(int *path, float *init, float *transition, float *emission, int N, int T)
{
  float *delta, *deltap;
  int *phi;
  int i, j, t;

  /* without this guard, T == 0 would write path[-1] below */
  if (N <= 0 || T <= 0)
    return;

  /* misc allocations */
  delta = SENNA_malloc(sizeof(float), N);
  deltap = SENNA_malloc(sizeof(float), N);
  phi = SENNA_malloc(sizeof(int), N * T); /* back-pointers are ints, not floats */

  /* init */
  for (i = 0; i < N; i++)
    deltap[i] = init[i] + emission[i];

  /* recursion */
  for (t = 1; t < T; t++)
  {
    float *deltan = delta;

    for (j = 0; j < N; j++)
    {
      float maxValue = -FLT_MAX;
      int maxIndex = 0;

      for (i = 0; i < N; i++)
      {
        float z = deltap[i] + transition[i + j * N];
        if (z > maxValue)
        {
          maxValue = z;
          maxIndex = i;
        }
      }
      delta[j] = maxValue + emission[j + t * N];
      phi[j + t * N] = maxIndex;
    }

    /* swap buffers: the scores just computed become "previous" for t+1 */
    delta = deltap;
    deltap = deltan;
  }

  /* best final state */
  {
    float maxValue = -FLT_MAX;
    int maxIndex = 0;

    for (j = 0; j < N; j++)
    {
      if (deltap[j] > maxValue)
      {
        maxValue = deltap[j];
        maxIndex = j;
      }
    }
    path[T - 1] = maxIndex;
  }

  /* follow the back-pointers from the last frame to the first */
  for (t = T - 2; t >= 0; t--)
    path[t] = phi[path[t + 1] + (t + 1) * N];

  SENNA_free(delta);
  SENNA_free(deltap);
  SENNA_free(phi);
}
SENNA_Hash* SENNA_Hash_new(const char *path, const char *filename) { FILE *f; SENNA_Hash *hash; char **keys = NULL; int n_keys; char key[MAX_KEY_SIZE]; //@AureDi temporary array of key, which stores the characters from filenames. int i; SENNA_message("loading hash: %s%s", (path ? path : ""), (filename ? filename : "")); //@AureDi Count the number of key (maximum is 256 characters) f = SENNA_fopen(path, filename, "rt"); /* the t is to comply with Windows */ n_keys = 0; while(fgets(key, MAX_KEY_SIZE, f)) n_keys++; //@AureDi Remember reading how many times SENNA_fclose(f); keys = SENNA_malloc(n_keys, sizeof(char*)); //@AureDi f = SENNA_fopen(path, filename, "rt"); /* the t is to comply with Windows */ n_keys = 0; while(fgets(key, MAX_KEY_SIZE, f)) { int key_size = strlen(key); //@ We can assume that the length of each line is not exceeded 255. key[key_size-1] = '\0'; /* discard the newline */ //@AureDi \0 is the sign of the end. Because this method is applied to process single word, so we can assume the end word is newline character . keys[n_keys] = SENNA_malloc(key_size, sizeof(char)); strcpy(keys[n_keys], key); n_keys++; } SENNA_fclose(f); hash = SENNA_malloc(sizeof(SENNA_Hash), 1); hash->keys = keys; //@AureDi keys is the pointer of second array. hash->size = n_keys; //@AureDi n_keys is the length of second aaay hash->is_admissible_key = NULL; //@AUreDi admissible /* sorted or unsorted hash ? */ /* (unsorted cannot return an index for a key) */ hash->is_sorted = 1; // uper is roght for(i = 0; i < n_keys-1; i++) { if(strcmp(keys[i], keys[i+1]) >= 0) { hash->is_sorted = 0; break; } } return hash; }
/*
 * Open the file whose name is path followed by subpath.
 *
 * path, subpath: either one may be NULL, in which case the other is used
 *                alone; passing NULL for both is reported via SENNA_error.
 * mode:          passed unchanged to fopen.
 *
 * Besides opening the file, this also checks that char, int and float have
 * the sizes (1, 4 and 4 bytes) expected when loading binary files.
 *
 * Returns the stream opened by fopen; a failed open is reported via
 * SENNA_error.
 */
FILE *SENNA_fopen(const char *path, const char *subpath, const char *mode)
{
  FILE *f;
  char *complete_path = NULL;

  if(!path && !subpath)
    SENNA_error("SENNA_fopen: path or subpath should be non NULL");

  if(path && subpath)
  {
    size_t pathsize = strlen(path);
    size_t subpathsize = strlen(subpath);

    /* +1 for the terminating NUL copied along with subpath */
    complete_path = SENNA_malloc(sizeof(char), pathsize+subpathsize+1);
    strcpy(complete_path, path);
    strcpy(complete_path+pathsize, subpath);
  }

  f = fopen((complete_path ? complete_path : (path ? path : subpath)), mode);

  if(!f)
    SENNA_error("unable to open file <%s%s>", (path ? path : ""), (subpath ? subpath : ""));

  if(sizeof(char) != 1)
    SENNA_error("char size is not 1, sorry can't load binary files");

  if(sizeof(int) != 4)
    SENNA_error("int size is not 4, sorry can't load binary files");

  if(sizeof(float) != 4)
    SENNA_error("float size is not 4, sorry can't load binary files");

  SENNA_free(complete_path);

  return f;
}
/*
 * Reverse the byte order inside each of n_blocks consecutive blocks of
 * block_size bytes starting at ptr_ (e.g. 01 02 03 04 -> 04 03 02 01).
 *
 * The reversal is done in place by swapping bytes from both ends of a
 * block, so no scratch buffer is needed. Blocks of one byte (or a
 * non-positive block_size) are left unchanged.
 */
static void buffer_reverse_memory(void *ptr_, int block_size, int n_blocks)
{
  unsigned char *block = ptr_;
  size_t width;
  int i;

  if(block_size <= 1)
    return;

  /* size_t keeps the block offsets from overflowing int on large buffers */
  width = (size_t)block_size;

  for(i = 0; i < n_blocks; i++, block += width)
  {
    unsigned char *lo = block;
    unsigned char *hi = block + width - 1;

    while(lo < hi)
    {
      unsigned char tmp = *lo;
      *lo++ = *hi;
      *hi-- = tmp;
    }
  }
}
/*
 * Read a 2-d float tensor from stream.
 *
 * The stream holds two ints (stored into *n_row, then *n_column) followed
 * by (*n_row)*(*n_column) floats. *ptr receives a buffer newly allocated
 * with SENNA_malloc holding those floats.
 *
 * n_row and n_column may point to the same int, in which case the second
 * value read overwrites the first before the element count is computed.
 *
 * A negative dimension, or a pair whose byte size does not fit in size_t,
 * is reported via SENNA_error and *ptr is set to NULL.
 */
void SENNA_fread_tensor_2d(float **ptr, int *n_row, int *n_column, FILE *stream)
{
  size_t n_elements;

  SENNA_fread(n_row, sizeof(int), 1, stream);
  SENNA_fread(n_column, sizeof(int), 1, stream);

  /* the sizes come from the file: check them before multiplying */
  if((*n_row < 0) || (*n_column < 0)
     || ((*n_column > 0) && ((size_t)(*n_row) > ((size_t)-1 / sizeof(float)) / (size_t)(*n_column))))
  {
    *ptr = NULL;
    SENNA_error("invalid tensor size %d x %d in file", *n_row, *n_column);
    return;
  }

  n_elements = (size_t)(*n_row) * (size_t)(*n_column);
  *ptr = SENNA_malloc(sizeof(float), n_elements);
  SENNA_fread(*ptr, sizeof(float), n_elements, stream);
}
SENNA_PSG* SENNA_PSG_new(const char *path, const char *subpath) { SENNA_PSG *psg = SENNA_malloc(sizeof(SENNA_PSG), 1); FILE *f; float dummy; memset(psg, 0, sizeof(SENNA_PSG)); f = SENNA_fopen(path, subpath, "rb"); SENNA_fread_tensor_2d(&psg->ll_word_weight, &psg->ll_word_size, &psg->ll_word_max_idx, f); SENNA_fread_tensor_2d(&psg->ll_caps_weight, &psg->ll_caps_size, &psg->ll_caps_max_idx, f); SENNA_fread_tensor_2d(&psg->ll_posl_weight, &psg->ll_posl_size, &psg->ll_posl_max_idx, f); SENNA_fread_tensor_2d(&psg->ll_psgl_weight, &psg->ll_psgl_size, &psg->ll_psgl_max_idx, f); SENNA_fread_tensor_2d(&psg->l1_weight, &psg->input_state_size, &psg->l1_state_size, f); SENNA_fread_tensor_1d(&psg->l1_bias, &psg->l1_state_size, f); SENNA_fread_tensor_2d(&psg->l2_bias, &psg->l1_state_size, &psg->window_size, f); SENNA_fread_tensor_2d(&psg->l3_weight, &psg->l2_state_size, &psg->l3_state_size, f); SENNA_fread_tensor_1d(&psg->l3_bias, &psg->l3_state_size, f); SENNA_fread_tensor_2d(&psg->l4_weight, &psg->l3_state_size, &psg->l4_state_size, f); SENNA_fread_tensor_1d(&psg->l4_bias, &psg->l4_state_size, f); SENNA_fread_tensor_1d(&psg->viterbi_score_init, &psg->l4_state_size, f); SENNA_fread_tensor_2d(&psg->viterbi_score_trans, &psg->l4_state_size, &psg->l4_state_size, f); SENNA_fread(&psg->ll_word_padding_idx, sizeof(int), 1, f); SENNA_fread(&psg->ll_caps_padding_idx, sizeof(int), 1, f); SENNA_fread(&psg->ll_posl_padding_idx, sizeof(int), 1, f); SENNA_fread(&psg->ll_psgl_padding_idx, sizeof(int), 1, f); SENNA_fread(&dummy, sizeof(float), 1, f); SENNA_fclose(f); if((int)dummy != 777) SENNA_error("psg: data corrupted (or not IEEE floating computer)"); psg->input_state = NULL; psg->l1_state = NULL; psg->l2_state = NULL; psg->l3_state = NULL; psg->l4_state = NULL; psg->labels = NULL; psg->treillis = SENNA_Treillis_new(); return psg; }
SENNA_Hash *SENNA_Hash_new_with_admissible_keys(const char *path, const char *filename, const char *admissible_keys_filename) { SENNA_Hash *hash = SENNA_Hash_new(path, filename); FILE *f; int admissiblekeyssize = 0; f = SENNA_fopen(path, admissible_keys_filename, "rb"); //@Aure b means that the file is binary file. SENNA_fseek(f, 0, SEEK_END); //@Aure #define SEEK_END 2 Reposition stream position indicator admissiblekeyssize = SENNA_ftell(f); //@ Get current position in stream if(admissiblekeyssize != hash->size) SENNA_error("inconsistent hash and admissible key files"); SENNA_fseek(f, 0, SEEK_SET); hash->is_admissible_key = SENNA_malloc(sizeof(char), admissiblekeyssize); SENNA_fread(hash->is_admissible_key, 1, admissiblekeyssize, f); //@ Read block of data from stream to char array is_admissible_key. SENNA_fclose(f); return hash; }
/*
 * Run the PSG network on one sentence and return the labels found at each
 * level.
 *
 * psg:             model; its state buffers and labels buffer are resized
 *                  here as needed and stay owned by psg.
 * sentence_words,
 * sentence_caps,
 * sentence_posl:   per-token indices fed to the word / caps / pos lookup
 *                  tables (sentence_size entries each).
 * sentence_size:   number of tokens.
 *                  NOTE(review): the segment check below reads
 *                  sentence_segl[0], so this looks like it expects
 *                  sentence_size >= 1 -- confirm with callers.
 * labels_:         receives psg->labels, which holds sentence_size ints per
 *                  level, levels stored one after the other.
 * n_level_:        receives the number of levels produced.
 *
 * Each pass of the loop feeds the labels kept so far back into the network
 * through the psg lookup table, decodes one more level, and stops once the
 * whole sentence forms a single segment or no non-zero label was produced.
 */
void SENNA_PSG_forward(SENNA_PSG *psg, const int *sentence_words, const int *sentence_caps, const int *sentence_posl, int sentence_size, int **labels_, int *n_level_)
{
  int *sentence_psgl = SENNA_malloc(sizeof(int), sentence_size);
  int *sentence_segl = SENNA_malloc(sizeof(int), sentence_size);
  int *start_and_sentence_level_label = SENNA_malloc(sizeof(int), sentence_size+1);
  int t;
  int level;

  /* no label history and no segmentation before the first level */
  for(t = 0; t < sentence_size; t++)
  {
    sentence_psgl[t] = 0;
    sentence_segl[t] = 0;
  }

  psg->input_state = SENNA_realloc(psg->input_state, sizeof(float), sentence_size*psg->input_state_size);
  psg->l1_state = SENNA_realloc(psg->l1_state, sizeof(float), sentence_size*psg->l1_state_size);
  psg->l2_state = SENNA_realloc(psg->l2_state, sizeof(float), sentence_size*psg->l2_state_size);
  psg->l3_state = SENNA_realloc(psg->l3_state, sizeof(float), sentence_size*psg->l3_state_size);
  psg->l4_state = SENNA_realloc(psg->l4_state, sizeof(float), sentence_size*psg->l4_state_size);

  /* word, caps and pos features do not change from one level to the next */
  SENNA_nn_lookup(psg->input_state, psg->input_state_size, psg->ll_word_weight, psg->ll_word_size, psg->ll_word_max_idx, sentence_words, sentence_size, 0, 0);
  SENNA_nn_lookup(psg->input_state+psg->ll_word_size, psg->input_state_size, psg->ll_caps_weight, psg->ll_caps_size, psg->ll_caps_max_idx, sentence_caps, sentence_size, 0, 0);
  SENNA_nn_lookup(psg->input_state+psg->ll_word_size+psg->ll_caps_size, psg->input_state_size, psg->ll_posl_weight, psg->ll_posl_size, psg->ll_posl_max_idx, sentence_posl, sentence_size, 0, 0);

  level = 0;
  while(1)
  {
    int all_tags_are_o;
    int all_in_one_segment;

    /* the label history is looked up again at every level */
    SENNA_nn_lookup(psg->input_state+psg->ll_word_size+psg->ll_caps_size+psg->ll_posl_size, psg->input_state_size, psg->ll_psgl_weight, psg->ll_psgl_size, psg->ll_psgl_max_idx, sentence_psgl, sentence_size, 0, 0);

    SENNA_nn_temporal_convolution(psg->l1_state, psg->l1_state_size, psg->l1_weight, psg->l1_bias, psg->input_state, psg->input_state_size, sentence_size, 1);
    SENNA_nn_temporal_max_convolution(psg->l2_state, psg->l2_bias, psg->l1_state, psg->l1_state_size, sentence_size, psg->window_size);
    SENNA_nn_temporal_convolution(psg->l3_state, psg->l3_state_size, psg->l3_weight, psg->l3_bias, psg->l2_state, psg->l1_state_size, sentence_size, 1);
    SENNA_nn_hardtanh(psg->l3_state, psg->l3_state, psg->l3_state_size*sentence_size);
    SENNA_nn_temporal_convolution(psg->l4_state, psg->l4_state_size, psg->l4_weight, psg->l4_bias, psg->l3_state, psg->l3_state_size, sentence_size, 1);

    SENNA_Treillis_buildfromscorewithsegmentation(psg->treillis, psg->l4_state, psg->viterbi_score_init, psg->viterbi_score_trans, sentence_segl, psg->l4_state_size, sentence_size);
    SENNA_Treillis_viterbi(psg->treillis, start_and_sentence_level_label);

    /* update history and segmentation */
    /* (entry 0 of the decoded array is skipped: token t is at index t+1) */
    all_tags_are_o = 1;
    for(t = 0; t < sentence_size; t++)
    {
      if(start_and_sentence_level_label[t+1])
      {
        sentence_psgl[t] = start_and_sentence_level_label[t+1]; /* note we always keep if something was there */
        sentence_segl[t] = (start_and_sentence_level_label[t+1]-1)%4+1;
        all_tags_are_o = 0;
      }
    }

    /* check if only one big segment */
    if(sentence_size == 1)
      all_in_one_segment = (sentence_segl[0] == SEG_S);
    else
      all_in_one_segment = (sentence_segl[0] == SEG_B) && (sentence_segl[sentence_size-1] == SEG_E);
    for(t = 1; all_in_one_segment && (t < sentence_size-1); t++)
    {
      if(sentence_segl[t] != SEG_I)
        all_in_one_segment = 0;
    }

    level++;

    /* labels are ints: grow the buffer only when this level does not fit */
    if(psg->max_labels_size < sentence_size*level)
    {
      psg->labels = SENNA_realloc(psg->labels, sizeof(int), sentence_size*level);
      psg->max_labels_size = sentence_size*level;
    }
    memcpy(psg->labels+(level-1)*sentence_size, start_and_sentence_level_label+1, sizeof(int)*sentence_size);

    if(all_in_one_segment || all_tags_are_o)
      break;
  }

  /* release with the same allocator family used to obtain the buffers */
  SENNA_free(sentence_psgl);
  SENNA_free(sentence_segl);
  SENNA_free(start_and_sentence_level_label);

  *labels_ = psg->labels;
  *n_level_ = level;
}
/*
 * Read a 1-d float tensor from stream.
 *
 * The stream holds one int (stored into *n_row) followed by *n_row floats.
 * *ptr receives a buffer newly allocated with SENNA_malloc holding those
 * floats.
 *
 * A negative count, or one whose byte size does not fit in size_t, is
 * reported via SENNA_error and *ptr is set to NULL.
 */
void SENNA_fread_tensor_1d(float **ptr, int *n_row, FILE *stream)
{
  size_t n_elements;

  SENNA_fread(n_row, sizeof(int), 1, stream);

  /* the size comes from the file: check it before allocating */
  if((*n_row < 0) || ((size_t)(*n_row) > (size_t)-1 / sizeof(float)))
  {
    *ptr = NULL;
    SENNA_error("invalid tensor size %d in file", *n_row);
    return;
  }

  n_elements = (size_t)(*n_row);
  *ptr = SENNA_malloc(sizeof(float), n_elements);
  SENNA_fread(*ptr, sizeof(float), n_elements, stream);
}
SENNA_SRL *SENNA_SRL_new(const char *path, const char *subpath) { SENNA_SRL *srl = SENNA_malloc(sizeof(SENNA_SRL), 1); FILE *f; float dummy; int dummy_size; f = SENNA_fopen(path, subpath, "rb"); SENNA_fread(&srl->window_size, sizeof(int), 1, f); SENNA_fread_tensor_2d(&srl->ll_word_weight, &srl->ll_word_size, &srl->ll_word_max_idx, f); SENNA_fread_tensor_2d(&srl->ll_caps_weight, &srl->ll_caps_size, &srl->ll_caps_max_idx, f); SENNA_fread_tensor_2d(&srl->ll_chkl_weight, &srl->ll_chkl_size, &srl->ll_chkl_max_idx, f); SENNA_fread_tensor_2d(&srl->ll_posv_weight, &srl->ll_posv_size, &srl->ll_posv_max_idx, f); SENNA_fread_tensor_2d(&srl->ll_posw_weight, &srl->ll_posw_size, &srl->ll_posw_max_idx, f); SENNA_fread_tensor_2d(&srl->l1_weight_wcc, &dummy_size, &srl->hidden_state1_size, f); SENNA_fread_tensor_2d(&srl->l1_weight_pv, &dummy_size, &srl->hidden_state1_size, f); SENNA_fread_tensor_2d(&srl->l1_weight_pw, &dummy_size, &srl->hidden_state1_size, f); SENNA_fread_tensor_1d(&srl->l1_bias, &srl->hidden_state1_size, f); SENNA_fread_tensor_2d(&srl->l3_weight, &srl->hidden_state1_size, &srl->hidden_state3_size, f); SENNA_fread_tensor_1d(&srl->l3_bias, &srl->hidden_state3_size, f); SENNA_fread_tensor_2d(&srl->l4_weight, &srl->hidden_state3_size, &srl->output_state_size, f); SENNA_fread_tensor_1d(&srl->l4_bias, &srl->output_state_size, f); SENNA_fread_tensor_1d(&srl->viterbi_score_init, &srl->output_state_size, f); SENNA_fread_tensor_2d(&srl->viterbi_score_trans, &srl->output_state_size, &srl->output_state_size, f); SENNA_fread(&srl->ll_word_padding_idx, sizeof(int), 1, f); SENNA_fread(&srl->ll_caps_padding_idx, sizeof(int), 1, f); SENNA_fread(&srl->ll_chkl_padding_idx, sizeof(int), 1, f); SENNA_fread(&dummy, sizeof(float), 1, f); SENNA_fclose(f); if ((int)dummy != 777) SENNA_error("srl: data corrupted (or not IEEE floating computer)"); /* states */ srl->sentence_posv = NULL; srl->sentence_posw = NULL; srl->input_state = NULL; srl->input_state_wcc = NULL; srl->input_state_pv = 
NULL; srl->input_state_pw = NULL; srl->hidden_state1 = NULL; srl->hidden_state1_wcc = NULL; srl->hidden_state1_pv = NULL; srl->hidden_state1_pw = NULL; srl->hidden_state2 = NULL; srl->hidden_state3 = NULL; srl->output_state = NULL; srl->labels = NULL; srl->labels_size = 0; srl->service = false; srl->debug = false; srl->calls = 0; srl->dnntime = 0; srl->apptime = 0; /* some info if you want verbose */ SENNA_message("srl: window size: %d", srl->window_size); SENNA_message("srl: vector size in word lookup table: %d", srl->ll_word_size); SENNA_message("srl: word lookup table size: %d", srl->ll_word_max_idx); SENNA_message("srl: vector size in caps lookup table: %d", srl->ll_caps_size); SENNA_message("srl: caps lookup table size: %d", srl->ll_caps_max_idx); SENNA_message("srl: vector size in verb position lookup table: %d", srl->ll_posv_size); SENNA_message("srl: verb position lookup table size: %d", srl->ll_posv_max_idx); SENNA_message("srl: vector size in word position lookup table: %d", srl->ll_posw_size); SENNA_message("srl: word position lookup table size: %d", srl->ll_posw_max_idx); SENNA_message("srl: number of hidden units (convolution): %d", srl->hidden_state1_size); SENNA_message("srl: number of hidden units (hidden layer): %d", srl->hidden_state3_size); SENNA_message("srl: number of classes: %d", srl->output_state_size); return srl; }