void Sheet::read (const char* spec) { FILE* f = fopen (spec, "r"); if (!f) { printf ("error - cannot open '%s'\n", spec); exit (1); } int min_len = 0x7ffffff; int max_len = 0; n_rows = 0; for (;;) { char* line; fgets_no_control_m (&line, f); if (feof(f)) break; ++n_rows; char** words; int n_words; split_by_delimiter_blank (line, &words, &n_words, "\t"); free (line); words_free (words, n_words); min_len = min (min_len, n_words); max_len = max (max_len, n_words); } /* if (min_len != max_len) { printf ("error - bad lengths in '%s'\n", spec); exit (1); } */ n_cols = max_len; data = (char***)malloc ((n_rows+1)*sizeof(char**)); rewind (f); for (int i = 1; i <= n_rows; i++) { char* line; fgets_no_control_m (&line, f); int n_words; split_by_delimiter_blank (line, &data[i], &n_words, "\t"); free (line); for (int j = 1; j <= n_words; j++) trim (data[i][j]); } fclose (f); }
static void go(char *lang1, char *lang2, char *lexfile1, char *dicfile1, char *lexfile2, char *dicfile2, char *outfile) { Words *lex1, *lex2; wchar_t *string1 = NULL; wchar_t *string2 = NULL; nat_uint32_t ptr1 = 0; nat_uint32_t ptr2 = 0; nat_uint32_t size1, size2; NATCell *cells1 = NULL; NATCell *cells2 = NULL; nat_uint32_t cellptr1 = 0; nat_uint32_t cellptr2 = 0; nat_uint32_t *tab1 = NULL; nat_uint32_t *tab2 = NULL; /* ---- First ------------------------------- */ lex1 = words_quick_load(lexfile1); if (!lex1) { fprintf(stderr, "Error loading lexicon 1\n"); exit(1); } size1 = 11 * lex1->count; string1 = g_new0(wchar_t, size1); if (!string1) { fprintf(stderr, "Error allocating string1\n"); exit(1); } cells1 = g_new0(NATCell, lex1->count + 1); if (!cells1) { fprintf(stderr, "Error allocating cells1\n"); exit(1); } tab1 = g_new0(nat_uint32_t, lex1->count + 1); if (!tab1) { fprintf(stderr, "Error allocating tab1\n"); exit(1); } tab1[0] = tab1[1] = lex1->count-1; ptr1 = tree_to_array(lex1->count,string1, cells1, lex1->tree, ptr1, size1, &cellptr1, tab1); cells1[cellptr1].offset = ptr1; cells1[cellptr1].count = 0; cells1[cellptr1].id = cellptr1; cellptr1++; g_message("** Preparing source Lexicon **"); g_message("\tPtr is at %u and original size was %u", ptr1, size1); g_message("\tOffset on the array is %u", cellptr1); g_message("\tNULL is pointing to %u", tab1[0]); /* ---- Second ------------------------------ */ lex2 = words_quick_load(lexfile2); if (!lex2) report_error("Error loading lexicon 2\n"); size2 = 11*lex2->count; string2 = g_new0(wchar_t, size2); if (!string2) report_error("Error allocating string2\n"); cells2 = g_new0(NATCell, lex2->count+1); if (!cells2) report_error("Error allocating cells2\n"); tab2 = g_new0(nat_uint32_t, lex2->count+1); if (!tab2) report_error("Error allocating tab2\n"); tab2[0] = tab2[1] = lex2->count-1; ptr2 = tree_to_array(lex2->count,string2, cells2, lex2->tree, ptr2, size2, &cellptr2, tab2); cells2[cellptr2].offset = ptr2; cells2[cellptr2].count = 0; cells2[cellptr2].id = cellptr2; cellptr2++; g_message("** Preparing target Lexicon **"); g_message("\tPtr is at %u and original size was %u", ptr2, size2); g_message("\tOffset on the array is %u", cellptr2); g_message("\tNULL is pointing to %u", tab2[0]); save(outfile, lang1, lang2, dicfile1, tab1, dicfile2, tab2, string1, ptr1, cells1, cellptr1, string2, ptr2, cells2, cellptr2); /* save(outfile, lang1, lang2, dicfile1, tab1, dicfile2, tab2, string1, ptr1, cells1, lex1->count, string2, ptr2, cells2, lex2->count); */ words_free(lex1); words_free(lex2); }
bool string_matches_pattern (const char* string, const char* passed_pattern) { /* is the whole pattern stars? ... */ const char* p; bool all_stars; for (p = passed_pattern, all_stars = true; *p; p++) if (*p != '*') { all_stars = false; break; } /* ... if so, it matches everything */ if (all_stars) return true; /* if wildcarding is disabled, just do a string match */ if (*passed_pattern == '\\') return strequal (string, passed_pattern+1); char *pattern; strcpy (&pattern, passed_pattern); /* split the pattern into non-star groups, eg "?[a-c]**[abe-hkr-s]?*" => n_sub_pats = 2 sub_pats[1] = "?[a-c]", sub_pats[2] = "[abe-hkr-s]?" The idea is that each * (or run of them) means "omit as many characters as necessary (including none) in order to be able to match the next set of characters in the string" */ /* allow for a * in [...] */ int len = strlen (pattern); bool in_brackets = false; for (int i = 0; i < len; i++) { /* not perfect ... */ if (pattern[i] == '[') in_brackets = true; else if (pattern[i] == ']') in_brackets = false; if (pattern[i] == '*' && in_brackets) pattern[i] = 1; } char** sub_pats; int n_sub_pats; split_by_delimiter (pattern, &sub_pats, &n_sub_pats, "*"); /* undo the star encoding */ for (int j = 1; j <= n_sub_pats; j++) { len = strlen (sub_pats[j]); in_brackets = false; for (int i = 0; i < len; i++) { /* not perfect ... */ if (sub_pats[j][i] == '[') in_brackets = true; else if (sub_pats[j][i] == ']') in_brackets = false; if (sub_pats[j][i] == 1 && in_brackets) sub_pats[j][i] = '*'; } } /* does the whole pattern start and/or end with stars? */ bool star_at_start = pattern[0] == '*'; bool star_at_end = pattern[strlen(pattern)-1] == '*'; bool matched = do_match (string, sub_pats, n_sub_pats, star_at_start, star_at_end); /* free-off allocated space*/ words_free (sub_pats, n_sub_pats); free (pattern); return matched; }