/*
 * Format the path of the current category's output file into the shared
 * locpath buffer and return that buffer.
 *
 * The locale name and the category name are joined with '.' when the
 * dragonfly flag is set and with '/' otherwise.  The returned pointer
 * refers to locpath itself, so the next call overwrites the result.
 */
static char *
category_file(void)
{
	if (!dragonfly) {
		(void) snprintf(locpath, sizeof (locpath), "%s/%s",
		    locname, category_name());
		return (locpath);
	}

	(void) snprintf(locpath, sizeof (locpath), "%s.%s",
	    locname, category_name());
	return (locpath);
}
/*
 * Print every record of the list that starts at `head`, followed by the
 * overall grade as a percentage of earned over possible points.
 *
 * Returns -1 (after printing a notice) when the list is empty, 0 otherwise.
 * The average line is omitted when the possible points sum to zero.
 */
int average() {
    float total_possible = 0.0;
    float total_earned = 0.0;
    struct Entry *cur;

    if (head == NULL) {
        printf("No records found.\n");
        return -1;
    }

    for (cur = head; cur != NULL; cur = cur->next) {
        printf("\n\nName: %s\n", cur->name);
        printf("Category: %s\n", category_name(cur->type));
        printf("Possible Points: %f\n", cur->possible_points);
        printf("Points Earned: %f\n", cur->points);

        total_possible += cur->possible_points;
        total_earned += cur->points;
    }

    /* Skip the average rather than divide by zero. */
    if (total_possible != 0)
        printf("\n\nAverage Grade: %0.2f%%\n\n", (total_earned / total_possible) * 100);

    return 0;
}
/*
 * Open the output file of the current category for writing.
 *
 * When verbose is set, a progress message is printed and flushed first.
 * Unless the dragonfly flag is set, the parent directory of the category
 * file is created (the mkdir result is deliberately ignored).
 *
 * Returns the open stream, or NULL after reporting strerror(errno)
 * through errf() when fopen() fails.
 */
FILE *
open_category(void)
{
	FILE *out;

	if (verbose) {
		(void) printf("Writing category %s: ", category_name());
		(void) fflush(stdout);
	}

	/* make the parent directory */
	if (!dragonfly)
		(void) mkdir(dirname(category_file()), 0755);

	/*
	 * dirname() clobbered the path buffer, so the file name has to be
	 * regenerated before it is opened.
	 */
	out = fopen(category_file(), "w");
	if (out != NULL)
		return (out);

	/*
	 * NOTE(review): if errf() takes a printf-style format, the message
	 * should be passed as an argument to "%s" -- confirm its signature.
	 */
	errf(strerror(errno));
	return (NULL);
}
/*
 * Format "<locname>/<category>/LCL_DATA" into the shared locpath buffer
 * and return that buffer.  The result is overwritten by the next call.
 */
static char *
category_file(void)
{
	const char *category = category_name();

	(void) snprintf(locpath, sizeof (locpath), "%s/%s/LCL_DATA",
	    locname, category);

	return (locpath);
}
/*
 * Report verbose identifier info.
 *
 * Prints a report for identifier `u` to stdout: validation result,
 * timestamp, microtime, structure/revision labels derived from u.version,
 * tag, category name and the list of flags that are set.
 *
 * When u.version is neither QUID_REV4 nor QUID_REV7 the labels fall back to
 * "unknown" and the version number to 0, so the report never prints
 * uninitialized data.
 */
void input_verbose(cuuid_t u) {
    char sflag;
    int version = 0;
    const char *structure = "unknown";
    const char *rev = "unknown";

    switch (u.version) {
        case QUID_REV4:
            version = 4;
            structure = "memgrep";
            rev = "REV2012";
            break;
        case QUID_REV7:
            version = 7;
            structure = "ChaCha/4";
            rev = "REV2017";
            break;
        default:
            /* Unrecognised revision: keep the "unknown" defaults. */
            break;
    }

    printf("---------------------------------------------\n");
    printf("Health : %s\n", quid_validate(&u) ? "OK" : "INVALID");
    /* No "\n" in the format: the string returned by asctime() ends in a newline. */
    printf("Timestamp (UTC) : %s", asctime(quid_timestamp(&u)));
    printf("Microtime : %fms\n", quid_microtime(&u)/1000.0);
    printf("Structure : %s\n", structure);
    printf("Cuuid version : %d (%s)\n", version, rev);
    printf("Tag : %s\n", quid_tag(&u));
    printf("Category : %s\n", category_name(quid_category(&u)));

    /*
     * Remove NULL.
     * NOTE(review): XOR only clears IDF_NULL bits that are already set in
     * the flag byte -- confirm that always holds for quid_flag().
     */
    sflag = (quid_flag(&u) ^ IDF_NULL);

    printf("Flags :\n");
    if (sflag & FLAG_PUBLIC) {
        printf(" * Public\n");
    }
    if (sflag & FLAG_IDSAFE) {
        printf(" * Safe\n");
    }
    if (sflag & FLAG_MASTER) {
        printf(" * master\n");
    }
    if (sflag & FLAG_SIGNED) {
        printf(" * Signed\n");
    }
    if (sflag & FLAG_TAGGED) {
        printf(" * Tagged\n");
    }
    if (sflag & FLAG_STRICT) {
        printf(" * Strict\n");
    }
    printf("---------------------------------------------\n");
}
/* * This function is used when copying the category from another * locale. Note that the copy is actually performed using a hard * link for efficiency. */ void copy_category(char *src) { char srcpath[PATH_MAX]; int rv; (void) snprintf(srcpath, sizeof (srcpath), "%s/%s", src, category_name()); rv = access(srcpath, R_OK); if ((rv != 0) && (strchr(srcpath, '/') == NULL)) { /* Maybe we should try the system locale */ (void) snprintf(srcpath, sizeof (srcpath), "/usr/lib/locale/%s/%s", src, category_name()); rv = access(srcpath, R_OK); } if (rv != 0) { fprintf(stderr,"source locale data unavailable: %s", src); return; } if (verbose > 1) { (void) printf("Copying category %s from %s: ", category_name(), src); (void) fflush(stdout); } /* make the parent directory */ if (!dragonfly) (void) mkdir(dirname(category_file()), 0755); if (link(srcpath, category_file()) != 0) { fprintf(stderr,"unable to copy locale data: %s", strerror(errno)); return; } if (verbose > 1) { (void) printf("done.\n"); } }
// Builds and returns, as a wide string, the text that precedes the rules in
// the generated transfer file.
//
// In order, the returned text contains:
//  - the XML declaration, an nxml mode comment and the opening <transfer> tag;
//  - <section-def-cats>: one def-cat named "CAT__" + category_name(lemma,tags)
//    per (lemma, tags) pair in `categories`. Pairs with a non-empty lemma get
//    two cat-items (lemma as stored and with its first character upper-cased
//    using a locale built from setlocale(LC_ALL,"")); pairs with an empty
//    lemma get a tags-only cat-item plus, when sm_generalise is set or the
//    tags end in '*', one with the last two characters of the tags removed.
//    The fixed def-cats CAT__HASGENDER_NUMBER, CAT__ND_GD, sent and any follow;
//  - <section-def-attrs>: one def-attr "learned_<name>" per entry of
//    `attributes`, listing the first tag set of the entry with '+' replaced
//    by "\+", followed by the fixed learned_gen and learned_num def-attrs;
//  - <section-def-vars> declaring the variables genre and number;
//  - <section-def-macros> with f_bcond, f_genre_num and f_set_genre_num (the
//    body of the latter depends on no_vars_determined);
//  - the opening <section-rules> tag, which this function does not close.
//
// The `debug` parameter is not referenced in the body.
//
// NOTE(review): with an empty lemma and empty tags, the expression
// it->second[it->second.size()-1] indexes past the string -- confirm that
// `categories` never holds empty tags.
// NOTE(review): this copy of the function has lost its original line breaks,
// so `//` comments appear fused with the statements that followed them; the
// description above assumes each `//` comment ended before the next statement.
wstring TransferRule::gen_apertium_transfer_head(bool debug) { locale loc(setlocale(LC_ALL,"")); wstring head=L""; head+=L"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; head+=L"<!-- -*- nxml -*- -->\n"; head+=L"<transfer>\n"; head+=L"<section-def-cats>\n"; set<pair<wstring,wstring> >::iterator it; for(it=categories.begin(); it!=categories.end(); it++) { head+=L" <def-cat n=\"CAT__"+category_name(it->first,it->second)+L"\">\n"; if (it->first.length()>0) //There is a lemma { head+=L" <cat-item lemma=\""+StringUtils::substitute(it->first,L"_",L" ")+L"\" tags=\""+escape_attr(it->second)+L"\"/>\n"; //Lemma in upper case too head+=L" <cat-item lemma=\""; head+=toupper<wchar_t>(StringUtils::substitute(it->first,L"_",L" ")[0],loc); head+=StringUtils::substitute(it->first,L"_",L" ").substr(1)+L"\" tags=\""+escape_attr(it->second)+L"\"/>\n"; } else { head+=L" <cat-item tags=\""+escape_attr(it->second)+L"\"/>\n"; if(sm_generalise || it->second[it->second.size()-1]==L'*' ) head+=L" <cat-item tags=\""+escape_attr((it->second).substr(0,it->second.size()-2))+L"\"/>\n"; } head+=L" </def-cat>\n"; } head+=L"<def-cat n=\"CAT__HASGENDER_NUMBER\"><cat-item tags=\"*.m.*\"/><cat-item tags=\"*.f.*\"/><cat-item tags=\"*.sg.*\"/><cat-item tags=\"*.pl.*\"/></def-cat>"; head+=L" <def-cat n=\"CAT__ND_GD\">\n"; head+=L" <cat-item tags=\"*.mf.*\"/>\n"; head+=L" <cat-item tags=\"*.sp.*\"/>\n"; head+=L" <cat-item tags=\"*.mf.sp.*\"/>\n"; head+=L" <cat-item tags=\"*.sp.mf.*\"/>\n"; head+=L" <cat-item tags=\"*.mf.*.sp.*\"/>\n"; head+=L" <cat-item tags=\"*.sp.*.mf.*\"/>\n"; head+=L" </def-cat>\n"; head+=L" <def-cat n=\"sent\">\n"; head+=L" <cat-item tags=\"sent\"/>\n"; head+=L" <cat-item tags=\"RESsent.sent\"/>\n"; head+=L" </def-cat>\n"; head+=L" <def-cat n=\"any\">\n"; head+=L" <cat-item tags=\"*\"/>\n"; head+=L" </def-cat>\n"; head+=L"</section-def-cats>\n"; head+=L"<section-def-attrs>\n"; //set<string>::iterator it2; //for(it2=attributes.begin(); it2!=attributes.end(); it2++) { // head+=" 
<def-attr n=\"ATTR__"+category_name("",(*it2))+"\">\n"; // head+=" <attr-item tags=\""+(*it2)+"\"/>\n"; // head+=" </def-attr>\n"; //} map<wstring,pair< set<wstring>, set<wstring> > >::iterator it2; for(it2=attributes.begin(); it2!=attributes.end(); it2++) { head+=L" <def-attr n=\"learned_"+(*it2).first+L"\">\n"; set<wstring> tagsfromattr=(*it2).second.first; for(set<wstring>::iterator it3= tagsfromattr.begin(); it3!=tagsfromattr.end(); ++it3) head+=L" <attr-item tags=\""+StringUtils::substitute((*it3),L"+",L"\\+")+L"\"/>\n"; head+=L" </def-attr>\n"; } head+=L" <def-attr n=\"learned_gen\">\n"; head+=L" <attr-item tags=\"m\"/>\n"; head+=L" <attr-item tags=\"f\"/>\n"; head+=L" <attr-item tags=\"mf\"/>\n"; head+=L" <attr-item tags=\"GD\"/>\n"; head+=L" </def-attr>\n"; head+=L" <def-attr n=\"learned_num\">\n"; head+=L" <attr-item tags=\"sg\"/>\n"; head+=L" <attr-item tags=\"pl\"/>\n"; head+=L" <attr-item tags=\"sp\"/>\n"; head+=L" <attr-item tags=\"ND\"/>\n"; head+=L" </def-attr>\n"; //head+=" <def-attr n=\"ATTR__notused\">\n"; //head+=" <attr-item tags=\"this.attr.will.not.be.used\"/>\n"; //head+=" </def-attr>\n"; head+=L"</section-def-attrs>\n"; head+=L"<section-def-vars>\n"; head+=L" <def-var n=\"genre\"/>\n"; head+=L" <def-var n=\"number\"/>\n"; head+=L"</section-def-vars>\n"; head+=L"<section-def-macros>\n"; head+=L"<def-macro n=\"f_bcond\" npar=\"2\">\n"; head+=L"<!--To test whether a blank contains format information.\n"; head+=L"If no format information is present it is removed. 
-->\n"; head+=L" <choose>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <not>\n"; head+=L" <equal>\n"; head+=L" <b pos=\"1\"/>\n"; head+=L" <lit v=\" \"/>\n"; head+=L" </equal>\n"; head+=L" </not>\n"; head+=L" </test>\n"; head+=L" <out>\n"; head+=L" <b pos=\"1\"/>\n"; head+=L" </out>\n"; head+=L" </when>\n"; head+=L" </choose>\n"; head+=L"</def-macro>\n"; head+=L"<def-macro n=\"f_genre_num\" npar=\"1\">\n"; head+=L"<!--To set the global value storing the TL genre of the last seen word. -->\n"; head+=L" <choose>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <equal>\n"; head+=L" <clip pos=\"1\" side=\"tl\" part=\"learned_gen\"/>\n"; head+=L" <lit-tag v=\"m\"/>\n"; head+=L" </equal>\n"; head+=L" </test>\n"; head+=L" <let><var n=\"genre\"/><lit-tag v=\"m\"/></let>\n"; head+=L" </when>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <equal>\n"; head+=L" <clip pos=\"1\" side=\"tl\" part=\"learned_gen\"/>\n"; head+=L" <lit-tag v=\"f\"/>\n"; head+=L" </equal>\n"; head+=L" </test>\n"; head+=L" <let><var n=\"genre\"/><lit-tag v=\"f\"/></let>\n"; head+=L" </when>\n"; head+=L" </choose>\n"; head+=L" <choose>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <equal>\n"; head+=L" <clip pos=\"1\" side=\"tl\" part=\"learned_num\"/>\n"; head+=L" <lit-tag v=\"sg\"/>\n"; head+=L" </equal>\n"; head+=L" </test>\n"; head+=L" <let><var n=\"number\"/><lit-tag v=\"sg\"/></let>\n"; head+=L" </when>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <equal>\n"; head+=L" <clip pos=\"1\" side=\"tl\" part=\"learned_num\"/>\n"; head+=L" <lit-tag v=\"pl\"/>\n"; head+=L" </equal>\n"; head+=L" </test>\n"; head+=L" <let><var n=\"number\"/><lit-tag v=\"pl\"/></let>\n"; head+=L" </when>\n"; head+=L" </choose>\n"; head+=L"</def-macro>\n"; head+=L"<def-macro n=\"f_set_genre_num\" npar=\"1\">\n"; head+=L"<!--To set the genre of those words with GD, and the number of those words with ND. -->\n"; head+=L"<!--This is only used in no alignment template at all is applied. 
-->\n"; if(no_vars_determined) { head+=L" <choose>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <equal>\n"; head+=L" <clip pos=\"1\" side=\"tl\" part=\"learned_gen\"/>\n"; head+=L" <lit-tag v=\"GD\"/>\n"; head+=L" </equal>\n"; head+=L" </test>\n"; //head+=L" <let><clip pos=\"1\" side=\"tl\" part=\"gen\"/><lit-tag v=\"m\"/></let>\n"; head+=L" </when>\n"; head+=L" </choose>\n"; head+=L" <choose>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <equal>\n"; head+=L" <clip pos=\"1\" side=\"tl\" part=\"learned_num\"/>\n"; head+=L" <lit-tag v=\"ND\"/>\n"; head+=L" </equal>\n"; head+=L" </test>\n"; //head+=L" <let><clip pos=\"1\" side=\"tl\" part=\"num\"/><lit-tag v=\"sg\"/></let>\n"; head+=L" </when>\n"; head+=L" </choose>\n"; } else { head+=L" <choose>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <equal>\n"; head+=L" <clip pos=\"1\" side=\"tl\" part=\"learned_gen\"/>\n"; head+=L" <lit-tag v=\"GD\"/>\n"; head+=L" </equal>\n"; head+=L" </test>\n"; head+=L" <choose>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <equal>\n"; head+=L" <var n=\"genre\"/>\n"; head+=L" <lit-tag v=\"f\"/>\n"; head+=L" </equal>\n"; head+=L" </test>\n"; head+=L" <let><clip pos=\"1\" side=\"tl\" part=\"learned_gen\"/><lit-tag v=\"f\"/></let>\n"; head+=L" </when>\n"; head+=L" <otherwise>\n"; head+=L" <let><clip pos=\"1\" side=\"tl\" part=\"learned_gen\"/><lit-tag v=\"m\"/></let>\n"; head+=L" </otherwise>\n"; head+=L" </choose>\n"; head+=L" </when>\n"; head+=L" </choose>\n"; head+=L" <choose>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <equal>\n"; head+=L" <clip pos=\"1\" side=\"tl\" part=\"learned_num\"/>\n"; head+=L" <lit-tag v=\"ND\"/>\n"; head+=L" </equal>\n"; head+=L" </test>\n"; head+=L" <choose>\n"; head+=L" <when>\n"; head+=L" <test>\n"; head+=L" <equal>\n"; head+=L" <var n=\"number\"/>\n"; head+=L" <lit-tag v=\"pl\"/>\n"; head+=L" </equal>\n"; head+=L" </test>\n"; head+=L" <let><clip pos=\"1\" side=\"tl\" part=\"learned_num\"/><lit-tag v=\"pl\"/></let>\n"; 
head+=L" </when>\n"; head+=L" <otherwise>\n"; head+=L" <let><clip pos=\"1\" side=\"tl\" part=\"learned_num\"/><lit-tag v=\"sg\"/></let>\n"; head+=L" </otherwise>\n"; head+=L" </choose>\n"; head+=L" </when>\n"; head+=L" </choose>\n"; } head+=L"</def-macro>\n"; head+=L"</section-def-macros>\n"; head+=L"<section-rules>\n"; return head; }
// Builds and returns, as a wide string, the <rule> element generated from
// the alignment templates (ATs) stored in `ats`.
//
// Side effects visible in the body:
//  - writes a message to cerr and calls exit(EXIT_FAILURE) when `ats` is empty;
//  - sorts `ats` in place with AlignmentTemplateGreaterThanByCount;
//  - may insert a new entry into `attributes` for a part-of-speech group
//    that is not yet present.
//
// Layout of the generated rule:
//  - <pattern>: one pattern-item per space-separated word of `source`. The
//    category is derived from the first tag when sm_generalise is set and
//    from the lemma plus non-generalised tags otherwise; the same names are
//    concatenated into the chunk name.
//  - <action><choose>: one <when> per AT, in sorted order. Its <test> is
//    built from every restriction other than "__CLOSEWORD__", optional
//    checks against the target word tags, and (when sm_generalise,
//    provided_patterns or using_explicit_empty_tags is set) checks on the
//    source-side lemma and tags. More than one condition is wrapped in <and>;
//    with no condition an always-true comparison is emitted.
//  - The <out> of each <when> prints the target words: words not starting
//    with '<' are emitted with a literal lemma, the rest are clipped from the
//    matched source position (side "sl" when do_rbpe is set, "tl" otherwise).
//    Blanks are emitted between words, the whole output is wrapped in a
//    <chunk> when generate_chunks is set, f_bcond is called for source
//    positions beyond the target length, and the collected genre/number
//    <let> statements are appended.
//  - <otherwise>: either a reject-current-rule action (use_discard_rule) or
//    a word-for-word copy of every source position.
//
// When `debug` is true, extra <out> blocks carrying the rule id and the AT
// text are added.
//
// NOTE(review): include_otherwise is never set to false in this body (the
// only assignment is commented out), so the early `break` looks unreachable.
// NOTE(review): `newelementvalue` is filled but never stored into
// `newelement` before attributes.insert(newelement) -- confirm intent.
// NOTE(review): target_transfer_format.substr(0,myi-1) is evaluated with
// myi==0 when the string starts with '*' -- confirm this cannot happen.
// NOTE(review): the use_discard_rule branch emits "</action>" itself and the
// common tail emits it again -- confirm the resulting XML is what is wanted.
// NOTE(review): this copy of the function has lost its original line breaks,
// so `//` comments appear fused with the statements that followed them; the
// description above assumes each `//` comment ended before the next statement.
wstring TransferRule::gen_apertium_transfer_rule(bool debug) { locale loc(setlocale(LC_ALL,"")); wstring rule=L""; bool include_otherwise=true; if (ats.size()==0) { cerr<<"Error in TransferRule::gen_apertium_transfer_rule: No alignment templates available\n"; exit(EXIT_FAILURE); } //Sort the AT so as to apply always the most frequent AT that //satisfies the restrictions AlignmentTemplateGreaterThanByCount atcomparer; sort(ats.begin(), ats.end(), atcomparer); //debug //cerr<<"ats.size(): "<<ats.size()<<endl; rule+=L"<rule>\n"; //The pattern to detect is the same for all AT within this transfer rule rule+=L" <pattern>\n"; wstring chunkName=L""; vector<wstring> generalpattern=StringUtils::split_wstring(source,L" "); //for(unsigned i=0; i<ats[0].source.size(); i++) { for(unsigned i=0; i<generalpattern.size(); i++) { wstring lemma=Utils::get_lemma(generalpattern[i]); wstring tags=Utils::tags2transferformat(Utils::get_tags(generalpattern[i])); wstring tagsnogen=tags; remove_generalised_tags(tagsnogen); chunkName+=L"__"; if(sm_generalise) { wstring tmpstr=Utils::get_first_tag(Utils::get_tags(generalpattern[i])).substr(1); rule+=L" <pattern-item n=\"CAT__"+category_name(L"",tmpstr.substr(0,tmpstr.size()-1)+L".*")+L"\"/>\n"; chunkName+=category_name(L"",tmpstr.substr(0,tmpstr.size()-1)+L".*"); } else { rule+=L" <pattern-item n=\"CAT__"+category_name(lemma,tagsnogen)+L"\"/>\n"; chunkName+=category_name(lemma,tagsnogen); } } rule+=L" </pattern>\n"; rule+=L" <action>\n"; rule+=L" <choose>\n"; //There is a set of different actions depending on the TL side of //each AT. 
Consequently, there's one <when> statement per AT for(unsigned i=0; i<ats.size(); i++) { rule+=L" <when>"; rule+=L"<!--"+ats[i].to_wstring()+L"-->\n"; rule+=L" <test>\n"; int nconditions=0; wstring teststr=L""; //This AT can be applied if all restrictions are met for(unsigned j=0; j<ats[i].restrictions.size(); j++){ if (ats[i].restrictions[j]!=L"__CLOSEWORD__") { nconditions++; //teststr+=L" <or>\n"; if (empty_restrictions_match_everything) { wstring source_tags_transfer_format=Utils::tags2transferformat(Utils::get_tags(ats[i].source[j])); remove_generalised_tags(source_tags_transfer_format); remove_final_asterisk(source_tags_transfer_format); vector<wstring> nongenTags=StringUtils::split_wstring(source_tags_transfer_format,L"."); teststr+=L" <and>\n"; teststr+=L" <begins-with>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"tl\" part=\"tags\" />\n"; teststr+=L" <lit-tag v=\""+escape_attr(StringUtils::split_wstring(Utils::tags2transferformat(ats[i].restrictions[j]),L".")[0])+L"\"/>\n"; teststr+=L" </begins-with>\n"; vector<wstring> resTags=StringUtils::split_wstring(Utils::tags2transferformat(ats[i].restrictions[j]),L"."); wstring partofspeech=resTags[0]; for (unsigned k=1 ; k<resTags.size(); k++) { wstring tag=resTags[k]; if(tag[0]==L'['){ wstring attr=tag.substr(1); teststr+=L" <equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"tl\" part=\"learned_"+attr+L"\"/>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"sl\" part=\"learned_"+attr+L"\"/>\n"; teststr+=L" </equal>\n"; } else if(tag[0]==L']'){ wstring attr=tag.substr(1); teststr+=L" <not><equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"tl\" part=\"learned_"+attr+L"\"/>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"sl\" part=\"learned_"+attr+L"\"/>\n"; teststr+=L" </equal></not>\n"; } else{ wstring attr=get_attribute_for_tag(tag,partofspeech); //teststr+=L" <or>\n"; teststr+=L" <equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" 
side=\"tl\" part=\""+attr+L"\"/>\n"; if(tag.size() >= 10 && tag.substr(0,10)==L"empty_tag_" ) teststr+=L" <lit v=\"\"/>\n"; else teststr+=L" <lit-tag v=\""+escape_attr(tag)+L"\"/>\n"; teststr+=L" </equal>\n"; //Not necessary anymore //A restriction with the same value that the sl tag may also mean that the tag dissappears (e.g., genders in es-en) //if(std::find(nongenTags.begin(), nongenTags.end(), tag) != nongenTags.end()) { //teststr+=L" <equal>\n"; //teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"tl\" part=\""+attr+L"\"/>\n"; //teststr+=L" <lit v=\"\"/>\n"; //teststr+=L" </equal>\n"; //} //teststr+=L" </or>\n"; } } teststr+=L" </and>\n"; } else { teststr+=L" <equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"tl\" part=\"tags\" queue=\"no\"/>\n"; teststr+=L" <lit-tag v=\""+escape_attr(Utils::tags2transferformat(ats[i].restrictions[j]))+L"\"/>\n"; teststr+=L" </equal>\n"; } int targetWordPos=ats[i].get_open_target_word_pos(j,false); if(targetWordPos!=-1) { wstring target_transfer_format=Utils::tags2transferformat(Utils::get_tags(ats[i].target[targetWordPos])); bool isGeneralised=false; for(int myi=0; myi<target_transfer_format.size(); myi++) { if(target_transfer_format[myi] == L'*') { target_transfer_format=target_transfer_format.substr(0,myi-1); isGeneralised=true; break; } } if(!isGeneralised){ if(!no_double_check_restrictions) { teststr+=L" <equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"tl\" part=\"tags\" queue=\"yes\"/>\n"; teststr+=L" <lit-tag v=\""+escape_attr(target_transfer_format)+L"\"/>\n"; teststr+=L" </equal>\n"; } } else{ vector<wstring> tltags=StringUtils::split_wstring(target_transfer_format,L"."); wstring partofspeech=tltags[0]; tltags.erase(tltags.begin()); vector<wstring> tlattrs; for(vector<wstring>::iterator it=tltags.begin(); it!=tltags.end(); ++it){ wstring attribute=get_attribute_for_tag((*it),*(tltags.begin())); tlattrs.push_back(attribute); } if(!no_double_check_restrictions) { if(tltags.size() > 
0) teststr+=L" <and>\n"; if(attributes.find(attribute_pos_group_name(partofspeech))==attributes.end()) { pair<wstring,pair<set<wstring>, set<wstring> > > newelement; newelement.first=attribute_pos_group_name(partofspeech); pair<set<wstring>, wstring> newelementvalue; newelementvalue.first.insert(partofspeech); //newelementvalue.second=L""; attributes.insert(newelement); } teststr+=L" <equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"tl\" part=\""+attribute_pos_group_name(partofspeech)+L"\"/>\n"; teststr+=L" <lit-tag v=\""+partofspeech+L"\"/>\n"; teststr+=L" </equal>\n"; for(int vcounter=0; vcounter < tltags.size(); vcounter++){ teststr+=L" <equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"tl\" part=\""+tlattrs[vcounter]+L"\"/>\n"; teststr+=L" <lit-tag v=\""+escape_attr(tltags[vcounter])+L"\"/>\n"; teststr+=L" </equal>\n"; } if(tltags.size() > 0) teststr+=L" </and>\n"; } } } //teststr+=L" </or>\n"; } //If we are working with generalised ATs, check also SL side, as //ATs with different left side may be grouped in the same rule if(sm_generalise || provided_patterns || using_explicit_empty_tags) { wstring source_tags_transfer_format=Utils::tags2transferformat(Utils::get_tags(ats[i].source[j])); wstring source_lemma=Utils::get_lemma(ats[i].source[j]); if(source_lemma.size() > 0 && (sm_generalise || provided_patterns)) { nconditions++; teststr+=L" <or>"; teststr+=L" <equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"sl\" part=\"lem\" />\n"; teststr+=L" <lit v=\""+StringUtils::substitute(source_lemma,L"_",L" ")+L"\"/>\n"; teststr+=L" </equal>\n"; teststr+=L" <equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"sl\" part=\"lem\" />\n"; teststr+=L" <lit v=\""; teststr+=toupper<wchar_t>(StringUtils::substitute(source_lemma,L"_",L" ")[0],loc); teststr+=StringUtils::substitute(source_lemma,L"_",L" ").substr(1)+L"\"/>\n"; teststr+=L" </equal>\n"; teststr+=L" </or>\n"; } vector<wstring> 
alltags=StringUtils::split_wstring(source_tags_transfer_format,L"."); remove_generalised_tags(source_tags_transfer_format); remove_final_asterisk(source_tags_transfer_format); vector<wstring> nongenTags=StringUtils::split_wstring(source_tags_transfer_format,L"."); for(int tagi=1; tagi<nongenTags.size(); tagi++) { if( !(nongenTags[tagi].size()>=3 && nongenTags[tagi].substr(0,3)==L"RES") ) { nconditions++; teststr+=L" <equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"sl\" part=\""+get_attribute_for_tag(nongenTags[tagi],nongenTags[0])+L"\" />\n"; if(nongenTags[tagi].size() >= 10 && nongenTags[tagi].substr(0,10)==L"empty_tag_" ) teststr+=L" <lit v=\"\"/>\n"; else teststr+=L" <lit-tag v=\""+escape_attr(nongenTags[tagi])+L"\"/>\n"; teststr+=L" </equal>\n"; } } //I think this chunk of code should be removed anyway but... if (!empty_restrictions_match_everything) for(int tagi=1; tagi<alltags.size(); tagi++) { wstring tag=alltags[tagi]; if(tag.size()>0 && tag[0]==L'*') { nconditions++; teststr+=L" <equal>\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"sl\" part=\"learned_"+tag.substr(1)+L"\" />\n"; teststr+=L" <clip pos=\""+Utils::itoa(j+1)+L"\" side=\"tl\" part=\"learned_"+tag.substr(1)+L"\" />\n"; teststr+=L" </equal>\n"; } } } } if (nconditions==0) { //All words were close words. 
We introduce a //condition that is always true teststr+=L" <equal>\n"; teststr+=L" <lit v=\"TRUE\"/>\n"; teststr+=L" <lit v=\"TRUE\"/>\n"; teststr+=L" </equal>\n"; //COMMENTED OUT SO THAT MY DEBUGGING SCRIPTS WORK PROPERLY //include_otherwise=false; } if (nconditions>1) // There are more than one restriction to test rule+=L" <and>\n"; rule+=teststr; if (nconditions>1) rule+=L" </and>\n"; rule+=L" </test>\n"; if (debug) { wstring s=StringUtils::substitute(ats[i].to_wstring(), L"><", L"."); s=StringUtils::substitute(s,L"<",L"-"); s=StringUtils::substitute(s,L">",L""); rule+=L" <out>\n"; rule+=L" <lu><lit v=\"(rid:"+Utils::itoa(rule_id)+L" at:"+s+L")\"/></lu>\n"; rule+=L" </out>\n"; } wstring letvarStrs=L""; rule+=L" <out>\n"; if (generate_chunks) { rule+=L" <chunk name=\""+chunkName+L"\" case=\"caseFirstWord\">"; rule+=L" <tags>"; rule+=L" <tag><lit-tag v=\"LRN\"/></tag>"; rule+=L" </tags>"; } int blank_pos=0; for(unsigned j=0; j<ats[i].target.size(); j++) { if (ats[i].target[j][0]!='<') { //It's a lexicalized word, we copy it as is wstring target_tags=Utils::tags2transferformat(Utils::get_tags(ats[i].target[j])); wstring tagstoprint=Utils::tags2transferformat(Utils::get_tags(ats[i].target[j])); vector<wstring> attributeNames=extract_attribute_names(tagstoprint); vector<wstring> tagvector=StringUtils::split_wstring(tagstoprint,L"."); //remove_generalised_tags(tagstoprint); //remove_final_asterisk(tagstoprint); int pos=-1; //Some tags come from bilingual dictionary. 
Get correct gender and number in case of GD/ND //if(attributeNames.size() > 0) // { // pos=ats[i].get_source_word_pos(j); // rule+=L" <call-macro n=\"f_set_genre_num\">\n"; // rule+=L" <with-param pos=\""+Utils::itoa(pos+1)+L"\"/>\n"; // rule+=L" </call-macro>\n"; // } // rule+=L" <out>\n"; rule+=L" <lu>\n"; rule+=L" <lit v=\""+StringUtils::substitute(Utils::get_lemma_without_queue(ats[i].target[j]),L"_",L" ")+L"\"/>\n"; //rule+=L" <lit-tag v=\""+tagstoprint+L"\"/>\n"; //Some tags come from bilingual dictionary. for(vector<wstring>::iterator it=tagvector.begin(); it!=tagvector.end(); ++it){ if( (*it).substr(0,1)==L"*" ) rule+=L" <clip pos=\""+Utils::itoa(pos+1)+L"\" side=\"sl\" part=\"learned_"+(*it).substr(1)+L"\"/>\n"; else if ( (*it).substr(0,1)==L")" ) { long locpos=Utils::wtol((*it).substr(1,4)); rule+=L" <clip pos=\""+Utils::itoa((int) (locpos+1))+L"\" side=\"tl\" part=\"learned_"+(*it).substr(4)+L"\"/>\n"; } else if ( (*it).substr(0,1)==L"(" ) { long locpos=Utils::wtol((*it).substr(1,4)); rule+=L" <clip pos=\""+Utils::itoa((int) (locpos+1))+L"\" side=\"sl\" part=\"learned_"+(*it).substr(4)+L"\"/>\n"; } else rule+=L" <lit-tag v=\""+escape_attr((*it))+L"\"/>\n"; } //if(attributeNames.size() > 0) //{ // for(vector<wstring>::iterator it=attributeNames.begin(); it!=attributeNames.end(); ++it){ // rule+=L" <clip pos=\""+Utils::itoa(pos+1)+L"\" side=\"sl\" part=\""+(*it)+L"\"/>\n"; // } //} //rule+=L" <lit-tag v=\""+target_tags+L"\"/>\n"; rule+=L" <lit v=\""+StringUtils::substitute(Utils::get_queue(ats[i].target[j]),L"_",L" ")+L"\"/>\n"; rule+=L" </lu>\n"; // rule+=L" </out>\n"; //Some tags come from bilingual dictionary. 
Copy gender/number to global variables // if(attributeNames.size() > 0) // { // rule+=L" <call-macro n=\"f_genre_num\">\n"; // rule+=L" <with-param pos=\""+Utils::itoa(pos+1)+L"\"/>\n"; // rule+=L" </call-macro>\n"; // } //Copy gender/number to global variables in case that they come directly from AT wstring genre=Utils::get_tag_value(tagstoprint,L"m|f"); if(genre.length()>0) letvarStrs+=L" <let><var n=\"genre\"/><lit-tag v=\""+genre+L"\"/></let>\n"; wstring number=Utils::get_tag_value(tagstoprint,L"sg|pl"); if(number.length()>0) letvarStrs+=L" <let><var n=\"number\"/><lit-tag v=\""+number+L"\"/></let>\n"; } else { wstring tagstoprint=Utils::tags2transferformat(Utils::get_tags(ats[i].target[j])); vector<wstring> attributeNames=extract_attribute_names(tagstoprint); vector<wstring> tagvector=StringUtils::split_wstring(tagstoprint,L"."); //remove_generalised_tags(tagstoprint); //remove_final_asterisk(tagstoprint); int pos=ats[i].get_first_open_source_word_pos(j); //Some tags come from bilingual dictionary. 
Get correct gender and number in case of GD/ND // if(attributeNames.size() > 0) // { // int pos=ats[i].get_source_word_pos(j); // rule+=L" <call-macro n=\"f_set_genre_num\">\n"; // rule+=L" <with-param pos=\""+Utils::itoa(pos+1)+L"\"/>\n"; // rule+=L" </call-macro>\n"; // } // rule+=L" <out>\n"; rule+=L" <lu>\n"; if(do_rbpe){ rule+=L" <clip pos=\""+Utils::itoa(pos+1)+L"\" side=\"sl\" part=\"lemh\"/>\n"; } else{ rule+=L" <clip pos=\""+Utils::itoa(pos+1)+L"\" side=\"tl\" part=\"lemh\"/>\n"; } for(vector<wstring>::iterator it=tagvector.begin(); it!=tagvector.end(); ++it){ if( (*it).substr(0,1)==L"*" ) rule+=L" <clip pos=\""+Utils::itoa(pos+1)+L"\" side=\"sl\" part=\"learned_"+(*it).substr(1)+L"\"/>\n"; else if ( (*it).substr(0,1)==L")" ) { long locpos=Utils::wtol((*it).substr(1,4)); rule+=L" <clip pos=\""+Utils::itoa((int) (locpos+1))+L"\" side=\"tl\" part=\"learned_"+(*it).substr(4)+L"\"/>\n"; } else if ( (*it).substr(0,1)==L"(" ) { long locpos=Utils::wtol((*it).substr(1,4)); rule+=L" <clip pos=\""+Utils::itoa((int) (locpos+1))+L"\" side=\"sl\" part=\"learned_"+(*it).substr(4)+L"\"/>\n"; } else rule+=L" <lit-tag v=\""+escape_attr((*it))+L"\"/>\n"; } //rule+=L" <lit-tag v=\""+tagstoprint+L"\"/>\n"; //for(vector<wstring>::iterator it=attributeNames.begin(); it!=attributeNames.end(); ++it){ // rule+=L" <clip pos=\""+Utils::itoa(pos+1)+L"\" side=\"tl\" part=\""+(*it)+L"\"/>\n"; //} if(do_rbpe){ rule+=L" <clip pos=\""+Utils::itoa(pos+1)+L"\" side=\"sl\" part=\"lemq\"/>\n"; } else{ rule+=L" <clip pos=\""+Utils::itoa(pos+1)+L"\" side=\"tl\" part=\"lemq\"/>\n"; } rule+=L" </lu>\n"; // rule+=L" </out>\n"; //Some tags come from bilingual dictionary. 
Copy gender/number to global variables // if(attributeNames.size() > 0) // { // rule+=L" <call-macro n=\"f_genre_num\">\n"; // rule+=L" <with-param pos=\""+Utils::itoa(pos+1)+L"\"/>\n"; // rule+=L" </call-macro>\n"; // } //Copy gender/number to global variables in case that they come directly from AT wstring genre=Utils::get_tag_value(tagstoprint,L"m|f"); if(genre.length()>0) letvarStrs+=L" <let><var n=\"genre\"/><lit-tag v=\""+genre+L"\"/></let>\n"; wstring number=Utils::get_tag_value(tagstoprint,L"sg|pl"); if(number.length()>0) letvarStrs+=L" <let><var n=\"number\"/><lit-tag v=\""+number+L"\"/></let>\n"; } if (blank_pos<(int)(ats[i].source.size()-1)) { // rule+=L" <out>\n"; rule+=L" <b pos=\""+Utils::itoa(blank_pos+1)+L"\"/>\n"; // rule+=L" </out>\n"; blank_pos++; } else if (j<(ats[i].target.size()-1)) { //TL output string has more words than the SL pattern detected // rule+=L" <out>\n"; rule+=L" <b/>\n"; // rule+=L" </out>\n"; } } if (generate_chunks) { rule+=L" </chunk>\n"; } rule+=L" </out>\n"; if (debug) { rule+=L" <out>\n"; rule+=L" <lu><lit v=\"(END)\"/></lu>\n"; rule+=L" </out>\n"; } //If there are remaining blanks we print them out if they have //format information inside. 
This is caused by a SL input string //longer than the TL output one for (unsigned j=ats[i].target.size(); j<ats[i].source.size(); j++) { rule+=L" <call-macro n=\"f_bcond\">\n"; rule+=L" <with-param pos=\""+Utils::itoa(j)+L"\"/>\n"; rule+=L" <with-param pos=\""+Utils::itoa(j+1)+L"\"/>\n"; rule+=L" </call-macro>\n"; } rule+=letvarStrs; rule+=L" </when>\n"; if(!include_otherwise) { //As the condition will always be met it has no sense to include //further ATs break; } } //Actions to perform when none of the ATs can be applied //word-for-word translation if(include_otherwise) { rule+=L" <otherwise><!--Word-for-word translation-->\n"; if (debug) { rule+=L" <out>\n"; rule+=L" <lu><lit v=\"(rid:"+Utils::itoa(rule_id)+L" at:word-for-word)\"/></lu>\n"; rule+=L" </out>\n"; } if(use_discard_rule) { rule+=L" <reject-current-rule shifting=\"no\" />\n"; rule+=L" <out><chunk name=\"any\" case=\"caseFirstWord\"><tags> <tag><lit-tag v=\"LRN\"/></tag> </tags> <lu>"; if(do_rbpe){ rule+=L"<clip pos=\"1\" side=\"sl\" part=\"whole\"/>"; } else{ rule+=L"<clip pos=\"1\" side=\"tl\" part=\"whole\"/>"; } rule+=L"</lu></chunk></out> </action><!--isolated word-->\n"; } else { for(unsigned i=0; i<ats[0].source.size(); i++) { rule+=L" <call-macro n=\"f_genre_num\">\n"; rule+=L" <with-param pos=\""+Utils::itoa(i+1)+L"\"/>\n"; rule+=L" </call-macro>\n"; rule+=L" <call-macro n=\"f_set_genre_num\">\n"; rule+=L" <with-param pos=\""+Utils::itoa(i+1)+L"\"/>\n"; rule+=L" </call-macro>\n"; rule+=L" <out>\n"; rule+=L" <lu>\n"; rule+=L" <clip pos=\""+Utils::itoa(i+1)+L"\" side=\"tl\" part=\"whole\"/>\n"; //rule+=L" <clip pos=\""+Utils::itoa(i+1)+L"\" side=\"tl\" part=\"lemh\"/>\n"; //rule+=L" <clip pos=\""+Utils::itoa(i+1)+L"\" side=\"tl\" part=\"tags\"/>\n"; //rule+=L" <clip pos=\""+Utils::itoa(i+1)+L"\" side=\"tl\" part=\"lemq\"/>\n"; rule+=L" </lu>\n"; if (i<(ats[0].source.size()-1)) rule+=L" <b pos=\""+Utils::itoa(i+1)+L"\"/>\n"; rule+=L" </out>\n"; } if (debug) { rule+=L" <out>\n"; rule+=L" <lu><lit 
v=\"(END)\"/></lu>\n"; rule+=L" </out>\n"; } } rule+=L" </otherwise>\n"; } rule+=L" </choose>\n"; rule+=L" </action>\n"; rule+=L"</rule>\n"; return rule; }
/* Program main */ int main(int argc, char *argv[]) { cuuid_t cuuid; int c, rtn; unsigned int n = 1; char *fname = NULL; FILE *fp = NULL; int fout = 0, nout = 0, fmat = PRINT_FORMAT_HEX_BACKET, vbose = 0, gen = 1; int option_index; char tag[3] = {0x0, 0x0, 0x0}; static struct option long_options[] = { {"category", required_argument, 0, 0}, {"set-safe", no_argument, 0, 0}, {"set-master", no_argument, 0, 0}, {"set-public", no_argument, 0, 0}, {"set-sign", no_argument, 0, 0}, {"set-tag", no_argument, 0, 0}, {"set-strict", no_argument, 0, 0}, {"list-categories",no_argument, 0, 0}, {"rev", required_argument, 0, 0}, {"tag", required_argument, 0, 0}, {"rand-seed", required_argument, 0, 0}, {"memory-seed", required_argument, 0, 0}, {"output-hex", no_argument, 0, 'x'}, {"output-number", no_argument, 0, 'i'}, {"verbose", no_argument, 0, 'V'}, {"version", no_argument, 0, 'v'}, {"help", no_argument, 0, 'h'}, {0, 0, 0, 0 } }; /* Register interrupt handler */ signal(SIGINT, set_signint); while (1) { option_index = 0; c = getopt_long(argc, argv, "c:d:o:qxivVh", long_options, &option_index); if (c == -1) break; switch (c) { case 0: if (!strcmp("rand-seed", long_options[option_index].name)) { quid_set_rnd_seed(atoi(optarg)); } else if (!strcmp("memory-seed", long_options[option_index].name)) { quid_set_mem_seed(atoi(optarg)); } else if (!strcmp("rev", long_options[option_index].name)) { switch (atoi(optarg)) { case 4: cuuid.version = QUID_REV4; break; case 7: default: cuuid.version = QUID_REV7; break; } } else if (!strcmp("tag", long_options[option_index].name)) { if (strlen(optarg) != 3) { printf("tag must be 3 characters\n"); printf("see --help for more information\n"); return 1; } tag[0] = optarg[0]; tag[1] = optarg[1]; tag[2] = optarg[2]; } else if (!strcmp("list-categories", long_options[option_index].name)) { printf("%d) %s\n", CLS_CMON, category_name(CLS_CMON)); printf("%d) %s\n", CLS_INFO, category_name(CLS_INFO)); printf("%d) %s\n", CLS_WARN, category_name(CLS_WARN)); 
printf("%d) %s\n", CLS_ERROR, category_name(CLS_ERROR)); gen = 0; } else if (!strcmp("set-safe", long_options[option_index].name)) { flg |= IDF_IDSAFE; } else if (!strcmp("set-public", long_options[option_index].name)) { flg |= IDF_PUBLIC; } else if (!strcmp("set-master", long_options[option_index].name)) { flg |= IDF_MASTER; } else if (!strcmp("set-tag", long_options[option_index].name)) { flg |= IDF_TAGGED; } else if (!strcmp("set-strict", long_options[option_index].name)) { flg |= IDF_STRICT; } else if (!strcmp("set-sign", long_options[option_index].name)) { flg |= IDF_SIGNED; } else if (!strcmp("category", long_options[option_index].name)) { cat = (char)atoi(optarg); switch (cat) { case CLS_CMON: case CLS_INFO: case CLS_WARN: case CLS_ERROR: break; default: printf("unknown category %d\n", cat); printf("see --help for more information\n"); return 1; } } break; case 'c': n = atoi(optarg); break; case 'd': delay = atoi(optarg); break; case 'o': fname = optarg; fout = 1; break; case 'x': fmat = PRINT_FORMAT_HEX; break; case 'i': fmat = PRINT_FORMAT_DEC; break; case 'q': nout = 1; break; case 'V': vbose = 1; break; case 'v': print_version(); return 0; case 'h': case '?': default: usage(); return 0; } } /* Identifier as positional argument */ if (optind < argc) { cuuid_t uuid; while (optind < argc) { printf("%s\t", argv[optind]); if (quid_parse(argv[optind], &uuid)) { printf("VALID\n"); if (vbose) input_verbose(uuid); } else printf("INVALID\n"); optind++; } return 0; } /* Output new identifiers */ if (gen) { gettimeofday(&t1, NULL); /* File output */ if (fout) { rtn = check_fname(fname); if (!rtn) { fp = fopen(fname, "a"); } else if (rtn == 1) { printf("%s is a directory\n", fname); return 1; } else if (rtn == 2){ printf("%s already exists\n", fname); return 1; } } for (i=0; i<n; ++i) { assert(cat != 0); quid_create(&cuuid, flg, cat, tag); if (intflag) { break; } if (!fout){ if (!nout) { quid_print(cuuid, fmat); } } else { quid_print_file(fp, cuuid, fmat); } if 
(delay) { qusleep(delay * 1000); } ticks = clock(); } if (fp) { fclose(fp); } gettimeofday(&t2, NULL); } /* Show counters */ if (vbose && gen) { generate_verbose(); } return 0; }