-
Notifications
You must be signed in to change notification settings - Fork 2
/
decoder.cpp
133 lines (107 loc) · 4.44 KB
/
decoder.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#include "decoder.h"
#include <iostream>
#include <limits>
#include <map>
#include <sstream>
#include <vector>
#include "lexicon.h"
#include "gzstream/gzstream.h"
#include "global.h"
#include "ptree.h"
#include "aStar.h"
#include "cost.h"
/**
 * Constructs a decoder.
 *
 * Allocates one lexicon tagged `french`, one tagged `english` and an empty
 * two-level phrase tree, then populates them from the phrase table.
 *
 * @param filename         phrase table file, passed on to readTable()
 * @param prune_threshold  cost threshold, passed on to readTable()
 * @param prune_count      per-source-phrase count limit, passed on to readTable()
 */
Decoder::Decoder(const char filename[], double prune_threshold, unsigned int prune_count)
    : flex(new Lexicon(french)),
      elex(new Lexicon(english)),
      schwarz(new PTree<PTree<Cost> >)
{
    this->readTable(filename, prune_threshold, prune_count);
}
/// Releases the two lexicons and the phrase tree allocated by the constructor.
Decoder::~Decoder()
{
    delete flex;
    flex = nullptr;

    delete elex;
    elex = nullptr;

    delete schwarz;
    schwarz = nullptr;
}
/**
 * Bundles a reference sentence with the n-best list produced for it.
 * Both pointers are stored as given; nothing is copied.
 *
 * @param ref    the reference sentence
 * @param nBest  the n-best list belonging to that sentence
 */
Decoder::hypRefPair::hypRefPair(Sentence* ref, nBestList* nBest)
    : reference(ref),
      nBest(nBest)
{
}
/// Intentionally releases nothing: hypRefPair objects are copied by value into
/// vectors elsewhere in this file, so deleting nBest here would free the same
/// list once per copy.
Decoder::hypRefPair::~hypRefPair()
{
    // nBest is deliberately not deleted here.
}
/**
 * Translates a single sentence.
 *
 * Delegates to aStar::Suchalgorithmus, handing it the phrase tree and both
 * lexicons. Other code in this file deletes the returned pointer after use,
 * so the caller is expected to own the result.
 *
 * @param sent  the sentence to translate
 * @return the n-best list produced by the search
 */
Decoder::nBestList* Decoder::translate(Decoder::Sentence& sent)
{
    nBestList* best = aStar::Suchalgorithmus(sent, schwarz, elex, flex);
    return best;
}
/**
 * Translates one sentence and pairs the result with its reference.
 *
 * @param french  the sentence to translate
 * @param ref     the reference sentence; only its address is stored, so it
 *                must outlive the returned pair
 * @return a newly allocated hypRefPair
 */
Decoder::hypRefPair* Decoder::translate(Decoder::Sentence& french,
                                        Decoder::Sentence& ref)
{
    nBestList* hypotheses = translate(french);
    return new hypRefPair(&ref, hypotheses);
}
/**
 * Translates every sentence of a corpus.
 *
 * Each per-sentence n-best list is copied into the result vector and the
 * heap object returned by the single-sentence overload is then deleted.
 *
 * @param french  the sentences to translate
 * @return a newly allocated vector with one n-best list per input sentence,
 *         in input order
 */
std::vector<Decoder::nBestList>* Decoder::translate(std::vector<Decoder::Sentence>& french)
{
    typedef std::vector<Sentence>::iterator SentenceIter;

    std::vector<nBestList>* all = new std::vector<nBestList>();
    for (SentenceIter it = french.begin(); it != french.end(); ++it) {
        nBestList* best = translate(*it);
        all->push_back(*best);
        delete best;
    }
    return all;
}
/**
 * Translates every sentence of a corpus and pairs each result with the
 * reference sentence at the same index.
 *
 * Only as many pairs are produced as both vectors have elements, so a
 * shorter `ref` can no longer be indexed out of bounds.
 *
 * @param french  the sentences to translate
 * @param ref     the reference sentences; each pair stores the address of
 *                its element, so `ref` must outlive the result and must not
 *                be resized while the result is in use
 * @return a newly allocated vector of pairs, in input order. The pairs do
 *         not free their n-best lists (hypRefPair's destructor releases
 *         nothing), so the caller has to delete each `nBest` itself.
 */
std::vector<Decoder::hypRefPair>* Decoder::translate(std::vector<Decoder::Sentence>& french,
                                                     std::vector<Decoder::Sentence>& ref)
{
    typedef std::vector<Sentence>::size_type Index;

    std::vector<hypRefPair>* result = new std::vector<hypRefPair>();
    const Index count = french.size() < ref.size() ? french.size() : ref.size();
    for (Index i = 0; i < count; ++i) {
        // Build the pair as a temporary: the vector stores a copy anyway, so
        // allocating it with new (as before) only leaked one object per sentence.
        result->push_back(hypRefPair(&ref[i], translate(french[i])));
    }
    return result;
}
/**
 * Reads the phrase table and fills the phrase tree `schwarz` and both lexicons.
 *
 * Expected line format:
 *   relfreq_f relfreq_e # source phrase # target phrase # singlecf singlece
 *   # source_to_target target_to_source # unigram language model
 *
 * Pruning is applied per source phrase: an entry is dropped when its cost
 * exceeds the best cost seen so far for that source phrase by more than
 * `prune_threshold`, or when the count limit is exceeded. Lines that cannot
 * be parsed completely are skipped.
 *
 * @param filename         phrase table file, opened through igzstream
 * @param prune_threshold  maximum allowed cost distance to the best entry
 * @param prune_count      limit on accepted entries per source phrase
 */
void Decoder::readTable(const char filename[], double prune_threshold, unsigned int prune_count){
    // Replaces the former 1./0., which relied on floating-point division by zero.
    const double infinity = std::numeric_limits<double>::infinity();

    //================== Reading the phrase table ============================
    // For every source phrase: (number of entries accepted so far, best cost seen so far).
    PTree< pair <unsigned int, double> > pruningTree;
    // Initial value for a source phrase that has not been seen yet.
    pair <unsigned int, double> pruningStart;
    pruningStart.first = 0;
    pruningStart.second = infinity;

    igzstream in(filename);
    std::string line, token;
    while (getline(in, line)) {
        std::stringstream ist(line);
        double relfreq_f = 0.;
        double relfreq_e = 0.;
        double source_to_target = 0.;
        double target_to_source = 0.;
        double unigram_sprachmodell = 0.;
        unsigned int singlecf = 0;
        unsigned int singlece = 0;
        vector<uint> ephrase, fphrase;

        ist >> relfreq_f >> relfreq_e >> token; // token swallows the "#"
        while (ist >> token && token != "#") {
            fphrase.push_back(flex->getWord_or_add(token).wordId());
        }
        while (ist >> token && token != "#") {
            ephrase.push_back(elex->getWord_or_add(token).wordId());
        }
        ist >> singlecf >> singlece >> token
            >> source_to_target >> target_to_source >> token
            >> unigram_sprachmodell;

        // A blank or truncated line leaves the stream failed; without this check
        // the cost would be computed from values that were never read.
        if (!ist) continue;

        Cost kosten = Cost();
        kosten.calc(relfreq_f, relfreq_e, fphrase, ephrase, singlecf, singlece,
                    source_to_target, target_to_source, unigram_sprachmodell);
        const double kosten_insgesamt = kosten.cost();

        pair<unsigned int, double>* pruning_infos = &pruningTree.traverse(fphrase, true, pruningStart)->c;

        // Pruning: skip entries that are too far from the best one or over the count limit.
        // NOTE(review): `first > prune_count` admits prune_count + 1 entries per
        // source phrase; confirm whether `>=` was intended.
        if (kosten_insgesamt > pruning_infos->second + prune_threshold
            || pruning_infos->first > prune_count) {
            continue;
        }

        // Remember the best cost. This update was commented out on the grounds that
        // the input is sorted, but even then the first entry has to be recorded:
        // otherwise the best cost stays at infinity and the threshold test above
        // can never trigger.
        if (kosten_insgesamt < pruning_infos->second) {
            pruning_infos->second = kosten_insgesamt;
        }
        pruning_infos->first++;

        schwarz->traverse(fphrase, true)->c.traverse(ephrase, true, Cost(infinity))->c = kosten;
    }
}
/**
 * Splits a line on whitespace and maps every token to its word id.
 *
 * Tokens are looked up with getWord_or_add(), so unknown words are added to
 * the lexicon as a side effect.
 *
 * @param lex   lexicon used for the lookup
 * @param line  the text to tokenize
 * @return a newly allocated Sentence holding the word ids in token order;
 *         the caller owns it
 */
Decoder::Sentence* Decoder::parseLine(Lexicon* lex, const std::string& line)
{
    std::istringstream tokens(line);
    Sentence* words = new Sentence;
    for (std::string word; tokens >> word; ) {
        words->push_back(lex->getWord_or_add(word).wordId());
    }
    return words;
}
/**
 * Reads a file line by line and converts every line into a Sentence.
 *
 * The file is opened through igzstream. Each line is parsed with
 * parseLine(); the parsed sentence is copied into the result and the
 * temporary heap object is deleted.
 *
 * @param lex   lexicon handed to parseLine()
 * @param file  path of the file to read
 * @return a newly allocated vector with one Sentence per line, in file
 *         order; the caller owns it
 */
std::vector<Decoder::Sentence>* Decoder::parseFile(Lexicon* lex, const char file[])
{
    igzstream input(file);
    std::vector<Sentence>* sentences = new std::vector<Sentence>();
    for (std::string row; getline(input, row); ) {
        Sentence* current = parseLine(lex, row);
        sentences->push_back(*current);
        delete current;
    }
    return sentences;
}