Logo Search packages:      
Sourcecode: apertium version File versions  Download package

void HMM::tagger(FILE *in,
                 FILE *out,
                 bool show_all_good_first = false)

Tagging algorithm (Viterbi implementation).

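Parameters:
  in                   the input stream with the untagged text to tag
  out                  the output stream with the tagged text
  show_all_good_first  if true, each disambiguated word is written with get_all_choosen_tag_first() instead of get_lexical_form()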

Definition at line 704 of file hmm.cc.

References find_similar_ambiguity_class(), MorphoStream::get_next_word(), TaggerWord::get_string_tags(), TaggerWord::get_superficial_form(), TaggerWord::get_tags(), and Collection::has_not().

  // Body of HMM::tagger: reads words from `in` through a MorphoStream, scores
  // every tag of each word against the tags of the previous word, and writes
  // the chosen analyses to `out` each time a word with a single tag is reached.
  //
  // NOTE(review): this listing contains no closing braces and some statements
  // appear to have been dropped by the documentation generator, so the exact
  // nesting of the statements below cannot be determined from this excerpt.
  int i, j, k, nw;
  TaggerWord *word=NULL;
  TTag tag;
  set <TTag> tags, pretags;
  set <TTag>::iterator itag, jtag;
  // loli accumulates -log(prob) for every unambiguous word that is flushed.
  double prob, loli, x;
  int N = td->getN();  
  // Two rows each, selected with nwpend%2 (current) and 1-nwpend%2 (previous):
  // alpha holds the score per tag, best holds the tag sequence per tag.
  double alpha[2][N];
  vector<TTag> best[2][N];
  // Words whose output is still pending; indexed in parallel with best[..][tag].
  vector <TaggerWord> wpend; 
  int nwpend;
  MorphoStream morpho_stream(in, debug, td);                             

  Collection &output = td->getOutput();
  loli = nw = 0;
  // NOTE(review): eos is not declared in this excerpt -- presumably a class
  // member holding the end-of-sentence tag; confirm against the class.
  alpha[0][eos] = 1;
  word = morpho_stream.get_next_word();
  while (word) {
    // NOTE(review): nothing visible here appends to wpend -- presumably a
    // wpend.push_back(*word) was elided from this listing; confirm.
    nwpend = wpend.size();
    pretags = tags; // Tags from the previous word

    tags = word->get_tags();
    if (tags.size()==0) // This is an unknown word
      tags = td->getOpenClass();
    if (output.has_not(tags)) {  // Found an ambiguity class that was unknown until now
      if (debug) {
        wstring errors;
      errors = L"A new ambiguity class was found. \n";
      errors+= L"Retraining the tagger is necessary so as to take it into account.\n";
      errors+= L"Word '"+word->get_superficial_form()+L"'.\n";
      errors+= L"New ambiguity class: "+word->get_string_tags()+L"\n";
      wcerr<<L"Error: "<<errors;
      // Replace the unknown class with the one returned by
      // find_similar_ambiguity_class().
      tags = find_similar_ambiguity_class(tags);
    k = output[tags];  //Ambiguity class the word belongs to
    // Reset the current row before filling it in for this word.
    clear_array_double(alpha[nwpend%2], N);    
    clear_array_vector(best[nwpend%2], N);
    // NOTE(review): i and j are read below but never assigned in this excerpt --
    // presumably i=*itag and j=*jtag were elided from the listing; confirm.
    for (itag=tags.begin(); itag!=tags.end(); itag++) { //For all tag from the current word
      for (jtag=pretags.begin(); jtag!=pretags.end(); jtag++) {   //For all tags from the previous word
      // Candidate score: previous score of j times A[j][i] times B[i][k].
      x = alpha[1-nwpend%2][j]*(td->getA())[j][i]*(td->getB())[i][k];
      // Keep the candidate when it is at least as large as the current score
      // for i (ties are overwritten by the later j).
      if (alpha[nwpend%2][i]<=x) {
        if (nwpend>1) 
          best[nwpend%2][i] = best[1-nwpend%2][j];
        alpha[nwpend%2][i] = x;
    // The word has exactly one tag: write out the sequence stored for it.
    if (tags.size()==1) {       
      tag = *tags.begin();      
      prob = alpha[nwpend%2][tag];
      if (prob>0) 
      loli -= log(prob);
      else {
        if (debug)
        wcerr<<L"Problem with word '"<<word->get_superficial_form()<<L"' "<<word->get_string_tags()<<L"\n";
      // Write each pending word using the tag stored at the same position.
      for (unsigned t=0; t<best[nwpend%2][tag].size(); t++) {
      if (show_all_good_first) {
        wstring const &micad = wpend[t].get_all_choosen_tag_first(best[nwpend%2][tag][t], (td->getTagIndex())[L"TAG_kEOF"]);
        fputws_unlocked(micad.c_str(), out); 
      } else {
        wstring const &micad = wpend[t].get_lexical_form(best[nwpend%2][tag][t], (td->getTagIndex())[L"TAG_kEOF"]);
        fputws_unlocked(micad.c_str(), out); 
      //Return to the initial state
      alpha[0][tag] = 1;
    // The word is deleted here before fetching the next one.
    delete word;
    word = morpho_stream.get_next_word();    
  // Input exhausted: if the last tag set still has more than one tag, report
  // it (debug mode only).
  if ((tags.size()>1)&&(debug)) {
    wstring errors;
    errors = L"The text to disambiguate has finished, but there are ambiguous words that has not been disambiguated.\n";
    errors+= L"This message should never appears. If you are reading this ..... these are very bad news.\n";
    wcerr<<L"Error: "<<errors;

Generated by  Doxygen 1.6.0   Back to index