Logo Search packages:      
Sourcecode: apertium version File versions  Download package

void HMM2::read_dictionary ( FILE *  is  ) 

It reads the expanded dictionary received as a parameter and calculates the set of ambiguity classes that the tagger will manage.

Parameters:
is — the input stream with the expanded dictionary to read

Definition at line 394 of file HMM2.C.

References MorphoStream2::get_next_word(), TaggerWord::get_tags(), and Collection::size().

                                 {
  // Builds the set of ambiguity classes from the expanded dictionary and
  // then sizes the probability tables through td->setProbabilities().
  //
  // Classes are recorded purely by indexing `output`; the returned index is
  // not needed here, so it is discarded.
  Collection &output = td->getOutput();

  MorphoStream2 morpho_stream(fdic, true, td);

  // The input dictionary must contain every punctuation mark, including the
  // end-of-sentence mark.
  int words_read = 0;
  for (TaggerWord *current = morpho_stream.get_next_word();
       current != NULL;
       current = morpho_stream.get_next_word()) {
    // Progress indicator: one dot per 10000 words read.
    ++words_read;
    if (words_read % 10000 == 0) {
      cerr << '.' << flush;
    }

    // Words without any tag do not contribute an ambiguity class.
    set<TTag> word_tags = current->get_tags();
    if (!word_tags.empty()) {
      output[word_tags];
    }

    // Each word is deleted here once its tags have been recorded.
    delete current;
  }
  cerr << "\n";

  // OPEN AMBIGUITY CLASS
  // Holds all tags that are not closed; unknown words are assigned to it.
  output[td->getOpenClass()];

  int N = (td->getTagIndex()).size();

  // Make sure there is a single-tag ambiguity class for every tag
  // (created here if it does not exist yet).
  for (int tag = 0; tag != N; tag++) {
    set<TTag> singleton;
    singleton.insert(tag);
    output[singleton];
  }

  int M = output.size();

  cerr << N << " states and " << M << " ambiguity classes\n";
  td->setProbabilities(N, M);
}


Generated by  Doxygen 1.6.0   Back to index