Logo Search packages:      
Sourcecode: apertium version File versions  Download package

void HMM::read_dictionary ( FILE *  is  ) 

It reads the expanded dictionary received as a parameter and calculates the set of ambiguity classes that the tagger will manage.

Parameters:
is the input stream with the expanded dictionary to read

Definition at line 402 of file hmm.cc.

References MorphoStream::get_next_word(), TaggerWord::get_tags(), and Collection::size().

                                {
  // Reads every word of the expanded dictionary through a MorphoStream,
  // passes each non-empty tag set to the tagger's output Collection, then
  // does the same for the open class and for one singleton set per tag.
  // Finally reports the number of states (N) and ambiguity classes (M) and
  // hands both to td->setProbabilities().
  //
  // Collection::operator[] is called only for its side effect (presumably it
  // registers the set when it is not yet present -- confirm against
  // Collection); the value it returns is not needed here.
  Collection &output = td->getOutput();

  // NOTE(review): `fdic` is not declared inside this body; presumably it is
  // the FILE* parameter of the definition (the generated docs call it `is`)
  // -- confirm against hmm.cc.
  MorphoStream morpho_stream(fdic, true, td);

  // In the input dictionary there must be all punctuation marks, including the end-of-sentence mark
  int nw = 0;
  TaggerWord *word = morpho_stream.get_next_word();

  while (word) {
    // Progress indicator: one dot on stderr every 10000 words.
    if (++nw%10000==0) wcerr<<L'.'<<flush;

    // Bound by const reference so the tag set is not copied for every word;
    // the reference is only used before `word` is deleted below.
    const set<TTag> &tags = word->get_tags();

    if (tags.size()>0)
      static_cast<void>(output[tags]);

    // The words returned by get_next_word() are released here.
    delete word;
    word = morpho_stream.get_next_word();
  }
  wcerr<<L"\n";

  // OPEN AMBIGUITY CLASS
  // It contains all tags that are not closed.
  // Unknown words are assigned the open ambiguity class
  static_cast<void>(output[td->getOpenClass()]);

  int N = (td->getTagIndex()).size();

  // Create ambiguity class holding one single tag for each tag.
  // If not created yet
  for (int i = 0; i != N; i++) {
    set<TTag> amb_class;
    amb_class.insert(i);
    static_cast<void>(output[amb_class]);
  }

  // Read the size only after all the calls above, so M counts every class.
  int M = output.size();

  wcerr<< N <<L" states and "<< M <<L" ambiguity classes\n";
  td->setProbabilities(N, M);
}


Generated by  Doxygen 1.6.0   Back to index