Logo Search packages:      
Sourcecode: apertium version File versions  Download package

void HMM::read_dictionary ( FILE *  is  ) 

Reads the expanded dictionary passed as a parameter and computes the set of ambiguity classes that the tagger will manage.

Parameter `is`: the input stream from which the expanded dictionary is read.

Definition at line 402 of file hmm.cc.

References MorphoStream::get_next_word(), TaggerWord::get_tags(), and Collection::size().

  // Body of HMM::read_dictionary: scans every word of the expanded dictionary,
  // looks up each non-empty tag set in the tagger's output collection, and
  // finally hands the state and ambiguity-class counts to the tagger data.
  // NOTE(review): this listing looks truncated -- the closing braces of the
  // while and for loops and the rest of the for body are not visible here.
  // nw counts the words read so far; k receives the result of output[tags].
  int i, k, nw=0;
  TaggerWord *word=NULL;
  set <TTag> tags;
  Collection &output = td->getOutput();
  // NOTE(review): the stream is built from `fdic`, while the documented
  // parameter is named `is` -- confirm against the real signature.
  MorphoStream morpho_stream(fdic, true, td);
  // In the input dictionary there must be all punctuation marks, including the end-of-sentence mark
  word = morpho_stream.get_next_word();
  // Loop until get_next_word() returns a null pointer.
  while (word) {
    // Progress indicator: print one dot to wcerr every 10000 words.
    if (++nw%10000==0) wcerr<<L'.'<<flush;
    tags = word->get_tags();

    // Only non-empty tag sets are looked up in the collection.
    // Presumably output[tags] registers the set as an ambiguity class if it
    // is new -- TODO confirm; k is not read again in the visible lines.
    if (tags.size()>0)
      k = output[tags];

    // Each word returned by get_next_word() is deleted here before fetching the next one.
    delete word;
    word = morpho_stream.get_next_word();
  // It contains all tags that are not closed.
  // Unknown words are assigned the open ambiguity class

  // N is the number of entries in the tag index; it is reported below as the number of states.
  int N = (td->getTagIndex()).size();  
  // Create ambiguity class holding one single tag for each tag.
  // If not created yet
  for(i = 0; i != N; i++) {
    set<TTag> amb_class;

  // M is the number of entries in the output collection, reported as the
  // number of ambiguity classes.
  int M = output.size();
  wcerr<< N <<L" states and "<< M <<L" ambiguity classes\n";
  // Pass both counts to the tagger data object.
  td->setProbabilities(N, M);

Generated by  Doxygen 1.6.0   Back to index