Logo Search packages:      
Sourcecode: apertium version File versions  Download package

void HMM2::read_dictionary ( FILE *  is  ) 

Reads the expanded dictionary passed as a parameter and computes the set of ambiguity classes that the tagger will handle.

Parameter `is`: the input stream containing the expanded dictionary to read.

Definition at line 394 of file HMM2.C.

References MorphoStream2::get_next_word(), TaggerWord::get_tags(), and Collection::size().

  int i, k, nw=0;
  TaggerWord *word=NULL;
  set <TTag> tags;
  Collection &output = td->getOutput();
  MorphoStream2 morpho_stream(fdic, true, td);
  // The input dictionary must contain all punctuation marks, including the end-of-sentence mark
  word = morpho_stream.get_next_word();
  while (word) {
    if (++nw%10000==0) cerr<<'.'<<flush;
    tags = word->get_tags();

    if (tags.size()>0)
      k = output[tags];

    delete word;
    word = morpho_stream.get_next_word();
  // It contains all tags that are not closed.
  // Unknown words are assigned the open ambiguity class

  int N = (td->getTagIndex()).size();  
  // For each tag, create an ambiguity class holding just that single tag,
  // if it has not been created yet.
  for(i = 0; i != N; i++) {
    set<TTag> amb_class;

  int M = output.size();
  cerr<< N <<" states and "<< M <<" ambiguity classes\n";
  td->setProbabilities(N, M);

Generated by  Doxygen 1.6.0   Back to index