Logo Search packages:      
Sourcecode: apertium version File versions  Download package

void HMM::read_dictionary ( FILE *  is  ) 

It reads the expanded dictionary received as a parameter and calculates the set of ambiguity classes that the tagger will manage.

Parameters:
`is` — the input stream containing the expanded dictionary to read.

Definition at line 507 of file HMM.C.

References MorphoStream::get_next_word(), TaggerWord::get_tags(), init(), and Collection::size().

                                {
  // Builds the set of ambiguity classes handled by the tagger from the
  // expanded dictionary, then sets N (number of tags), M (number of
  // ambiguity classes) and calls init().
  //
  // NOTE(review): indexing `output` with a tag set is used here only for
  // its effect of registering that set as an ambiguity class when it is
  // not yet known (cf. "if not created yet" below) -- confirm against the
  // Collection implementation.

  // The input dictionary must contain every punctuation mark, including
  // the end-of-sentence mark.
  MorphoStream morpho_stream(fdic, true, constants, tags_index, prefer_rules);

  int words_read = 0;
  for (TaggerWord *entry = morpho_stream.get_next_word();
       entry != NULL;
       entry = morpho_stream.get_next_word()) {
    // Progress indicator: one dot on stderr every 10000 words.
    ++words_read;
    if (words_read % 10000 == 0)
      cerr<<'.'<<flush;

    // Words that carry no tags contribute no ambiguity class.
    set<TTag> entry_tags = entry->get_tags();
    if (!entry_tags.empty())
      output[entry_tags];

    // Each word handed out by the stream is released here.
    delete entry;
  }
  cerr<<"\n";

  // Open ambiguity class: it contains all tags that are not closed.
  // Unknown words are assigned the open ambiguity class.
  output[open_class];

  N = tags_index.size();

  // Make sure there is a single-tag ambiguity class for every tag,
  // creating it if not created yet.
  for (int tag = 0; tag < N; tag++) {
    set<TTag> singleton;
    singleton.insert(tag);
    output[singleton];
  }

  M = output.size();

  /*
  // Disabled check, kept for reference.
  // We check that all tags appear at least in one ambiguity class.
  // If not, a warning message is reported and an ambiguity class holding
  // the tag is created.
  for (int i=0; i<N; i++) {
    bool appear=false;
    for (int k=0; k<M; k++) {
      if (output[k].find(i)!=output[k].end()) {
        appear=true;
        break;
      }
    }
    if (!appear) {
      char* tags_array[]=TAGS_ARRAY;
      cerr<<"Warning: Tag '"<<tags_array[i]<<"' does not appear in any ambiguity class.\n";
      cerr<<"Warning: An ambiguity class for tag '"<<tags_array[i]<<"' was created.\n";
      set<TTag> amb_class;
      amb_class.insert(i);
      output.add(amb_class);
      M++; // One more ambiguity class
    }
  }
  */

  cerr<<N<<" states and "<<M<<" ambiguity classes\n";
  init();
}


Generated by  Doxygen 1.6.0   Back to index