Logo Search packages:      
Sourcecode: apertium version File versions  Download package

Transfer.C

/*
 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
#include <apertium/Transfer.H>
#include <apertium/TRXReader.H>
#include <lttoolbox/Compression.H>
#include <lttoolbox/XMLParseUtil.H>

#include <cctype>
#include <iostream>
#include <stack>

using namespace std;

/**
 * Copy helper used by the copy constructor and the assignment operator.
 *
 * No state is taken over from the argument: the compiled matcher and the
 * parsed XML document are not duplicated.  The pointer members are reset
 * to NULL so that the object is left in a well-defined empty state and
 * destroy() never frees an indeterminate pointer.
 *
 * @param o object to copy from (currently unused)
 */
void
Transfer::copy(Transfer const &o)
{
  (void) o; // nothing is copied from the source object

  me = NULL;
  doc = NULL;
  root_element = NULL;
  lastrule = NULL;
}

void
Transfer::destroy()
{
  if(me)
  {
    delete me;
    me = NULL;
  }
  if(doc)
  {
    xmlFreeDoc(doc);
    doc = NULL;
  }  
}

/**
 * Default constructor: builds an empty object with nothing loaded.
 *
 * All pointer members used in this file are set to NULL so that none of
 * them is ever read while indeterminate.
 */
Transfer::Transfer()
{
  me = NULL;
  doc = NULL;
  root_element = NULL;
  lastrule = NULL;
  word = NULL;
  blank = NULL;
  output = NULL;
}

/**
 * Destructor: hands all cleanup over to destroy().
 */
Transfer::~Transfer()
{
  this->destroy();
}

/**
 * Copy constructor.
 *
 * The pointer members are put into a defined (NULL) state before copy()
 * is called, so that the destructor never frees an indeterminate pointer.
 *
 * @param o object to copy from
 */
Transfer::Transfer(Transfer const &o)
{
  me = NULL;
  doc = NULL;
  root_element = NULL;
  lastrule = NULL;

  copy(o);
}

/**
 * Assignment operator: releases the current contents and then copies
 * from the argument.  Self-assignment leaves the object untouched.
 *
 * @param o object to assign from
 * @return reference to this object
 */
Transfer &
Transfer::operator =(Transfer const &o)
{
  if(this == &o)
  {
    return *this;
  }

  destroy();
  copy(o);
  return *this;
}

void 
Transfer::readData(FILE *in)
{
  alphabet.read(in);
  any_char = alphabet(TRXReader::ANY_CHAR);
  any_tag = alphabet(TRXReader::ANY_TAG);

  Transducer t;
  t.read(in);
  
  map<int, int> finals;  
  
  // finals
  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
  {
    int key = Compression::multibyte_read(in);
    finals[key] = Compression::multibyte_read(in);
  }  
  
  me = new MatchExe(t, finals);
 
  // attr_items

  // fixed attr_items
  attr_items["lem"] = "(([^<]|\"\\<\")+)";
  attr_items["lemq"] = "(#[ _][^<]+)";
  attr_items["lemh"] = "(([^<#]|\"\\<\"|\"\\#\")+)";
  attr_items["whole"] = ".+";
  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
  {
    int size_k = Compression::multibyte_read(in);
    char cad_k[size_k+1];
    fread(cad_k, sizeof(char), size_k, in);
    cad_k[size_k] = 0;
    
    int size_v = Compression::multibyte_read(in);
    char cad_v[size_v+1];
    fread(cad_v, sizeof(char), size_v, in);
    cad_v[size_v] = 0;
    
    attr_items[cad_k] = cad_v;
  }

  // variables
  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
  {
    int size_k = Compression::multibyte_read(in);
    char cad_k[size_k+1];
    fread(cad_k, sizeof(char), size_k, in);
    cad_k[size_k] = 0;
    
    int size_v = Compression::multibyte_read(in);
    char cad_v[size_v+1];
    fread(cad_v, sizeof(char), size_v, in);
    cad_v[size_v] = 0;
    
    variables[cad_k] = cad_v;
  }

  // macros
  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
  {
    int size_k = Compression::multibyte_read(in);
    char cad_k[size_k+1];
    fread(cad_k, sizeof(char), size_k, in);
    cad_k[size_k] = 0;
    
    macros[cad_k] = Compression::multibyte_read(in);
  }


  // lists
  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
  {
    int size_k = Compression::multibyte_read(in);
    char cad_k[size_k+1];
    fread(cad_k, sizeof(char), size_k, in);
    cad_k[size_k] = 0;
    for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
    {
      int size_v = Compression::multibyte_read(in);
      char cad_v[size_v+1];
      fread(cad_v, sizeof(char), size_v, in);
      cad_v[size_v] = 0;
      lists[cad_k].insert(cad_v);
      listslow[cad_k].insert(tolower(cad_v));
    }  
  }
}

/**
 * Load the bilingual dictionary transducer used for word translation.
 *
 * The file holds binary data, so it is opened in binary mode to keep the
 * bytes unchanged on platforms that distinguish text and binary streams.
 * The program is terminated if the file cannot be opened.
 *
 * @param fstfile path of the bilingual transducer file
 */
void
Transfer::readBil(string const &fstfile)
{
  FILE *in = fopen(fstfile.c_str(), "rb");
  if(!in)
  {
    cerr << "Error: Could not open file '" << fstfile << "'." << endl;
    exit(EXIT_FAILURE);
  }
  fstp.load(in);
  fstp.initBiltrans();
  fclose(in);
}

/**
 * Load everything needed for transfer: the XML rules file, the
 * preprocessed data file and the bilingual transducer, in that order.
 *
 * The data file holds binary data and is therefore opened in binary
 * mode.  The program is terminated if it cannot be opened.
 *
 * @param transferfile path of the XML transfer rules
 * @param datafile     path of the preprocessed transfer data
 * @param fstfile      path of the bilingual transducer
 */
void
Transfer::read(string const &transferfile, string const &datafile,
             string const &fstfile)
{
  readTransfer(transferfile);

  // datafile
  FILE *in = fopen(datafile.c_str(), "rb");
  if(!in)
  {
    cerr << "Error: Could not open file '" << datafile << "'." << endl;
    exit(EXIT_FAILURE);
  }
  readData(in);
  fclose(in);

  readBil(fstfile);
}

/**
 * Parse the XML transfer rules file and collect its macro definitions
 * and rule actions.
 *
 * The program is terminated with an error message if the file cannot be
 * parsed or the document has no root element.
 *
 * @param in path of the XML transfer rules file
 */
void
Transfer::readTransfer(string const &in)
{
  doc = xmlReadFile(in.c_str(), NULL, 0);

  if(doc == NULL)
  {
    cerr << "Error: Could not parse file '" << in << "'." << endl;
    exit(EXIT_FAILURE);
  }

  root_element = xmlDocGetRootElement(doc);

  // xmlDocGetRootElement() returns NULL for a document without root
  if(root_element == NULL)
  {
    cerr << "Error: File '" << in << "' has no root element." << endl;
    exit(EXIT_FAILURE);
  }

  // search for macros & rules
  for(xmlNode *i = root_element->children; i != NULL; i = i->next)
  {
    if(i->type == XML_ELEMENT_NODE)
    {
      if(!xmlStrcmp(i->name, (const xmlChar *) "section-def-macros"))
      {
        collectMacros(i);
      }
      else if(!xmlStrcmp(i->name, (const xmlChar *) "section-rules"))
      {
        collectRules(i);
      }
    }
  }
}

/**
 * Store, for every element child of the given node, its first 'action'
 * child element in rule_map (in document order).
 *
 * A child without an 'action' element is reported as an error and the
 * program is terminated; skipping it would shift the positions of all
 * following entries in rule_map.
 *
 * @param localroot node whose element children are the rules
 */
void
Transfer::collectRules(xmlNode *localroot)
{
  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
  {
    if(i->type != XML_ELEMENT_NODE)
    {
      continue;
    }

    xmlNode *action = NULL;
    for(xmlNode *j = i->children; j != NULL; j = j->next)
    {
      if(j->type == XML_ELEMENT_NODE && !xmlStrcmp(j->name, (const xmlChar *) "action"))
      {
        action = j;
        break;
      }
    }

    if(action == NULL)
    {
      cerr << "Error: Rule without 'action' element." << endl;
      exit(EXIT_FAILURE);
    }

    rule_map.push_back(action);
  }
}

/**
 * Append every element child of the given node to macro_map, in
 * document order.
 *
 * @param localroot node whose element children are the macro definitions
 */
void
Transfer::collectMacros(xmlNode *localroot)
{
  xmlNode *child = localroot->children;
  while(child != NULL)
  {
    if(child->type == XML_ELEMENT_NODE)
    {
      macro_map.push_back(child);
    }
    child = child->next;
  }
}

/**
 * Evaluate a string-valued expression node of a rule.
 *
 * The first time a node is seen it is translated into a TransferInstr
 * and stored in evalStringCache; the function then calls itself once so
 * that the value is always computed from the cached instruction.
 *
 * Handled element names: clip, lit-tag, lit, b, get-case-from, var and
 * case-of.  Any other element is reported as an error and the program
 * is terminated.
 *
 * @param element expression node to evaluate
 * @return the value of the expression
 */
string 
Transfer::evalString(xmlNode *element)
{
  map<xmlNode *, TransferInstr>::iterator it;
  it = evalStringCache.find(element);
  if(it != evalStringCache.end())
  {
    TransferInstr &ti = it->second;
    switch(ti.getType())
    {
      case ti_clip_sl:
        return word[ti.getPos()]->source(ti.getContent().c_str());

      case ti_clip_tl:
        return word[ti.getPos()]->target(ti.getContent().c_str());

      case ti_var:
        return variables[ti.getContent()];

      case ti_lit_tag:
      case ti_lit:
        return ti.getContent();

      case ti_b:
        // a non-negative position selects one of the current blanks;
        // otherwise a single space is produced
        if(ti.getPos() >= 0)
        {
          return !blank?"":*(blank[ti.getPos()]);
        }
        return " ";

      case ti_get_case_from:
        return copycase(word[ti.getPos()]->source(ti.getContent().c_str()),
                        evalString((xmlNode *) ti.getPointer()));

      case ti_case_of_sl:
        return caseOf(word[ti.getPos()]->source(ti.getContent().c_str()));

      case ti_case_of_tl:
        return caseOf(word[ti.getPos()]->target(ti.getContent().c_str()));

      default:
        return "";
    }
  }

  // Not cached yet: build the instruction for this node.
  if(!xmlStrcmp(element->name, (const xmlChar *) "clip"))
  {
    int pos = 0;
    xmlChar *part = NULL, *side = NULL;

    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
    {
      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
      {
        side = i->children->content;
      }
      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
      {
        part = i->children->content;
      }
      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
      {
        // positions are 1-based in the rules file
        pos = atoi((const char *)i->children->content) - 1;
      }
    }

    if(!xmlStrcmp(side, (const xmlChar *) "sl"))
    {
      evalStringCache[element] = TransferInstr(ti_clip_sl, attr_items[(const char *) part], pos);
    }
    else
    {
      evalStringCache[element] = TransferInstr(ti_clip_tl, attr_items[(const char *) part], pos);
    }
  }
  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit-tag"))
  {
    evalStringCache[element] = TransferInstr(ti_lit_tag,
                                             tags((const char *) element->properties->children->content), 0);
  }
  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit"))
  {
    evalStringCache[element] = TransferInstr(ti_lit, XMLParseUtil::latin1(element->properties->children->content), 0);
  }
  else if(!xmlStrcmp(element->name, (const xmlChar *) "b"))
  {
    if(element->properties == NULL)
    {
      evalStringCache[element] = TransferInstr(ti_b, " ", -1);
    }
    else
    {
      int pos = atoi((const char *) element->properties->children->content) - 1;
      evalStringCache[element] = TransferInstr(ti_b, "", pos);
    }
  }
  else if(!xmlStrcmp(element->name, (const xmlChar *) "get-case-from"))
  {
    int pos = atoi((const char *) element->properties->children->content) - 1;

    // the first element child is the expression whose case is changed
    xmlNode *param = NULL;
    for(xmlNode *i = element->children; i != NULL; i = i->next)
    {
      if(i->type == XML_ELEMENT_NODE)
      {
        param = i;
        break;
      }
    }

    evalStringCache[element] = TransferInstr(ti_get_case_from,
                                             attr_items["lem"], pos, param);
  }
  else if(!xmlStrcmp(element->name, (const xmlChar *) "var"))
  {
    evalStringCache[element] = TransferInstr(ti_var, (const char *) element->properties->children->content, 0);
  }
  else if(!xmlStrcmp(element->name, (const xmlChar *) "case-of"))
  {
    int pos = 0;
    xmlChar *part = NULL, *side = NULL;

    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
    {
      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
      {
        side = i->children->content;
      }
      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
      {
        part = i->children->content;
      }
      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
      {
        pos = atoi((const char *) i->children->content) - 1;
      }
    }

    if(!xmlStrcmp(side, (const xmlChar *) "sl"))
    {
      evalStringCache[element] = TransferInstr(ti_case_of_sl, attr_items[(const char *) part], pos);
    }
    else
    {
      evalStringCache[element] = TransferInstr(ti_case_of_tl, attr_items[(const char *) part], pos);
    }
  }
  else
  {
    cerr << "Error: unexpected rvalue expression '" << element->name << "'." << endl;
    exit(EXIT_FAILURE);
  }

  // evaluate through the instruction that has just been cached
  return evalString(element);
}

/**
 * Write the element children of an 'out' node to the output stream.
 *
 * - 'lu': '^', the values of its element children, then '$'.
 * - 'mlu': '^', the values of the children of each element child, with
 *   '+' between consecutive element children, then '$'.
 * - any other element: its own value, as computed by evalString().
 *
 * @param localroot the 'out' node
 */
void
Transfer::processOut(xmlNode *localroot)
{
  for(xmlNode *item = localroot->children; item != NULL; item = item->next)
  {
    if(item->type != XML_ELEMENT_NODE)
    {
      continue;
    }

    bool const is_lu = !xmlStrcmp(item->name, (const xmlChar *) "lu");
    bool const is_mlu = !is_lu && !xmlStrcmp(item->name, (const xmlChar *) "mlu");

    if(!is_lu && !is_mlu)
    {
      fputs_unlocked(evalString(item).c_str(), output);
      continue;
    }

    fputc_unlocked('^', output);

    if(is_lu)
    {
      for(xmlNode *part = item->children; part != NULL; part = part->next)
      {
        if(part->type == XML_ELEMENT_NODE)
        {
          fputs_unlocked(evalString(part).c_str(), output);
        }
      }
    }
    else
    {
      bool need_plus = false; // true once the first unit has been written
      for(xmlNode *unit = item->children; unit != NULL; unit = unit->next)
      {
        if(unit->type != XML_ELEMENT_NODE)
        {
          continue;
        }

        if(need_plus)
        {
          fputc_unlocked('+', output);
        }
        need_plus = true;

        for(xmlNode *part = unit->children; part != NULL; part = part->next)
        {
          if(part->type == XML_ELEMENT_NODE)
          {
            fputs_unlocked(evalString(part).c_str(), output);
          }
        }
      }
    }

    fputc_unlocked('$', output);
  }
}

/**
 * Dispatch one instruction node to its handler according to the element
 * name: choose, let, out, call-macro or modify-case.  Nodes with any
 * other name are ignored.
 *
 * @param localroot instruction node
 */
void
Transfer::processInstruction(xmlNode *localroot)
{
  const xmlChar *name = localroot->name;

  if(!xmlStrcmp(name, (const xmlChar *) "choose"))
  {
    processChoose(localroot);
    return;
  }
  if(!xmlStrcmp(name, (const xmlChar *) "let"))
  {
    processLet(localroot);
    return;
  }
  if(!xmlStrcmp(name, (const xmlChar *) "out"))
  {
    processOut(localroot);
    return;
  }
  if(!xmlStrcmp(name, (const xmlChar *) "call-macro"))
  {
    processCallMacro(localroot);
    return;
  }
  if(!xmlStrcmp(name, (const xmlChar *) "modify-case"))
  {
    processModifyCase(localroot);
  }
}

/**
 * Execute a 'let' instruction: evaluate the second element child and
 * store the value in the place named by the first one (a variable or a
 * clip of a source/target word).
 *
 * The decoded left-hand side is stored in evalStringCache so that later
 * executions of the same node take the cached path.
 *
 * @param localroot the 'let' node
 */
void
Transfer::processLet(xmlNode *localroot)
{
  // The first two element children are the left and right hand sides.
  xmlNode *lhs = NULL, *rhs = NULL;
  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
  {
    if(child->type != XML_ELEMENT_NODE)
    {
      continue;
    }
    if(lhs != NULL)
    {
      rhs = child;
      break;
    }
    lhs = child;
  }

  // Cached path: the left-hand side has been decoded before.
  map<xmlNode *, TransferInstr>::iterator cached = evalStringCache.find(lhs);
  if(cached != evalStringCache.end())
  {
    TransferInstr &instr = cached->second;
    switch(instr.getType())
    {
      case ti_var:
        variables[instr.getContent()] = evalString(rhs);
        break;

      case ti_clip_sl:
        word[instr.getPos()]->setSource(instr.getContent().c_str(), evalString(rhs));
        break;

      case ti_clip_tl:
        word[instr.getPos()]->setTarget(instr.getContent().c_str(), evalString(rhs));
        break;

      default:
        break;
    }
    return;
  }

  if(!xmlStrcmp(lhs->name, (const xmlChar *) "var"))
  {
    string const name = (const char *) lhs->properties->children->content;
    variables[name] = evalString(rhs);
    evalStringCache[lhs] = TransferInstr(ti_var, name, 0);
    return;
  }

  if(xmlStrcmp(lhs->name, (const xmlChar *) "clip") != 0)
  {
    // neither a variable nor a clip: nothing to assign
    return;
  }

  int pos = 0;
  xmlChar *part = NULL, *side = NULL;
  for(xmlAttr *attr = lhs->properties; attr != NULL; attr = attr->next)
  {
    if(!xmlStrcmp(attr->name, (const xmlChar *) "side"))
    {
      side = attr->children->content;
    }
    else if(!xmlStrcmp(attr->name, (const xmlChar *) "part"))
    {
      part = attr->children->content;
    }
    else if(!xmlStrcmp(attr->name, (const xmlChar *) "pos"))
    {
      // positions are 1-based in the rules file
      pos = atoi((const char *) attr->children->content) - 1;
    }
  }

  string const &pattern = attr_items[(const char *) part];
  if(!xmlStrcmp(side, (const xmlChar *) "tl"))
  {
    word[pos]->setTarget(pattern.c_str(), evalString(rhs));
    evalStringCache[lhs] = TransferInstr(ti_clip_tl, pattern, pos);
  }
  else
  {
    word[pos]->setSource(pattern.c_str(), evalString(rhs));
    evalStringCache[lhs] = TransferInstr(ti_clip_sl, pattern, pos);
  }
}

/**
 * Execute a 'modify-case' instruction: the place named by the first
 * element child (a clip or a variable) is rewritten with
 * copycase(value of the second element child, current contents).
 *
 * @param localroot the 'modify-case' node
 */
void
Transfer::processModifyCase(xmlNode *localroot)
{
  // The first two element children are the target and the case pattern.
  xmlNode *lhs = NULL, *rhs = NULL;
  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
  {
    if(child->type != XML_ELEMENT_NODE)
    {
      continue;
    }
    if(lhs != NULL)
    {
      rhs = child;
      break;
    }
    lhs = child;
  }

  if(!xmlStrcmp(lhs->name, (const xmlChar *) "var"))
  {
    string const name = (const char *) lhs->properties->children->content;
    variables[name] = copycase(evalString(rhs), variables[name]);
    return;
  }

  if(xmlStrcmp(lhs->name, (const xmlChar *) "clip") != 0)
  {
    return;
  }

  int pos = 0;
  xmlChar *part = NULL, *side = NULL;
  for(xmlAttr *attr = lhs->properties; attr != NULL; attr = attr->next)
  {
    if(!xmlStrcmp(attr->name, (const xmlChar *) "side"))
    {
      side = attr->children->content;
    }
    else if(!xmlStrcmp(attr->name, (const xmlChar *) "part"))
    {
      part = attr->children->content;
    }
    else if(!xmlStrcmp(attr->name, (const xmlChar *) "pos"))
    {
      // positions are 1-based in the rules file
      pos = atoi((const char *) attr->children->content) - 1;
    }
  }

  string const &pattern = attr_items[(const char *) part];
  if(!xmlStrcmp(side, (const xmlChar *) "sl"))
  {
    string const result = copycase(evalString(rhs),
                                   word[pos]->source(pattern.c_str()));
    word[pos]->setSource(pattern.c_str(), result);
  }
  else
  {
    string const result = copycase(evalString(rhs),
                                   word[pos]->target(pattern.c_str()));
    word[pos]->setTarget(pattern.c_str(), result);
  }
}

/**
 * Execute a 'call-macro' instruction.
 *
 * The words (and the blanks between them) selected by the element
 * children of the call are gathered into temporary arrays, which
 * replace the current word/blank arrays while the macro body runs and
 * are swapped back afterwards.
 *
 * At most 'npar' arguments are taken, since the temporary arrays are
 * sized from the macro's 'npar' attribute.  The blank stored between
 * two arguments is the one that follows the previous argument's word.
 *
 * @param localroot the 'call-macro' node
 */
void
Transfer::processCallMacro(xmlNode *localroot)
{
  const char *n = (const char *) localroot->properties->children->content;
  int npar = 0;

  xmlNode *macro = macro_map[macros[n]];

  for(xmlAttr *i = macro->properties; i != NULL; i = i->next)
  {
    if(!xmlStrcmp(i->name, (const xmlChar *) "npar"))
    {
      npar = atoi((const char *) i->children->content);
      break;
    }
  }

  TransferWord **myword = NULL;
  if(npar > 0)
  {
    myword = new TransferWord *[npar];
  }
  string **myblank = NULL;
  if(npar > 1)
  {
    myblank = new string *[npar - 1];
  }

  int idx = 0;
  int lastpos = 0;
  // idx < npar keeps the writes inside the arrays allocated above
  for(xmlNode *i = localroot->children; i != NULL && idx < npar; i = i->next)
  {
    if(i->type == XML_ELEMENT_NODE)
    {
      // positions are 1-based in the rules file
      int pos = atoi((const char *) i->properties->children->content)-1;
      myword[idx] = word[pos];
      if(idx > 0)
      {
        // blank[k] follows word k; indexing by the previous position
        // never reads past the blanks of the matched pattern
        myblank[idx-1] = blank[lastpos];
      }
      idx++;
      lastpos = pos;
    }
  }

  swap(myword, word);
  swap(myblank, blank);

  for(xmlNode *i = macro->children; i != NULL; i = i->next)
  {
    if(i->type == XML_ELEMENT_NODE)
    {
      processInstruction(i);
    }
  }

  swap(myword, word);
  swap(myblank, blank);

  // allocated with new[]; delete[] on NULL is a no-op
  delete[] myword;
  delete[] myblank;
}

/**
 * Execute a 'choose' instruction.
 *
 * The element children are visited in order.  Inside a 'when', a 'test'
 * child that evaluates to false abandons that branch; other element
 * children are executed as instructions.  The whole 'choose' ends after
 * a 'when' in which a test succeeded.  The element children of an
 * 'otherwise' are executed unconditionally when it is reached.
 *
 * @param localroot the 'choose' node
 */
void
Transfer::processChoose(xmlNode *localroot)
{
  for(xmlNode *branch = localroot->children; branch != NULL; branch = branch->next)
  {
    if(branch->type != XML_ELEMENT_NODE)
    {
      continue;
    }

    if(!xmlStrcmp(branch->name, (const xmlChar *) "when"))
    {
      bool matched = false;

      for(xmlNode *step = branch->children; step != NULL; step = step->next)
      {
        if(step->type != XML_ELEMENT_NODE)
        {
          continue;
        }

        if(xmlStrcmp(step->name, (const xmlChar *) "test") != 0)
        {
          processInstruction(step);
          continue;
        }

        if(!processTest(step))
        {
          break;
        }
        matched = true;
      }

      if(matched)
      {
        return;
      }
    }
    else if(!xmlStrcmp(branch->name, (const xmlChar *) "otherwise"))
    {
      for(xmlNode *step = branch->children; step != NULL; step = step->next)
      {
        if(step->type == XML_ELEMENT_NODE)
        {
          processInstruction(step);
        }
      }
    }
  }
}

/**
 * Evaluate a logical expression node by dispatching on its element
 * name: equal, or, and, not, in.
 *
 * @param localroot logical expression node
 * @return result of the expression; false for any other element name
 */
bool
Transfer::processLogical(xmlNode *localroot)
{
  const xmlChar *op = localroot->name;

  if(!xmlStrcmp(op, (const xmlChar *) "equal"))
  {
    return processEqual(localroot);
  }
  if(!xmlStrcmp(op, (const xmlChar *) "or"))
  {
    return processOr(localroot);
  }
  if(!xmlStrcmp(op, (const xmlChar *) "and"))
  {
    return processAnd(localroot);
  }
  if(!xmlStrcmp(op, (const xmlChar *) "not"))
  {
    return processNot(localroot);
  }
  if(!xmlStrcmp(op, (const xmlChar *) "in"))
  {
    return processIn(localroot);
  }

  return false;
}

/**
 * Evaluate an 'in' test: is the value of the first element child a
 * member of the list named by the first attribute of the second one?
 *
 * When the node's own first attribute has the value "yes", the
 * lower-cased value is looked up in the lower-cased copy of the list;
 * otherwise the lookup is exact.
 *
 * @param localroot the 'in' node
 * @return true if the value is found in the list
 */
bool
Transfer::processIn(xmlNode *localroot)
{
  xmlNode *value = NULL;
  xmlChar *idlist = NULL;

  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
  {
    if(child->type != XML_ELEMENT_NODE)
    {
      continue;
    }
    if(value != NULL)
    {
      idlist = child->properties->children->content;
      break;
    }
    value = child;
  }

  string sval = evalString(value);

  bool const caseless = localroot->properties != NULL &&
                        !xmlStrcmp(localroot->properties->children->content,
                                   (const xmlChar *) "yes");

  if(caseless)
  {
    set<string> &lowered = listslow[(const char *) idlist];
    return lowered.find(tolower(sval)) != lowered.end();
  }

  set<string> &exact = lists[(const char *) idlist];
  return exact.find(sval) != exact.end();
}

/**
 * Evaluate a 'test' node: the result is that of its first element
 * child, evaluated as a logical expression.
 *
 * @param localroot the 'test' node
 * @return the result, or false when there is no element child
 */
bool
Transfer::processTest(xmlNode *localroot)
{
  xmlNode *child = localroot->children;
  while(child != NULL && child->type != XML_ELEMENT_NODE)
  {
    child = child->next;
  }

  if(child == NULL)
  {
    return false;
  }
  return processLogical(child);
}

/**
 * Evaluate an 'and' node: true when every element child evaluates to
 * true.  Evaluation stops at the first false operand.
 *
 * @param localroot the 'and' node
 * @return conjunction of the operands (true when there are none)
 */
bool
Transfer::processAnd(xmlNode *localroot)
{
  for(xmlNode *operand = localroot->children; operand != NULL; operand = operand->next)
  {
    if(operand->type == XML_ELEMENT_NODE && !processLogical(operand))
    {
      return false;
    }
  }

  return true;
}

/**
 * Evaluate an 'or' node: true when at least one element child
 * evaluates to true.  Evaluation stops at the first true operand.
 *
 * @param localroot the 'or' node
 * @return disjunction of the operands (false when there are none)
 */
bool
Transfer::processOr(xmlNode *localroot)
{
  for(xmlNode *operand = localroot->children; operand != NULL; operand = operand->next)
  {
    if(operand->type == XML_ELEMENT_NODE && processLogical(operand))
    {
      return true;
    }
  }

  return false;
}

/**
 * Evaluate a 'not' node: the negation of its first element child.
 *
 * @param localroot the 'not' node
 * @return negated result, or false when there is no element child
 */
bool
Transfer::processNot(xmlNode *localroot)
{
  xmlNode *child = localroot->children;
  while(child != NULL && child->type != XML_ELEMENT_NODE)
  {
    child = child->next;
  }

  if(child == NULL)
  {
    return false;
  }
  return !processLogical(child);
}

/**
 * Evaluate an 'equal' test on the values of the first two element
 * children.
 *
 * When the node's first attribute has the value "yes" the comparison
 * ignores case (both values are lower-cased first); otherwise the
 * comparison is exact.
 * NOTE(review): the attribute is presumably 'caseless' - confirm
 * against the rules DTD.
 *
 * @param localroot the 'equal' node
 * @return true if the two values are equal
 */
bool
Transfer::processEqual(xmlNode *localroot)
{
  xmlNode *first = NULL, *second = NULL;

  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
  {
    if(i->type == XML_ELEMENT_NODE)
    {
      if(first == NULL)
      {
        first = i;
      }
      else
      {
        second = i;
        break;
      }
    }
  }

  string const lhs = evalString(first);
  string const rhs = evalString(second);

  if(localroot->properties != NULL &&
     !xmlStrcmp(localroot->properties->children->content,
                (const xmlChar *) "yes"))
  {
    return tolower(lhs) == tolower(rhs);
  }

  return lhs == rhs;
}

/**
 * Return target_word written with the capitalisation of source_word.
 *
 * - Source with first and last character upper-case and more than one
 *   character: the result is all upper-case.
 * - Otherwise the result is all lower-case, with the first character
 *   upper-cased when the source starts with an upper-case character.
 *
 * Characters are converted to unsigned char before being handed to the
 * <cctype> functions, which are undefined for negative values.
 *
 * @param source_word word that supplies the case pattern
 * @param target_word word whose case is rewritten
 * @return target_word with the case pattern applied
 */
string
Transfer::copycase(string const &source_word, string const &target_word)
{
  string result = target_word;

  bool const firstupper = !source_word.empty() &&
    isupper(static_cast<unsigned char>(source_word[0]));
  bool const uppercase = firstupper &&
    isupper(static_cast<unsigned char>(source_word[source_word.size()-1]));
  bool const sizeone = source_word.size() == 1;

  // a single upper-case letter only capitalises, it does not shout
  bool const all_upper = uppercase && !sizeone;

  for(unsigned int i = 0; i < target_word.size(); i++)
  {
    unsigned char const c = static_cast<unsigned char>(target_word[i]);
    result[i] = all_upper ? ::toupper(c) : ::tolower(c);
  }

  if(firstupper && !result.empty())
  {
    result[0] = ::toupper(static_cast<unsigned char>(result[0]));
  }

  return result;
}

/**
 * Describe the capitalisation of a string with one of three codes:
 *
 * - "aa": empty string, or first character not upper-case;
 * - "Aa": first character upper-case and either the string has a
 *         single character or its last character is not upper-case;
 * - "AA": first and last characters upper-case (two or more characters).
 *
 * Characters are converted to unsigned char before being handed to
 * isupper(), which is undefined for negative values.
 *
 * @param str string to classify
 * @return "aa", "Aa" or "AA"
 */
string 
Transfer::caseOf(string const &str)
{
  if(str.empty() || !isupper(static_cast<unsigned char>(str[0])))
  {
    return "aa";
  }

  if(str.size() == 1 ||
     !isupper(static_cast<unsigned char>(str[str.size()-1])))
  {
    return "Aa";
  }

  return "AA";
}

/**
 * Return a lower-case copy of a string, converting each character with
 * the C library's ::tolower().
 *
 * Characters are converted to unsigned char first, because ::tolower()
 * is undefined for negative values.
 *
 * @param str string to convert
 * @return lower-cased copy of str
 */
string
Transfer::tolower(string const &str) const
{
  string result = str;
  for(unsigned int i = 0, limit = str.size(); i != limit; i++)
  {
    result[i] = ::tolower(static_cast<unsigned char>(result[i]));
  }

  return result;
}

/**
 * Turn a dot-separated tag list into angle-bracket form: the string is
 * wrapped in '<' and '>' and every '.' is replaced by "><"
 * (for example "a.b" becomes "<a><b>").
 *
 * @param str dot-separated tag names
 * @return the tags in angle-bracket form
 */
string
Transfer::tags(string const &str) const
{
  string result(1, '<');

  for(string::const_iterator it = str.begin(); it != str.end(); ++it)
  {
    if(*it != '.')
    {
      result += *it;
    }
    else
    {
      result.append("><");
    }
  }

  result += '>';
  return result;
}

/**
 * Execute a rule: every element child of the given node is run as an
 * instruction, in document order.
 *
 * @param localroot node expected to be the rule's 'action' element
 */
void
Transfer::processRule(xmlNode *localroot)
{
  xmlNode *instruction = localroot->children;
  while(instruction != NULL)
  {
    if(instruction->type == XML_ELEMENT_NODE)
    {
      processInstruction(instruction);
    }
    instruction = instruction->next;
  }
}

/**
 * Return the next input token.
 *
 * Tokens still pending in input_buffer are served first.  Otherwise
 * characters are read from the stream until a delimiter is found:
 *
 * - '$' ends a word token (tt_word);
 * - '^' ends a blank token (tt_blank);
 * - end of input ends an end-of-file token (tt_eof).
 *
 * A backslash keeps the following character verbatim, and a '[...]'
 * block is copied as a whole, so delimiters inside it are not
 * interpreted.  End of input is also detected inside a '[...]' block
 * and after a backslash, so truncated input cannot loop forever or add
 * the EOF marker to the token text.
 *
 * @param in input stream
 * @return reference to the token stored in input_buffer
 */
TransferToken &
Transfer::readToken(FILE *in)
{
  if(!input_buffer.isEmpty())
  {
    return input_buffer.next();
  }

  string content = "";
  while(true)
  {
    int val = fgetc_unlocked(in);
    if(val == EOF)
    {
      return input_buffer.add(TransferToken(content, tt_eof));
    }
    if(val == '\\')
    {
      content += '\\';
      int escaped = fgetc_unlocked(in);
      if(escaped != EOF)
      {
        content += char(escaped);
      }
      // on EOF the next iteration returns the end-of-file token
    }
    else if(val == '[')
    {
      content += '[';
      while(true)
      {
        int val2 = fgetc_unlocked(in);
        if(val2 == EOF)
        {
          // unterminated block: hand back what has been read so far
          return input_buffer.add(TransferToken(content, tt_eof));
        }
        if(val2 == '\\')
        {
          content += '\\';
          int escaped = fgetc_unlocked(in);
          if(escaped != EOF)
          {
            content += char(escaped);
          }
        }
        else if(val2 == ']')
        {
          content += ']';
          break;
        }
        else
        {
          content += char(val2);
        }
      }
    }
    else if(val == '$')
    {
      return input_buffer.add(TransferToken(content, tt_word));
    }
    else if(val == '^')
    {
      return input_buffer.add(TransferToken(content, tt_blank));
    }
    else
    {
      content += char(val);
    }
  }
}

/**
 * Main loop: read tokens from 'in', feed them to the pattern matcher
 * and write the result to 'out'.
 *
 * Words and blanks are accumulated in tmpword/tmpblank while the
 * matcher state 'ms' is still alive.  When the matcher has no states
 * left, either the last rule that matched is applied, or the first
 * pending word is translated on its own (or the first pending blank is
 * written out) and matching restarts.
 *
 * @param in  input stream
 * @param out output stream (stored in the member 'output')
 */
void
Transfer::transfer(FILE *in, FILE *out)
{
  // buffer position to return to; updated whenever a rule matches or
  // input is consumed without a rule
  int last = 0;

  output = out;
  ms.init(me->getInitial());
  
  while(true)
  {
    // ms.size() == 0: the current input can no longer extend a match
    if(ms.size() == 0)
    {
      if(lastrule != NULL)
      {
      // a rule matched earlier: apply it and rewind to where it ended
      applyRule();
      input_buffer.setPos(last);
      }
      else
      {
      if(tmpword.size() != 0)
      {
        // no rule: translate only the first pending word, then resume
        // reading just after it
        string tr = fstp.biltrans(*tmpword[0], false);
        if(tr.size() != 0)
        {
          fputc_unlocked('^', output);
          fputs_unlocked(tr.c_str(), output);
          fputc_unlocked('$', output);
        }
        tmpword.clear();
        input_buffer.setPos(last);
        input_buffer.next();       
        last = input_buffer.getPos();
        ms.init(me->getInitial());
      }
      else if(tmpblank.size() != 0)
      {
        // only a blank is pending: copy it to the output unchanged
        fputs_unlocked(tmpblank[0]->c_str(), output);
        tmpblank.clear();
        last = input_buffer.getPos();
        ms.init(me->getInitial());
      }
      }
    }
    // -1 means no rule matches here; otherwise val is used as a
    // 1-based index into rule_map
    int val = ms.classifyFinals(me->getFinals());
    if(val != -1)
    {
      lastrule = rule_map[val-1];      
      last = input_buffer.getPos();
    }

    TransferToken &current = readToken(in);
   
    switch(current.getType())
    {
      case tt_word:
      applyWord(current.getContent());
        tmpword.push_back(&current.getContent());
      break;

      case tt_blank:
      // the matcher is stepped with a single space for any blank
      ms.step(' ');
      tmpblank.push_back(&current.getContent());
      break;

      case tt_eof:
      if(tmpword.size() != 0)
      {
        // words still pending: clear the matcher so that the next
        // iteration takes the "no states left" path above
        tmpblank.push_back(&current.getContent());
        ms.clear();
      }
      else
      {
        // nothing pending: write the trailing text and finish
        fputs_unlocked(current.getContent().c_str(), output);
        return;
      }
      break;

      default:
      cerr << "Error: Unknown input token." << endl;
      return;
    }
  }
}

void
Transfer::applyRule()
{
  unsigned int limit = tmpword.size();
  
  for(unsigned int i = 0; i != limit; i++)
  {
    if(i == 0)
    {
      word = new TransferWord *[limit];
      if(limit != 1)
      {
        blank = new string *[limit - 1];
      }
      else
      {
        blank = NULL;
      }
    }
    else
    {
      blank[i-1] = tmpblank[i-1];
    }
    word[i] = new TransferWord(*tmpword[i], fstp.biltrans(*tmpword[i], false));
  }

  processRule(lastrule);
  lastrule = NULL;

  if(word)
  {
    for(unsigned int i = 0; i != limit; i++)
    {
      delete word[i];
    }
    delete word;
  }
  if(blank)
  {
    delete blank;
  }
  word = NULL;
  blank = NULL;
  tmpword.clear();
  tmpblank.clear();
  ms.init(me->getInitial());
}

/**
 * Feed one word to the pattern matcher.
 *
 * The word is framed by '^' and '$'.  A backslash makes the following
 * character be stepped literally; a '<...>' tag is looked up in the
 * alphabet and stepped as that symbol, or as any_tag when the alphabet
 * returns 0 for it; every other character is stepped lower-cased with
 * any_char as the alternative symbol.  A '<' without a closing '>' is
 * skipped.
 *
 * @param word_str text of the word
 */
void
Transfer::applyWord(string const &word_str)
{
  unsigned int const length = word_str.size();

  ms.step('^');

  unsigned int pos = 0;
  while(pos < length)
  {
    char const current = word_str[pos];

    if(current == '\\')
    {
      // escaped character: step the one that follows the backslash
      pos++;
      ms.step(::tolower(word_str[pos]), any_char);
    }
    else if(current == '<')
    {
      for(unsigned int close = pos+1; close != length; close++)
      {
        if(word_str[close] != '>')
        {
          continue;
        }

        int const symbol = alphabet(word_str.substr(pos, close-pos+1));
        if(symbol)
        {
          ms.step(symbol, any_tag);
        }
        else
        {
          ms.step(any_tag);
        }
        pos = close; // resume after the closing '>'
        break;
      }
    }
    else
    {
      ms.step(::tolower(current), any_char);
    }

    pos++;
  }

  ms.step('$');
}

Generated by  Doxygen 1.6.0   Back to index