Logo Search packages:      
Sourcecode: apertium version File versions  Download package

TransferWord.C

/*
 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */

#include <apertium/TransferWord.H>
#include <iostream>

/**
 * Cache of compiled POSIX regular expressions shared by every TransferWord.
 * Entries are added on demand by getRegex(). The key is the pattern's
 * pointer value (the map uses the default ordering for char const *), not
 * the pattern text, so two distinct pointers to equal text get two entries.
 */
map<char const *, regex_t>  TransferWord::compiled_regexes;

void
00026 TransferWord::copy(TransferWord const &o)
{
  s = o.s;
  t = o.t;
  s_str = o.s_str;
  t_str = o.t_str;
}

void
00035 TransferWord::destroy()
{
}

/**
 * Returns the fragment of str matched by the regular expression `part`,
 * remembering the match offsets in m so later calls skip the regex search.
 *
 * @param m    cache of match offsets (begin, end) keyed by pattern pointer
 * @param str  the string to search
 * @param part the regular expression; also used (by pointer) as cache key
 * @return the matched substring, or "" when the pattern does not match.
 *         A failed match is cached as the offsets (0, 0).
 */
string
TransferWord::access(map<char const *, pair<int, int> > &m, string &str, char const *part)
{
  map<char const *, pair<int, int> >::iterator cached = m.find(part);

  if(cached != m.end())
  {
    // Offsets already known: just cut the fragment out of the string.
    pair<int, int> const &span = cached->second;
    return str.substr(span.first, span.second - span.first);
  }

  regmatch_t match;
  if(regexec(&(getRegex(part)), str.c_str(), 1, &match, 0) != 0)
  {
    // No match: remember an empty span so the regex is not run again.
    m[part] = pair<int, int>(0, 0);
    return "";
  }

  m[part] = pair<int, int>(match.rm_so, match.rm_eo);
  return str.substr(match.rm_so, match.rm_eo - match.rm_so);
}

regex_t &
00068 TransferWord::getRegex(const char *part)
{
  map<char const *, regex_t>::iterator it;
  it = TransferWord::compiled_regexes.find(part);

  if(it != compiled_regexes.end())
  {
    return it->second;
  }
  else
  {
    int errorcode = regcomp(&(TransferWord::compiled_regexes[part]),
                      part, REG_EXTENDED|REG_ICASE);
    regexError(part, errorcode);
    return compiled_regexes[part];
  }
}

void 
00087 TransferWord::regexError(char const *message, int const errorcode)
{
  if(errorcode != 0)
  {
    cerr << "Error: Compilation for regular expression for '"
         << message << "' failed." << endl;
    exit(EXIT_FAILURE);
  } 
}

00097 TransferWord::TransferWord()
{
}

00101 TransferWord::TransferWord(string const &src, string const &tgt)
{
  init(src, tgt);
}

00106 TransferWord::~TransferWord()
{
  destroy();
}

00111 TransferWord::TransferWord(TransferWord const &o)
{
  copy(o);
}

/**
 * Assignment operator.
 * @param o the object to assign from
 * @return reference to this object
 */
TransferWord &
TransferWord::operator =(TransferWord const &o)
{
  // Self-assignment leaves the object untouched.
  if(this == &o)
  {
    return *this;
  }

  destroy();
  copy(o);
  return *this;
}

void
00128 TransferWord::init(string const &src, string const &tgt)
{
  s.clear();
  t.clear();
  s_str = src;
  t_str = tgt;
}

/**
 * Returns the part of the source-side string matched by `part`.
 * @param part regular expression selecting the fragment
 * @return the matched fragment, or "" if there is no match (see access())
 */
string
TransferWord::source(char const *part)
{
  // Source side: offset cache s, string s_str.
  return this->access(s, s_str, part);
}

/**
 * Returns the part of the target-side string matched by `part`.
 * @param part regular expression selecting the fragment
 * @return the matched fragment, or "" if there is no match (see access())
 */
string
TransferWord::target(char const *part)
{
  // Target side: offset cache t, string t_str.
  return this->access(t, t_str, part);
}

void
00149 TransferWord::assign(map<char const *, pair<int, int> > &m, string &str, char const *part,
            string const &value)
{
  map<char const *, pair<int, int> >::iterator it = m.find(part);

  if(it == m.end())
  {
    regmatch_t result;
    int errorcode = regexec(&(getRegex(part)), str.c_str(), 1, &result, 0);
    if(errorcode != 0)
    {
      return;
    }
    m[part] = pair<int, int>(result.rm_so, result.rm_eo);
  }

  string newstr = str.substr(0, m[part].first);
  newstr.append(value);
  newstr.append(str.substr(m[part].second));
  str = newstr;

  int dif = value.size() - (m[part].second - m[part].first);
  int const lowlimit = m[part].first;
  int const uplimit = m[part].second;
  for(it = m.begin(); it != m.end(); it++)
  {
    if(it->second.first >= uplimit)
    {
      it->second.first += dif;
      it->second.second += dif;
    } 
    else if(it->second.first > lowlimit)
    {
      // invalidate overlapping patterns
      m.erase(it);
    }
    else if(it->second.second >= uplimit)
    {
      it->second.second += dif;
    }
    else if(it->second.second > lowlimit)
    {
      // invalidate overlapping patterns
      m.erase(it);
    }
  }
}

void
TransferWord::setSource(char const *part, string const &value)
{
  assign(s, s_str, part, value);
}

void
TransferWord::setTarget(char const *part, string const &value)
{
  assign(t, t_str, part, value);
}

Generated by  Doxygen 1.6.0   Back to index