// Copyright 2000 by Kevin Atkinson under the terms of the LGPL

#include "as_config.h"

#include "language.hh"
#include "invalid_word.hh"
#include <fstream>
#include <vector>
#include <cassert>
#include <pspell/string.hh>
#include <pspell/getdata.hh>

#include "file_exceps.hh"

#include "string_map.hh"
#include "config.hh"
#include "file_util.hh"
#include "split.hh"
#include "clone_ptr-t.hh"
#include "file_data_util.hh"

namespace aspell {

  // FIXME: The "c" might conflict with ConfigData's use of that slot.
  //   Work out a policy to avoid that, such as reserving the first half
  //   for ConfigData's use and the other half for users.
  // Keys accepted in a language data (".dat") file.  Each row gives the key
  // name, its value type and (presumably) its default value and description.
  // Rows carrying the trailing "c" (presumably KeyInfo::otherdata) are the
  // ones Language::setup copies into the caller's Config when the language
  // file sets them and the Config does not already have them.
  static const KeyInfo lang_config_keys[] = {
    {"charset",             KeyInfoString, "iso8859-1", ""},
    {"name",                KeyInfoString, "",          ""},
    {"run-together",        KeyInfoBool,   "",          "", "c"},
    {"run-together-limit",  KeyInfoInt,    "",          "", "c"},
    {"run-together-middle", KeyInfoString, "",          ""},
    {"run-together-min",    KeyInfoInt,    "",          "", "c"},
    {"soundslike",          KeyInfoString, "generic",   ""},
    {"special",             KeyInfoString, "",          ""},
    {"ignore-accents",      KeyInfoBool,   "",          "", "c"},
    {"use-soundslike",      KeyInfoBool,   "",          ""},
    {"keyboard",            KeyInfoString, "standard",  "", "c"}
  };
  
  // Loads the definition of language `lang` and fills in this object's
  // tables from it.
  //
  // lang   - language name; when empty, the "lang" value of `config` is
  //          used instead.
  // config - configuration to consult; may be null, in which case a
  //          default-constructed Config is used for the duration of the
  //          call.  Language-file keys flagged "c" that the file sets and
  //          that `config` lacks are copied into it.
  //
  // Throws BadFileFormat when the language file has no "name" field, when
  // its "special" field is not a list of character/flags pairs, or when a
  // row of the character data file is not terminated where expected.
  // Throws CantReadFile when the character data file cannot be opened.
  // The helpers called here may raise further exceptions.
  void Language::setup(string lang, Config * config) 
  {
    // Owns the fallback Config so that it is released on every exit path,
    // including the exceptions thrown below.
    struct DefaultConfigOwner {
      Config * ptr;
      DefaultConfigOwner() : ptr(0) {}
      ~DefaultConfigOwner() { delete ptr; }
    } default_config;

    if (!config)      config = default_config.ptr = new Config();
    if (lang.empty()) lang   = config->retrieve("lang");

    string dir1, dir2;
    fill_data_dir(config, dir1, dir2);

    //
    // get_lang_info
    //

    ConfigData data("aspell-lang",
                    lang_config_keys, 
                    lang_config_keys + sizeof(lang_config_keys)/sizeof(KeyInfo));
    string path;
    find_file(path,dir1,dir2,lang,".dat");
    data.read_in_file(path);

    if (!data.have("name"))
      throw BadFileFormat(path, "The required field \"name\" is missing.");

    name_         = data.retrieve("name");
    charset_      = data.retrieve("charset");
    mid_chars_    = data.retrieve("run-together-middle");

    // The "special" field is a flat list of pairs: a character followed by
    // a flag string whose positions 0, 1 and 2 are tested for '*' and
    // passed, in that order, to SpecialChar (presumably begin / middle /
    // end, per the Aspell language data file format).
    vector<string> special_data = split(data.retrieve("special"));
    for (vector<string>::iterator i = special_data.begin();
         i != special_data.end();
         ++i) 
      {
        // Every character needs a following flags token; stepping past the
        // end of the vector would be undefined behaviour.
        if (i->empty() || i + 1 == special_data.end())
          throw BadFileFormat(path, "The field \"special\" must be a list of character/flags pairs.");
        const char c = (*i)[0];
        ++i;
        const string & flags = *i;
        // A flag position missing from a short token counts as unset
        // instead of being read past the end of the string.
        special_[to_uchar(c)] = 
          SpecialChar (flags.size() > 0 && flags[0] == '*',
                       flags.size() > 1 && flags[1] == '*',
                       flags.size() > 2 && flags[2] == '*');
      }

    //
    // Export the "c"-flagged settings of the language file to `config`,
    // without overriding anything `config` already has.
    //

    Emulation<const KeyInfo *,PspellKeyInfoEmulation> els 
      = data.possible_elements(false);
    const KeyInfo * k;
    while ((k = els.next()) != 0) {
      if (k->otherdata[0] == 'c' 
          && data.have(k->name) && !config->have(k->name))
        config->replace(k->name, data.retrieve(k->name).c_str());
    }

    //
    // fill_in_tables
    //
  
    ifstream char_data;
    string char_data_name;
    find_file(char_data_name,dir1,dir2,charset_,".dat");
    char_data.open(char_data_name.c_str());
    if (!char_data)
      throw CantReadFile(char_data_name);
    
    // The first two lines of the character data file are skipped.
    string temp;
    getline(char_data,temp);
    getline(char_data,temp);
    // One row per character code: the to_uni_ value, a type word
    // ("letter", "space" or anything else) and six integer table entries,
    // with the newline immediately after the last number.
    for (int i = 0; i != 256; ++i) {
      char_data >> to_uni_[i];
      char_data >> temp;
      char_type_[i] = temp == "letter" ? letter 
        : temp == "space"  ? space 
        : other;
      int num = 0;
      char_data >> num; to_lower_[i]    = static_cast<char>(num);
      char_data >> num; to_upper_[i]    = static_cast<char>(num);
      char_data >> num; to_title_[i]    = static_cast<char>(num);
      char_data >> num; to_sl_[i]       = static_cast<char>(num);
      char_data >> num; to_stripped_[i] = static_cast<char>(num);
      char_data >> num; de_accent_[i]   = static_cast<char>(num);
      // Also catches a failed or truncated read: peek() then yields EOF.
      if (char_data.peek() != '\n') 
        throw BadFileFormat(char_data_name);
    }
    
    //
    // Build to_normalized_: every alphabetic character shares one index
    // with its stripped form; all remaining characters share the final
    // index, which is also stored in max_normalized_.
    //
    
    for (int i = 0; i != 256; ++i) 
      to_normalized_[i] = 0;

    int c = 1;
    for (int i = 0; i != 256; ++i) {
      if (is_alpha(i)) {
        if (to_normalized_[to_uchar(to_stripped_[i])] == 0) {
          to_normalized_[i] = c;
          to_normalized_[to_uchar(to_stripped_[i])] = c;
          ++c;
        } else {
          to_normalized_[i] = to_normalized_[to_uchar(to_stripped_[i])];
        }
      }
    }
    for (int i = 0; i != 256; ++i) {
      if (to_normalized_[i]==0) to_normalized_[i] = c;
    }
    max_normalized_ = c;

    //
    // Needs the case tables filled in above.
    //

    normalize_mid_characters(*this,mid_chars_);

    //
    // prep phonetic code
    //
    
    soundslike_.reset(new_soundslike(data.retrieve("soundslike"), 
                                     dir1, dir2,
                                     this));
    soundslike_chars_ = soundslike_->soundslike_chars();
    
  }

  // Decides whether `word` is an acceptable match for `inlist` (presumably
  // the spelling stored in a word list) under the comparison settings read
  // from this object: lang, case_insensitive, ignore_accents and
  // strip_accents.
  //
  // Both strings must be non-empty and must hold more than a single
  // "begin" special character; this is only checked with assert.
  //
  // Returns false as soon as a required begin/end special character, a
  // letter, an accent or the casing is found to disagree; true otherwise.
  bool SensitiveCompare::operator() (const char * word, 
				     const char * inlist) const
  {
    // this will fail if word or inlist is empty
    assert (*word != '\0' && *inlist != '\0');
    
    // if begin inlist is a begin char then it must match begin word
    // chop all begin chars from the begin of word and inlist  
    if (lang->special(*inlist).begin) {
      if (*word != *inlist)
	return false;
      ++word, ++inlist;
    } else if (lang->special(*word).begin) {
      ++word;
    }
    
    // this will fail if word or inlist only contain a begin char
    assert (*word != '\0' && *inlist != '\0');
    
    if (case_insensitive) {
      // Case-insensitive mode: characters are compared after to_lower().
      if (ignore_accents) {

	// NOTE(review): nothing is compared in this branch; both pointers
	// are only advanced to the end of the shorter string.  Presumably
	// the caller has already matched the two words on a case- and
	// accent-free form -- confirm.
	while (*word != '\0' && *inlist != '\0') 
	  ++word, ++inlist;

      } else if (strip_accents) {

	// Only inlist is passed through de_accent(); word is compared as is.
	while (*word != '\0' && *inlist != '\0') {
	  if (lang->to_lower(*word) != lang->de_accent(lang->to_lower(*inlist)))
	    return false;
	  ++word, ++inlist;
	}

      } else {

	while (*word != '\0' && *inlist != '\0') {
	  if (lang->to_lower(*word) != lang->to_lower(*inlist))
	    return false;
	  ++word, ++inlist;
	}

      }
    } else {
      // Case-sensitive mode.
      //   (note: there are 3 possible casing lower, upper and title)
      //   if is lower begin inlist then begin word can be any casing
      //   if not                   then begin word must be the same case
      bool case_compatible = true;
      // First character: the letters (and accents, unless ignored) must
      // agree regardless of case.
      if (!ignore_accents) {
	if (strip_accents) {
	  if (lang->to_lower(*word) != lang->de_accent(lang->to_lower(*inlist)))
	    return false;
	} else {
	  if (lang->to_lower(*word) != lang->to_lower(*inlist))
	    return false;
	}
      }
      // A first character of inlist that is not lower case must be matched
      // by word, accents aside.
      if (!lang->is_lower(*inlist) && lang->de_accent(*word) != lang->de_accent(*inlist))
	case_compatible = false;
      // Stays true only while every letter of word seen so far is upper case.
      bool all_upper = lang->is_upper(*word);
      ++word, ++inlist;
      // Remaining characters: only positions where word holds a letter are
      // examined; anything else is stepped over.
      while (*word != '\0' && *inlist != '\0') {
	if (lang->char_type(*word) == Language::letter) {
	  if (!lang->is_upper(*word))
	    all_upper = false;
	  if (ignore_accents) {
	    if (lang->de_accent(*word) != lang->de_accent(*inlist))
	      case_compatible = false;
	  } else if (strip_accents) {
	    // The else below belongs to the inner if.
	    if (*word != lang->de_accent(*inlist))
	      if (lang->to_lower(*word) != lang->de_accent(lang->to_lower(*inlist)))
		return false;
	      else // accents match case does not
		case_compatible = false;
	  } else {
	    // The else below belongs to the inner if.
	    if (*word != *inlist)
	      if (lang->to_lower(*word) != lang->to_lower(*inlist))
		return false;
	      else // accents match case does not
		case_compatible = false;
	  }
	}
	++word, ++inlist;
      }
      //   if word is all upper than casing of inlist can be anything
      //   otherwise the casing of tail begin and tail inlist must match
      if (all_upper) 
	case_compatible = true;
      if (!case_compatible) 
	return false;
    }
    // Step over at most one character left in inlist (presumably a trailing
    // "end" character that word lacks); inlist is then expected to be
    // exhausted, which is only checked with assert.
    if (*inlist != '\0') ++inlist;
    assert(*inlist == '\0');
  
    //   if end   inlist is a end   char then it must match end word
    // (word-1) is the last character of word that was walked over above.
    if (lang->special(*(inlist-1)).end) {
      if (*(inlist-1) != *(word-1))
	return false;
    }
    return true;
  }

  static void invalid_char(const char * word, char letter, const char * where)
  {
    string m;
    m += "The character '";
    m += letter;
    m += "' may not appear at the ";
    m += where;
    m += " of a word.";
    throw InvalidWord(word, m);
  }

  // Checks that `word` is well formed for language `l`.
  //
  // Non-letter characters are accepted only where the language marks them
  // as special:
  //   - the first character must be a letter, or a `begin` special that is
  //     immediately followed by a letter;
  //   - every character strictly between the first and the last must be a
  //     letter or a `middle` special;
  //   - the last character must be a letter or an `end` special.
  //
  // Throws InvalidWord describing the first violation found (including an
  // empty `word`); returns normally when the word is acceptable.
  void check_if_valid(const Language & l, const char * word) {
    if (*word == '\0') 
      throw InvalidWord(word, "Empty string.");
    const char * i = word;
    if (l.char_type(*i) != Language::letter) {
      if (!l.special(*i).begin)
        invalid_char(word, *i, "beginning");
      else if (l.char_type(*(i+1)) != Language::letter)
        throw InvalidWord(word, "Does not contain any letters.");
    }
    // The first character has been validated against the `begin` rule
    // above, so the interior scan starts at the second character.  Starting
    // it at the first one would reject a legitimate begin-only special with
    // a misleading "middle" message.
    if (*(i+1) != '\0') {
      for (++i; *(i+1) != '\0'; ++i) { 
        if (l.char_type(*i) != Language::letter) {
          if (!l.special(*i).middle)
            invalid_char(word, *i, "middle");
        }
      }
    }
    // i now points at the last character (the only one for a one-character
    // word).
    if (l.char_type(*i) != Language::letter) {
      if (!l.special(*i).end)
        invalid_char(word, *i, "end");
    }
  }

  void normalize_mid_characters(const Language & l, string & s) 
  {
    assert (s.size() < 4);
    for (unsigned int i = 0; i != s.size(); ++i) 
    {
      s[i] = l.to_lower(s[i]);
    }
    // now sort it
    if (s.size() == 3) 
    {
      if (s[0] < s[1])
	swap(s[0], s[1]);
      if (s[1] < s[2])
	swap(s[1], s[2]);
    } 
    if (s.size() >= 2) 
    {
      if (s[0] < s[1])
	swap(s[0], s[1]);
    }
    
  }

}
