// This file is part of PUMA.
// Copyright (C) 1999-2003  The PUMA developer team.
//                                                                
// This program is free software;  you can redistribute it and/or 
// modify it under the terms of the GNU General Public License as 
// published by the Free Software Foundation; either version 2 of 
// the License, or (at your option) any later version.            
//                                                                
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  
// GNU General Public License for more details.                   
//                                                                
// You should have received a copy of the GNU General Public      
// License along with this program; if not, write to the Free     
// Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
// MA  02111-1307  USA                                            

#include "Puma/CScanner.h"
#include "Puma/ErrorStream.h"
#include "Puma/Source.h"
#include "Puma/Token.h"
#include "Puma/Location.h"
#include "Puma/Unit.h"
#include "Puma/CTokens.h"
#include "Puma/ScanBuffer.h"

#include <string.h>

namespace Puma {


// Select the accepted language level from the configuration options.
// Every option is inspected in order; each `--lang-c', `--lang-c++' or
// `--lang-ac++' option re-applies all three switches, so the last such
// option in the configuration determines the final setting. Options
// with any other name are ignored.
void CScanner::configure (const Config &c) {
  for (unsigned idx = 0; idx < c.Options (); idx++) {
    const char *name = c.Option (idx)->Name ();

    const bool want_c    = strcmp (name, "--lang-c") == 0;
    const bool want_cpp  = strcmp (name, "--lang-c++") == 0;
    const bool want_acpp = strcmp (name, "--lang-ac++") == 0;

    // not a language selection option
    if (! (want_c || want_cpp || want_acpp))
      continue;

    // C is enabled by every language option, C++ by `--lang-c++' and
    // `--lang-ac++', AspectC++ only by `--lang-ac++'
    allow_std_c (true);
    allow_std_cplusplus (want_cpp || want_acpp);
    allow_aspectc (want_acpp);
  }
}


// Translate the language class reported by the recognizer into the
// language id stored in a token. CRecognizer::UNKNOWN and any value
// not listed below are mapped to LanguageID (0).
LanguageID CScanner::map_lang (CRecognizer::Lang lang) {
  LanguageID id = LanguageID (0);
  switch (lang) {
    case CRecognizer::COMMENT:  id = Token::comment_id;    break;
    case CRecognizer::PRE_DIR:  id = Token::pre_id;        break;
    case CRecognizer::COMP_DIR: id = Token::dir_id;        break;
    case CRecognizer::WHITE:    id = Token::white_id;      break;
    case CRecognizer::PRE:      id = Token::macro_op_id;   break;
    case CRecognizer::CORE:     id = Token::cpp_id;        break;
    case CRecognizer::KEYWORD:  id = Token::keyword_id;    break;
    case CRecognizer::ID:       id = Token::identifier_id; break;
    case CRecognizer::WILDCARD: id = Token::wildcard_id;   break;
    case CRecognizer::UNKNOWN:  break;  // keep LanguageID (0)
  }
  return id;
}


// Create a token from the next `len' characters of the scan buffer,
// advance the current location `loc' behind the token and accept the
// characters in the buffer.
//
//   lang  language class determined by the recognizer
//   expr  token type; replaced by Token::ID_UNKNOWN if `lang' is
//         CRecognizer::UNKNOWN
//   len   number of characters in the scan buffer that form the token
//
// Returns the newly allocated token; its location is the value of
// `loc' before it is advanced.
//
// If the token text contains a newline, line continuations
// (backslash followed by LF or by CR LF) are removed from the token
// text. For each removed continuation the number of characters copied
// since the previous continuation (or since the token start) is
// recorded and attached to the token via cont_lines(). A newline that
// is not part of a continuation switches the scanner back to NORMAL
// mode and re-allows directives.
Token *CScanner::make_token (CRecognizer::Lang lang, int expr, int len) {
  Token *result = 0;
  char short_buffer[512];
  char *tok_buffer = short_buffer;
  int rows = 0;
  int columns = 0;
  int offset = loc.column ();
  Mode old_mode = mode ();

  // dynamically allocate a huge buffer if the token text plus the
  // terminating '\0' might not fit into the stack buffer
  if (len >= (int)sizeof (short_buffer) - 1)
    tok_buffer = new char[len + 1];

  // select the correct language id
  if (lang == CRecognizer::UNKNOWN)
    expr = Token::ID_UNKNOWN;
  LanguageID lid;
  if (old_mode == CRecognizer::IN_COMP_DIR && lang != CRecognizer::COMMENT)
    // everything but comments inside a compiler directive is
    // classified as part of the directive
    lid = Token::dir_id;
  else {
    // parentheses and commas of the core language get their own ids
    if (lang == CRecognizer::CORE && expr == TOK_OPEN_ROUND)
      lid = Token::open_id;
    else if (lang == CRecognizer::CORE && expr == TOK_COMMA)
      lid = Token::comma_id;
    else if (lang == CRecognizer::CORE && expr == TOK_CLOSE_ROUND)
      lid = Token::close_id;
    else
      lid = map_lang (lang);
  }

  char *src  = buffer ().token ();
  if (!buffer ().new_line (len)) {
    // a token without newline => can be copied directly
    memcpy (tok_buffer, src, len);
    tok_buffer[len] = '\0';
    // NOTE(review): the token is assumed to keep its own copy of the
    // text, because the buffer is released below -- confirm in Token
    result = new Token (expr, lid, tok_buffer);
    result->location (loc);
    loc.setup (loc.filename (), loc.line (), loc.column () + len);
  }
  else {
    // special token with newline => more complicated
    char *dest = tok_buffer;
    char *end  = src + len;

    int last = 0;            // characters copied since the last continuation
    Array<int> *cl = 0;      // continuation line marks, created on demand
    while (src < end) {
      // detect a line continuation: "\\\n" (2 chars) or "\\\r\n" (3 chars)
      int skip = 0;
      if (*src == '\\') {
        if (src + 1 < end && *(src + 1) == '\n')
          skip = 2;
        else if (src + 2 < end && *(src + 1) == '\x0d' &&
                 *(src + 2) == '\n')
          skip = 3;
      }

      if (skip) {
        // drop the continuation from the token text, but remember
        // where it was and count the line
        src += skip;
        rows++;
        columns = 1;
        offset = 0;
        if (!cl) cl = new Array<int>(10,10);
        cl->append (last);
        last = 0;
        continue;
      }

      if (*src == '\n') {
        // a real line end terminates any directive mode
        rows++;
        columns = 1;
        offset = 0;
        mode (NORMAL);
        allow_directives ();
      } else {
        columns++;
      }
      *dest = *src;
      dest++;
      src++;
      last++;
    }
    *dest = '\0';

    result = new Token (expr, lid, tok_buffer);
    result->location (loc);

    // set the next token location
    if (rows > 0 || columns > 0)
      loc.setup (loc.filename (), loc.line () + rows, offset + columns);

    // attach the continuation line marks
    if (cl) result->cont_lines (cl);
  }
  // eventually accept the token
  buffer ().accept (len);

  // free a dynamically allocated huge buffer; comparing the pointers
  // (instead of repeating a length test) guarantees that every buffer
  // allocated above is released -- a second, slightly different length
  // test used to leak the buffer of tokens that are exactly
  // sizeof (short_buffer) - 1 characters long
  if (tok_buffer != short_buffer)
    delete[] tok_buffer;

  return result;
}


// Scan the complete contents of the scan buffer and append one token
// per recognized lexeme to `unit'. The location counter starts at
// line 1, column 1 of the unit (or of "<anonymous unit>" if the unit
// has no name). Scanning ends when the buffer state is neither
// STATE_OK nor STATE_NEW, or with an error message as soon as the
// recognizer reports a negative result.
void CScanner::scan_all (Unit &unit) {

  loc.setup (unit.name () ? unit.name () : "<anonymous unit>", 1, 1);

  for (;;) {
    // continue only while the buffer is fresh or in a good state
    const bool scannable = buffer ().state () == CScanBuffer::STATE_OK ||
                           buffer ().state () == CScanBuffer::STATE_NEW;
    if (! scannable)
      break;

    CRecognizer::Lang lang;
    int expr, len;
    int status = recognize (lang, expr, len);

    // a negative result means the recognizer failed => give up
    if (status < 0) {
      err << sev_error << loc
        << "Error while scanning tokens" << endMessage;
      break;
    }

    // build the token (this also consumes the input) and store it
    Token *token = make_token (lang, expr, len);
    unit.append (*token);
  }
}


// Read the whole input source into memory, scan it and append the
// resulting tokens to `unit'.
//
//   in    source to read from; an empty source (size 0) is silently
//         ignored, a source of unknown size (size -1) is reported as
//         an error
//   unit  unit that receives the scanned tokens
//
// Errors are reported on `err'; in that case the unit is left
// untouched.
void CScanner::fill_unit (Source &in, Unit &unit) {

  int size = in.size ();
  if (size == 0)
    return;
  if (size == -1) {
    err << sev_error << "can't scan file of unknown size" << endMessage;
    return;
  }
  char *buf = new char[size];
  if (in.read (buf, size) != size) {
    err << sev_error << "can't load input file" << endMessage;
    // release the input buffer on this path, too (it used to leak here)
    delete[] buf;
    return;
  }
  setup ();
  buffer ().init (buf, size);
  scan_all (unit);
  // the input buffer is no longer needed after scanning
  delete[] buf;
}


// Scan the given character string and append the resulting tokens to
// `unit'. The string is handed to the scan buffer without an explicit
// length.
// NOTE(review): presumably `in' has to be '\0'-terminated -- confirm
// against the scan buffer's init().
void CScanner::fill_unit (const char *in, Unit &unit) {
  // reset the scanner before attaching the new input
  setup ();
  buffer ().init (in);

  // tokenize everything into the unit
  scan_all (unit);
}

   
} // namespace Puma
