/***************************************************************************

  read.c

  Lexical parser

  (c) 2000-2004 Benoît Minisini <gambas@users.sourceforge.net>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 1, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

***************************************************************************/

#define _READ_C

#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <stdio.h>
#include <ctype.h>

#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <dirent.h>

#include "gb_common.h"
#include "gb_error.h"
#include "gb_table.h"
#include "gb_file.h"

#include "gbc_compile.h"
#include "gbc_class.h"
#include "gbc_read.h"


/*#define DEBUG*/

/* Guards the call to READ_init() in READ_do(). Nothing in the code visible
   here ever sets it to TRUE, so READ_init() runs on every READ_do() call. */
PRIVATE boolean is_init = FALSE;
/* Compilation job being tokenized; READ_do() points it at JOB. */
PRIVATE COMPILE *comp;
/* Index into comp->source of the character currently being examined. */
PRIVATE long source_ptr;
/* Type and value of the last pattern appended by add_pattern(). */
PRIVATE int last_pattern_type = -1;
PRIVATE PATTERN last_pattern = 0;
/* Set to TRUE by READ_do() at the start of the source and after each line
   break it handles; reset to FALSE once a token or a comment has been read. */
PRIVATE boolean begin_line = FALSE;

/* Resets the line counter of the current job (JOB->line) to 1.
   Called by READ_do() before the source is scanned. */
PRIVATE void READ_init(void)
{
  JOB->line = 1;
}


/* Declare the well-known classes to the compiler. See trans_expr.c */

#if 0
PRIVATE void READ_exit(void)
{
  static const char *wkclass[] = { "gb", "Class", "File", "Error", "Application", "System", "Collection", "Process", "Component", "Components", "Object", "Class", NULL };
  long index;
  const char **wkc;
  FILE *file;
  char buffer[256];
  int len;
  DIR *dir;
  struct dirent *dirent;
  const char *name;

  for (wkc = wkclass; *wkc; wkc++)
  {
    if (TABLE_find_symbol(JOB->class->table, *wkc, strlen(*wkc), NULL, &index))
      CLASS_add_class(JOB->class, index);
  }

  if (JOB->class_file)
  {
    file = fopen(JOB->class_file, "r");
    if (file)
    {
      for(;;)
      {
        if (!fgets(buffer, sizeof(buffer), file))
          break;

        len = strlen(buffer);

        if (len > 0 && buffer[len - 1] == '\n')
          len--;

        if (len == 0)
          continue;

        if (TABLE_find_symbol(JOB->class->table, buffer, len, NULL, &index))
          CLASS_add_class(JOB->class, index);
      }
      fclose(file);
    }
    else
      THROW("Cannot open file: &1", JOB->class_file);
  }
  else
  {
    dir = opendir(FILE_get_dir(COMP_project));
    if (dir)
    {
      while ((dirent = readdir(dir)) != NULL)
      {
        name = dirent->d_name;
        if (*name == '.')
          continue;

        if ((strcasecmp(FILE_get_ext(name), ".module") == 0)
            || (strcasecmp(FILE_get_ext(name), ".class") == 0))
        {
          name = FILE_get_basename(name);
          if (TABLE_find_symbol(JOB->class->table, name, strlen(name), NULL, &index))
            CLASS_add_class(JOB->class, index);
        }
      }

      closedir(dir);
    }
  }
}
#endif

/*
  Registers the classes used by the source that has just been tokenized.

  COMP_classes is read as a list of class names, each followed by '\n'. An
  empty line ends the list. Every name that is already present in the symbol
  table of the class being compiled is passed to CLASS_add_class().

  The end of the string also ends the list, so a list that lacks its
  terminating empty line no longer makes strchr() return a NULL pointer that
  is then used in pointer arithmetic. A last name that has no trailing
  newline is still looked up.
*/
PRIVATE void READ_exit(void)
{
  char *p, *p2;
  long index;

  p = COMP_classes;
  if (p == NULL)
    return;

  for(;;)
  {
    p2 = strchr(p, '\n');

    if (p2 == NULL)
    {
      /* No newline left: handle what remains, if anything, then stop */
      if (*p && TABLE_find_symbol(JOB->class->table, p, strlen(p), NULL, &index))
        CLASS_add_class(JOB->class, index);
      break;
    }

    /* An empty line marks the end of the list */
    if (p2 == p)
      break;

    if (TABLE_find_symbol(JOB->class->table, p, p2 - p, NULL, &index))
      CLASS_add_class(JOB->class, index);

    p = p2 + 1;
  }
}



/*
  Debugging aid: prints a one-line description of a pattern on the standard
  output.

  The line starts with the position of the pattern inside JOB->pattern when
  the pointer falls inside that array, then a '!' or a blank for the RT_FIRST
  flag, a '.' or a blank for the RT_POINT flag, and finally the pattern type
  followed by its symbol name or its raw index.
*/
PUBLIC void READ_dump_pattern(PATTERN *pattern)
{
  PATTERN pat = *pattern;
  int kind = PATTERN_type(pat);
  long value = PATTERN_index(pat);
  long rank = (long)(pattern - JOB->pattern);

  if (rank >= 0 && rank < ARRAY_count(JOB->pattern))
    printf("%ld ", rank);

  fputs((PATTERN_flag(pat) & RT_FIRST) ? "!" : " ", stdout);
  fputs((PATTERN_flag(pat) & RT_POINT) ? "." : " ", stdout);
  fputs(" ", stdout);

  switch (kind)
  {
    case RT_RESERVED:
      printf("RESERVED     %s\n", TABLE_get_symbol_name(COMP_res_table, value));
      break;

    case RT_NUMBER:
      printf("NUMBER       %s\n", TABLE_get_symbol_name(JOB->class->table, value));
      break;

    case RT_IDENTIFIER:
      printf("IDENTIFIER   %s\n", TABLE_get_symbol_name(JOB->class->table, value));
      break;

    case RT_STRING:
      printf("STRING       %s\n", TABLE_get_symbol_name(JOB->class->string, value));
      break;

    case RT_TSTRING:
      printf("TSTRING      %s\n", TABLE_get_symbol_name(JOB->class->string, value));
      break;

    case RT_NEWLINE:
      printf("NEWLINE      (%ld)\n", value);
      break;

    case RT_END:
      printf("END\n");
      break;

    case RT_PARAM:
      printf("PARAM        %ld\n", value);
      break;

    case RT_SUBR:
      printf("SUBR         %s\n", COMP_subr_info[value].name);
      break;

    default:
      printf("?            %ld\n", value);
      break;
  }
}


/*
  Returns the source character located 'offset' characters away from the
  current read position (source_ptr), or 0 when that position falls outside
  the source buffer.
*/
PRIVATE unsigned char get_char_offset(int offset)
{
  int where = offset + source_ptr;

  if (where < BUFFER_length(comp->source) && where >= 0)
    return (unsigned char)(comp->source[where]);

  return 0;
}


/* Returns the character at the current read position, or 0 past the end of
   the source. */
PRIVATE unsigned char get_char(void)
{
  unsigned char current = get_char_offset(0);

  return current;
}


/* Moves the read position one character forward and returns the character
   found there, or 0 past the end of the source. */
PRIVATE unsigned char next_char(void)
{
  source_ptr += 1;
  return get_char_offset(0);
}


/*
  Appends a pattern built from 'type' and 'index' to the pattern array of the
  job, and remembers it in last_pattern / last_pattern_type so that the next
  token can be analysed according to what precedes it.

  When DEBUG is defined, the new pattern is also dumped on standard output.
*/
PRIVATE void add_pattern(int type, long index)
{
  PATTERN *slot = ARRAY_add(&comp->pattern);

  *slot = PATTERN_make(type, index);

  last_pattern = *slot;
  last_pattern_type = type;

  #ifdef DEBUG
  READ_dump_pattern(slot);
  #endif
}


/* Emits a NEWLINE pattern carrying the number of the line that just ended,
   then moves the line counter of the job to the next line. */
PRIVATE void add_newline()
{
  add_pattern(RT_NEWLINE, comp->line);

  comp->line += 1;
}


/* Emits an END pattern and, like add_newline(), increments the line counter
   of the job. */
PRIVATE void add_end()
{
  add_pattern(RT_END, 0);

  comp->line += 1;
}


/*
  Tells whether a numeric literal starts at the current read position. The
  read position is not moved.

  Accepted beginnings:
    - an optional '+' or '-' sign, allowed only at the very start of the
      source or right after a whitespace character;
    - a decimal digit;
    - '&H' or a bare '&' followed by a hexadecimal digit;
    - '&X' or '%' followed by a binary digit.

  Returns TRUE when a number starts here, FALSE otherwise.

  The hexadecimal test uses isxdigit(), like add_number() does. The previous
  index("abcdefABCDEF", car) test matched the terminating NUL of the string
  when car was 0, i.e. at the end of the source, so a trailing '&' or '&H'
  was reported as a number.
*/
PRIVATE bool is_number(void)
{
  int pos = 0;
  unsigned char car;
  unsigned char car2;

  car = get_char_offset(pos);

  if (car == '-' || car == '+')
  {
    /* get_char_offset() returns 0 before the start of the source, which is
       accepted like a whitespace */
    car = get_char_offset(-1);
    if (car && !isspace(car))
      return FALSE;
    pos++;
    car = get_char_offset(pos);
  }

  if (isdigit(car))
    return TRUE;

  car2 = toupper(get_char_offset(pos + 1));

  if (car == '&')
  {
    if (car2 == 'H')
    {
      pos += 2;
      goto __HEX;
    }

    if (car2 == 'X')
    {
      pos += 2;
      goto __BIN;
    }

    /* A bare '&' introduces a hexadecimal number too */
    pos++;
    goto __HEX;
  }
  else if (car == '%')
  {
    pos++;
    goto __BIN;
  }
  else
    return FALSE;

__HEX:

  /* Compare against 0 explicitly: isxdigit() may return any non-zero value,
     which would not necessarily survive a conversion to a narrow bool type */
  car = get_char_offset(pos);
  return (isxdigit(car) != 0);

__BIN:

  car = get_char_offset(pos);
  return (car == '0' || car == '1');
}


PRIVATE void add_number()
{
  unsigned char car;
  long start;
  long index;

  start = source_ptr;
  car = get_char();

  if (car == '-' || car == '+')
    car = next_char();

  if (car == '&')
  {
    car = toupper(next_char());

    if (car == 'H')
      goto READ_HEXA;
    else if (car == 'X')
      goto READ_BINARY;
    else
    {
      source_ptr--;
      goto READ_HEXA;
    }
  }
  else if (car == '%')
    goto READ_BINARY;
  else
    goto READ_NUMBER;

READ_BINARY:

  for (;;)
  {
    car = next_char();
    if (car != '0' && car != '1')
      break;
  }

  if (car == '&')
    car = next_char();

  goto END;

READ_HEXA:

  for (;;)
  {
    car = next_char();
    if (!isxdigit(car))
      break;
  }

  if (car == '&')
    car = next_char();

  goto END;

READ_NUMBER:

  while (isdigit(car))
    car = next_char();

  if (car == '.')
  {
    do
    {
      car = next_char();
    }
    while (isdigit(car));
  }

  if (toupper(car) == 'E')
  {
    car = next_char();
    if (car == '+' || car == '-')
      car = next_char();

    while (isdigit(car))
      car = next_char();
  }

  goto END;

END:

  TABLE_add_symbol(comp->class->table, &comp->source[start], source_ptr - start, NULL, &index);

  add_pattern(RT_NUMBER, index);
}


/*
  Reads the word that starts at the current read position and emits one
  pattern for it: RT_RESERVED, RT_SUBR or RT_IDENTIFIER.

  no_res: TRUE when the word must always be taken as a plain identifier.
          READ_do() passes TRUE for a word introduced by '{'; a '}' found
          right after the word is then skipped.

  The classification depends on the previous pattern (last_pattern), on the
  character that follows the word and, for RS_CLASS, on begin_line.

  On return the read position is just after the word (and after the closing
  '}' or the '[]' suffix when one of them has been consumed).
*/
PRIVATE void add_identifier(bool no_res)
{
  unsigned char car;
  long start;
  int len;
  long index;
  int type;
  boolean not_first;
  boolean can_be_reserved;
  boolean can_be_subr;
  boolean is_type;
  boolean last_func, last_declare, last_type;

  start = source_ptr;
  len = 1;

  /* The first character is accepted without being tested; the following
     ones must be alphanumeric or one of '$', '_', '?'. */
  for(;;)
  {
    source_ptr++;
    car = get_char();
    if (car == 0 || ((!isalnum(car)) && (strchr("$_?", (int)car) == NULL)))
      break;
    len++;
  }

  if (no_res)
  {
    if (get_char() == '}')
      source_ptr++;
    goto IDENTIFIER;
  }

  /* A ':' may appear inside an identifier, provided it is not the last character! */
  /*
  if (comp->source[start + len - 1] == ':')
  {
    source_ptr--;
    len--;
  }
  */

  /* What the previous pattern was: a member access operator, a keyword that
     introduces a function, a declaration keyword, or a keyword that expects
     a type.
     NOTE(review): RS_PT and RS_EXCL are presumably the '.' and '!' operators;
     confirm against the reserved word table. */
  not_first = PATTERN_is(last_pattern, RS_PT) || PATTERN_is(last_pattern, RS_EXCL);
  last_func = PATTERN_is(last_pattern, RS_PROCEDURE) || PATTERN_is(last_pattern, RS_SUB) || PATTERN_is(last_pattern, RS_FUNCTION);
  last_declare = PATTERN_is(last_pattern, RS_PUBLIC) || PATTERN_is(last_pattern, RS_PRIVATE) 
                 || PATTERN_is(last_pattern, RS_DIM) || PATTERN_is(last_pattern, RS_PROPERTY) 
                 || PATTERN_is(last_pattern, RS_READ) || PATTERN_is(last_pattern, RS_INHERITS);
  last_type = PATTERN_is(last_pattern, RS_AS) || PATTERN_is(last_pattern, RS_NEW);

  /* Character that immediately follows the word */
  car = get_char();
  /*while (car == ' ')
    car = next_char();*/

  /* The reserved word table is not searched when the previous pattern is
     RS_PT or RS_EXCL */
  can_be_reserved = !not_first && TABLE_find_symbol(COMP_res_table, &comp->source[start], len, NULL, &index);
  if (can_be_reserved)
  {
    if (index == RS_ME || index == RS_NEW || index == RS_LAST)
    {
      /* Always reserved: nothing to do */
    }
    else if (index == RS_CLASS)
    {
      /* Reserved only as the first word of a line and when followed by a
         whitespace character */
      can_be_reserved = begin_line && isspace(car);
    }
    else
    {
      is_type = PATTERN_is_type(PATTERN_make(RT_RESERVED, index));

      if (is_type && (car == '[') && (get_char_offset(1) == ']'))
      {
        /* A type name immediately followed by "[]": the two brackets become
           part of the word, which is then no longer treated as reserved */
        len += 2;
        source_ptr += 2;
        is_type = FALSE;
        can_be_reserved = FALSE;
      }
      else
      {
        /* NOTE(review): this test can never be true here, since RS_NEW is
           already handled by the first branch above. */
        if (index == RS_NEW)
          is_type = TRUE;

        if (last_type)
          can_be_reserved = is_type;
        else if (last_func)
          can_be_reserved = FALSE;
        else
          can_be_reserved = !is_type && (car != ':') && (car != '.') && (car != '!') && (car != '(');
      }
    }
  }

  /* The subroutine table is searched only outside of member accesses,
     function headers, declarations and type positions, and only when the
     word is not followed by '.' or '!' */
  can_be_subr = !not_first && !last_func && !last_declare && !last_type && (car != '.' && car != '!');

  if (can_be_reserved)
  {
    type = RT_RESERVED;
  }
  else if (can_be_subr && TABLE_find_symbol(COMP_subr_table, &comp->source[start], len, NULL, &index))
  {
    type = RT_SUBR;
    /*index = COMP_subr_info[index].opcode;*/
  }
  else
    goto IDENTIFIER;

  add_pattern(type, index);
  return;

IDENTIFIER:

  /* Plain identifier: store it in the symbol table of the class */
  TABLE_add_symbol(comp->class->table, &comp->source[start], len, NULL, &index);
  type = RT_IDENTIFIER;
  add_pattern(type, index);
}


PRIVATE void add_operator()
{
  unsigned char car;
  long start;
  int len;
  long op = NO_SYMBOL;
  long index;

  start = source_ptr;
  len = 1;

  for(;;)
  {
    source_ptr++;

    if (TABLE_find_symbol(COMP_res_table, &comp->source[start], len, NULL, &index))
      op = index;
    else if (op != NO_SYMBOL)
    {
      source_ptr--;
      break;
    }

    car = get_char();
    if (!ispunct(car))
      break;
    len++;
  }

  if (op < 0)
    THROW(E_SYNTAX);

  add_pattern(RT_RESERVED, op);
}


/* Returns the value (0 to 15) of a hexadecimal digit given in either case,
   or -1 when the character is not a hexadecimal digit. */
PRIVATE int xdigit_val(unsigned char c)
{
  int lower = tolower(c);

  if (lower >= '0' && lower <= '9')
    return lower - '0';

  if (lower >= 'a' && lower <= 'f')
    return lower - 'a' + 10;

  return -1;
}

/*
  Reads the string literal whose opening quote is at the current read
  position, stores its decoded contents in the string table of the class and
  emits a STRING pattern.

  The literal is decoded in place inside the source buffer: 'p' is the write
  cursor, and it never gets ahead of the read position.

  Escape sequences: \n \t \r \" \' \\ and \xHH (two hexadecimal digits).

  Literals that are separated only by whitespace, line breaks included, are
  merged into a single string. The line breaks met between or after the
  literals are emitted as NEWLINE patterns after the STRING pattern.

  Throws "Non terminated string" when a line break or the end of the source
  is met inside a literal, and "Bad character constant in string" on an
  unknown escape sequence.

  The end of the source is checked explicitly: get_char() returns 0 there
  forever, so without that check the loop would never stop and would keep
  writing beyond the buffer.
*/
PRIVATE void add_string(void)
{
  unsigned char car;
  long start;
  int len;
  long index;
  int newline;
  bool jump;
  char *p;
  int i;

  start = source_ptr;
  len = 0;
  newline = 0;
  jump = FALSE;
  p = &comp->source[source_ptr];

  for(;;)
  {
    source_ptr++;
    car = get_char();

    if (jump)
    {
      /* Between two literals: count line breaks, resume on a new opening
         quote, stop on anything that is not whitespace */
      if (car == '\n')
        newline++;
      else if (car == '"')
        jump = FALSE;
      else if (!isspace(car))
        break;
    }
    else
    {
      p++;
      len++;

      if (car == '\n' || source_ptr >= BUFFER_length(comp->source))
        THROW("Non terminated string");

      if (car == '\\')
      {
        source_ptr++;
        car = get_char();

        if (car == 'n')
          *p = '\n';
        else if (car == 't')
          *p = '\t';
        else if (car == 'r')
          *p = '\r';
        else if (car == '\"' || car == '\'' || car == '\\')
          *p = car;
        else
        {
          if (car == 'x')
          {
            i = xdigit_val(get_char_offset(1));
            if (i >= 0)
            {
              car = i;
              i = xdigit_val(get_char_offset(2));
              if (i >= 0)
              {
                car = (car << 4) | i;
                *p = car;
                source_ptr += 2;
                continue;
              }
            }
          }

          THROW("Bad character constant in string");
        }
      }
      else if (car == '"')
      {
        /* Closing quote: it is not part of the contents */
        p--;
        len--;
        jump = TRUE;
      }
      else
        *p = car;
    }
  }

  /* The decoded text starts just after the opening quote */
  TABLE_add_symbol(comp->class->string, &comp->source[start + 1], len, NULL, &index);

  add_pattern(RT_STRING, index);

  for (i = 0; i < newline; i++)
    add_newline();
}


PUBLIC void READ_do(void)
{
  unsigned char car;

  comp = JOB;

  if (!is_init)
    READ_init();

  source_ptr = 0;
  begin_line = TRUE;

  while (source_ptr < BUFFER_length(comp->source))
  {
    car = get_char();

    if (isspace(car))
    {
      source_ptr++;
      if (car == '\n')
      {
        add_newline();
        begin_line = TRUE;
      }
      continue;
    }

    if (car == '\'')
    {
      do
      {
        source_ptr++;
        car = get_char();
      }
      while (car != '\n' && car != 0);

      begin_line = FALSE;
      continue;
    }

    if (car == '"')
    {
      add_string();
      begin_line = FALSE;
      continue;
    }

    if (isalpha(car) || car == '_' || car == '$')
    {
      add_identifier(FALSE);
      begin_line = FALSE;
      continue;
    }

    if (car == '{')
    {
      source_ptr++;
      add_identifier(TRUE);
      begin_line = FALSE;
      continue;
    }

    if (is_number())
    {
      add_number();
      begin_line = FALSE;
      continue;
    }

    add_operator();
    begin_line = FALSE;
  }

  /* On ajoute des marqueurs de fin pour simplifier le travail du compilateur
     lorsqu'il examine des patterns  l'avance (pas plus de quatre !) */
  add_newline();
  add_end();
  add_end();
  add_end();
  add_end();

  READ_exit();
}
