/*
    token.c* - tokeniser functions for term input/output in IC-Prolog ][
    Written by Frank McCabe and Damian Chu
    Imperial College, Winter 1989

    Modifications :
    24/2/90		dac
	changed tokeniser to use only one token
*/

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "defs.h"
#include "primitives.h"
#include "termio.h"
#include "io.h"
#include "parser_errors.h"
#include "read.h"

/*
   The set of graphs :
   '#','&','*','+','-','/',':','<','=','>','?','@','\','^','`','~'
   ('$' used to be a graph; the table below classifies it as lower)

   The set of puncts :
   '(', ')', '[', ']', '{', '}', ',', '.', '|', '%'

   The set of solos :
   '!', ';'
*/

/*
 * chtypes - character class of every character code.
 *
 * 256 entries laid out as 32 rows of 8; the trailing comment on each row
 * gives the character codes it covers.  Presumably read through the
 * chtype() macro (defined outside this file) - confirm.
 */
char_t chtypes[]= {
    sep,   sep,   sep,   sep,	eof,   sep,   sep,   sep,	/*   0-7   : code 4 (CTRL-D) is eof */
    sep,   sep,   sep,   sep,   sep,   sep,   sep,   sep,	/*   8-15  */
    sep,   sep,   sep,   sep,   sep,   sep,   sep,   sep,	/*  16-23  */
    sep,   sep,   sep,   sep,   sep,   sep,   sep,   sep,	/*  24-31  */
    sep,   solo,  string,graph, lower, punct, graph, quoted,	/*  32-39  : space ! " # $ % & ' */
    /* '$' (36) was changed from graph to lower */
    punct, punct, graph, graph, punct, graph, punct, graph,	/*  40-47  : ( ) * + , - . / */
    number,number,number,number,number,number,number,number,	/*  48-55  : 0 1 2 3 4 5 6 7 */
    number,number,graph, solo,  graph, graph, graph, graph,	/*  56-63  : 8 9 : ; < = > ? */
    graph, upper, upper, upper, upper, upper, upper, upper,	/*  64-71  : @ A B C D E F G */
    upper, upper, upper, upper, upper, upper, upper, upper,	/*  72-79  : H I J K L M N O */
    upper, upper, upper, upper, upper, upper, upper, upper,	/*  80-87  : P Q R S T U V W */
    upper, upper, upper, punct, graph, punct, graph, under,	/*  88-95  : X Y Z [ \ ] ^ _ */
    graph, lower, lower, lower, lower, lower, lower, lower,	/*  96-103 : ` a b c d e f g */
    lower, lower, lower, lower, lower, lower, lower, lower,	/* 104-111 : h i j k l m n o */
    lower, lower, lower, lower, lower, lower, lower, lower,	/* 112-119 : p q r s t u v w */
    lower, lower, lower, punct, punct, punct, graph, sep,	/* 120-127 : x y z { | } ~ DEL */

		/* upper ASCII half (codes 128-255) */

    lower, lower, lower, lower, lower, lower, lower, lower,	/* 128-135 */
    lower, lower, lower, lower, lower, lower, lower, lower,	/* 136-143 */
    lower, lower, lower, lower, lower, lower, lower, lower,	/* 144-151 */
    lower, lower, lower, lower, lower, lower, lower, lower,	/* 152-159 */
    graph, graph, graph, graph, graph, graph, graph, graph,	/* 160-167 */
    graph, graph, graph, graph, graph, graph, lower, lower,	/* 168-175 */
    graph, graph, graph, graph, graph, graph, graph, graph,	/* 176-183 */
    graph, graph, graph, graph, graph, graph, lower, lower,	/* 184-191 */
    graph, graph, graph, graph, graph, graph, graph, graph,	/* 192-199 */
    graph, graph, lower, lower, lower, lower, lower, lower,	/* 200-207 */
    graph, graph, graph, graph, graph, graph, graph, graph,	/* 208-215 */
    graph, graph, graph, graph, graph, graph, graph, graph,	/* 216-223 */
    graph, graph, graph, graph, graph, graph, graph, graph,	/* 224-231 */
    graph, graph, graph, graph, graph, graph, graph, graph,	/* 232-239 */
    graph, graph, graph, graph, graph, graph, graph, graph,	/* 240-247 */
    graph, graph, graph, graph, graph, graph, graph, graph	/* 248-255 */
};

/* entry points for character level input output */
extern	void	syntax_error(twoBytes errcode, token **tok);

/*
 * Tokeniser state.  There is exactly one token (tk); every tokeniser
 * function in this file fills it in or hands out its address.
 */
static	token	tk;			/* the global token */
static	token	*tokaddr = &tk;		/* address of the global token; its address is what gets passed to syntax_error() */
static	char	*last_char;		/* where to put next char of token */
/* set to the start of the token text when nxtoken() begins scanning a
   name or variable; not read anywhere in the visible part of this file */
static	char	*last_char_1; 
/* number of currently open '{'; while non-zero, a '.' followed by a
   separator or by '}' is returned as the graph token ". " rather than as
   the dot token */
static	int	brace_depth	= 0;	/* to differentiate '.' from '. ' */
/* TRUE while tk holds a token that was read ahead by hedtoken()/hedchar()
   and has not yet been consumed by nextoken() */
static	boolean	have_token	= FALSE;



/*------------------------------------------------------------*
 *              I O           F U N C T I O N S               *
 *------------------------------------------------------------*/
/*------------------------------------------------------------------------------
new_charin()

------------------------------------------------------------------------------*/

extern int _filbuf (FILE *);
extern int ungetc (int, FILE *);
extern int sscanf (const char *, const char *, ...);

/*
 * Read one character from the current input source and bump the read count.
 *
 *  - reading from a string: returns the next character masked to 7 bits,
 *    or EOF once the terminating NUL is reached (the position is not
 *    advanced past the NUL);
 *  - reading from a stream other than stdin: plain getc();
 *  - reading from stdin: additionally records the character in stdinbuf
 *    (restarting the buffer after a newline, and never storing more than
 *    BUFSIZ characters) and remembers it in stdinlast.  EOF is returned
 *    without touching that bookkeeping.
 */
CHARTYPE
new_charin(void)
{
    CHARTYPE c;

    inc_read_count;

    if (string_read) {
        if (*string_read_pos == 0)
            return(EOF);
        return((*string_read_pos++) & 0x7F);
    }

    c = getc(Qstdin);
    if (Qstdin != stdin || c == EOF)
        return(c);

    /* stdin only: keep a copy of the current input line */
    if (stdinlast == '\n')
        stdinposn = 0;
    if (stdinposn < BUFSIZ)
        stdinbuf[stdinposn++] = c;
    stdinlast = c;
    return(c);
}
/*------------------------------------------------------------------------------
new_charback(c)   -   push a character back onto the current input source

    Undoes one new_charin(): the read count is decremented and
      - when reading from a string, c is stored just before the current
	position (pushing back EOF instead stores a NUL at the current
	position, so the next read reports end of input again); always TRUE;
      - when reading from a stream, c is handed to ungetc(); the result is
	TRUE unless ungetc() reported EOF.  For stdin the line-buffer
	bookkeeping kept by new_charin() is stepped back as well.

    NOTE(review): c is a plain char, so the comparison with EOF depends on
    the signedness of char on the target platform.
------------------------------------------------------------------------------*/
boolean
new_charback(char c)
{
    int pushed;		/* ungetc() returns int; keep it wide enough to compare with EOF */

    dec_read_count;

    if (string_read) {
        if ((c) == EOF)
            *string_read_pos = 0;
        else
            *--string_read_pos = (c);
        return(TRUE);
    }

    pushed = ungetc(c, Qstdin);

    if (Qstdin == stdin) {
        stdinlast = 0;
        if (stdinposn)
            --stdinposn;
    }

    return(pushed != EOF);
}

/*------------------------------------------------------------*
 *          U T I L I T Y     F U N C T I O N S               *
 *------------------------------------------------------------*/
/*-----------------------------------------------------------------------------
reset_brace()   -   put the tokeniser back into its initial state

    Discards any token that was read ahead and forgets every open brace.
-----------------------------------------------------------------------------*/
void
reset_brace(void)
{
    have_token = FALSE;
    brace_depth = 0;
}
/*---------------------------------------------------------------------------*/
/* newtoken: start a fresh token by pointing last_char at the start of the
   global token's text buffer. */
#define	newtoken        last_char = tk.buff
/* cp_ch(ch): append one character to the token text and advance last_char.
   NOTE(review): nothing here checks last_char against the end of tk.buff
   (its size is declared outside this file) - confirm that overlong tokens
   cannot overrun it. */
#define cp_ch(ch)	*last_char++=(ch)

/*-----------------------------------------------------------------------------
toktype end_token(ch)  -   finish a token whose text is in the buffer

    NUL-terminates the text collected so far, records its length in
    tk.bufflen (the count includes the terminating NUL, because last_char
    has already moved past it) and stores ch as the token type.
    Returns the token type.
-----------------------------------------------------------------------------*/
toktype end_token(char_t ch)
{
    toktype kind = (toktype)ch;

    cp_ch('\0');
    tk.bufflen = last_char - tk.buff;
    tk.tt = kind;
    return kind;
}
/*------------------------------------------------------------------------------
toktype number_token(n)   -    make the global token an integer token

    The value n is kept in tk.bufflen, which is overloaded for this
    purpose; the token type becomes `number' and is returned.
------------------------------------------------------------------------------*/
toktype number_token(fourBytes n)
{
    toktype kind = (toktype)number;

    tk.bufflen = n;	/* bufflen doubles as the integer value */
    tk.tt = kind;
    return kind;
}

/*------------------------------------------------------------------------------
toktype single_tok(ttype)     -       make the global token a text-less token

    Only the token type is recorded; the text buffer and bufflen are left
    untouched.  Returns ttype.
------------------------------------------------------------------------------*/
toktype single_tok(toktype ttype)
{
    tk.tt = ttype;

    return ttype;
}
/*------------------------------------------------------------------------------
long int tok_len(t)   -     returns length of a token 

    The token types that carry no text (end of file, space and the
    single-character punctuation tokens) have length 1.  For every other
    token type the length is that of the NUL-terminated text in t->buff.
------------------------------------------------------------------------------*/
long int tok_len(token *t)
{
    switch (t->tt) {
    case eof:
    case space:
    case dot:
    case bra:
    case ket:
    case sqbra:
    case sqket:
    case brace:
    case endbrace:
    case comma:
    case semicolon:		/* this is really a bar */
        return 1L;

    default:
        /* strlen() yields size_t; convert explicitly to the return type */
        return (long int)strlen(t->buff);
    }
}
/*------------------------------------------------------------------------------
dump_token(t)

------------------------------------------------------------------------------*/
/*
void dump_token(t)
token *t;
{
    switch(t->tt) {
	case eof:
	    printf("<EOF>");
	    break;
	case semicolon:
	    (void)putchar('|');
	    break;
	case space:
	case dot:
	case bra:
	case ket:
	case sqbra:
	case sqket:
	case brace:
	case endbrace:
	case comma:
	    (void)putchar(t->tt);
	    break;
	default:
	    printf("%s", t->buff);
	    break;
    }
}
*/
/*-----------------------------------------------------------------------------
get_exponent()   -      read exponent part of a floating-point number 

    Called once the 'e'/'E' has been consumed.  Appends "e", an optional
    minus sign and the exponent digits to the token text, then pushes back
    the first character that is not a digit.  If no digit follows, the
    token is closed as `floating' and syntax error 14 is raised.
-----------------------------------------------------------------------------*/
void
get_exponent(void)
{
    register CHARTYPE c;

    c = charin;
    cp_ch('e');			/* always stored in lower case */

    if (c == Minus) {
        cp_ch(c);
        c = charin;
    }

    if (chtype(c) != number) {
        (void)end_token(floating);
        syntax_error(14, &tokaddr);
    }

    cp_ch(c);
    for (;;) {
        c = charin;
        if (chtype(c) != number)
            break;
        cp_ch(c);
    }
    charback(c);		/* first character after the exponent */
}


/* Returned by escape_char() when the escape sequence produces no character. */
#define LAYOUT (-2)
/*-----------------------------------------------------------------------------
escape_char()      -       read an escaped character 

    Called after a backslash has been consumed.  Reads the rest of the
    escape sequence and returns
      - the character it denotes (0..255), or
      - LAYOUT when the sequence denotes nothing (backslash followed by a
	layout character, or \c which skips the layout that follows), or
      - EOF when the input ends inside the sequence.

    Octal escapes take one to three octal digits.  The value is built in an
    int and reduced to one byte, so that \377 and \376 can never be
    confused with EOF (-1) and LAYOUT (-2) where plain char is signed.
-----------------------------------------------------------------------------*/
CHARTYPE
escape_char(void)
{
    register CHARTYPE ch = charin;

    switch (ch) {
    case 'b': case 'B':		/* backspace */
        return('\b');

    case 't': case 'T':		/* tab */
        return('\t');

    case 'n': case 'N':		/* newline */
        return('\n');

    case 'v': case 'V':		/* vertical tab */
        return('\v');

    case 'f': case 'F':		/* formfeed */
        return('\f');

    case 'r': case 'R':		/* carriage return */
        return('\r');

    case 'e': case 'E':		/* escape */
        return('\033');

    case 'd': case 'D':		/* delete */
        return('\177');

    case 's': case 'S':		/* visible space */
        return(' ');

    case '0': case '1':		/* octal string */
    case '2': case '3':
    case '4': case '5':
    case '6': case '7': {
        int value = ch - '0';
        int extra;

        /* at most two further octal digits */
        for (extra = 0; extra < 2; extra++) {
            ch = charin;
            if (ch > '7' || ch < '0') {
                charback(ch);
                break;
            }
            value = value * 8 + (ch - '0');
        }
        return(value & 0xFF);
    }

    case '^':			/* control char */
        if ((ch = charin) == EOF)
            return(EOF);
        return(ch & 0x1F);

    case 'c': case 'C':		/* ignore layout chars */
        while ((ch = charin) <= 040 || ch >= 0177)
            if (ch == EOF)
                return(EOF);
        charback(ch);
        return(LAYOUT);

    case EOF:
        return(EOF);

    default:
        if (ch <= 040 || ch >= 0177)	/* layout */
            return(LAYOUT);
        return(ch);			/* any other character stands for itself */
    }
}


/*------------------------------------------------------------------------------
toktype nxtoken()      -      basic tokeniser   

------------------------------------------------------------------------------*/
toktype nxtoken(void)
{
    register CHARTYPE ch;

    newtoken;

    ch=charin;

tkrestart:

    if (ch == EOF)	/* this test must come after the tkrestart label */
	ch = '\004';	/* CTRL-D is end of file */

    switch(chtype(ch)) {
	case sep:
	    /*
	     * This is wrong.  It only returns the space when it is followed
	     * by a '('.  This makes skip in hedtoken and nextoken not doing
	     * the right job.
	    if ((ch=charin)==bra) 	/* space before opening bracket
	    {
		charback(ch);
		return(single_tok(space));
	    }
	    else
	        goto tkrestart;
	    */
	    /*
	     * Munch all the sep into a single one until the first non-sep is
	     * encountered.  Then, return as space.
	     */
	    while ((ch = charin) == sep)
	    {
		;
	    }
	    charback(ch);
	    return(single_tok(space));

	case punct:
	    if (ch==dot)
		if ((ch=charin)==EOF) 
		{
		    charback(ch);
		    return(single_tok(dot));
		}
		else if (chtype(ch)==sep) 
		{
		    if (brace_depth>0) 
		    {
			cp_ch(dot);
			cp_ch(space);
			return(end_token(graph));
		    }
		    else
			return(single_tok(dot));
		}
		else if (ch==endbrace) 
		{
		    charback(endbrace);
		    if (brace_depth) 
		    {
			cp_ch(dot);
			cp_ch(space);
			return(end_token(graph));
		    }
		    else
			return(single_tok(dot));
		}
		else 		/* we have a graph started by a dot ... */
		{
		    charback(ch);
		    /* read in the graph token */
		    cp_ch(dot);
		    while(chtype((ch=charin))==graph||ch==dot)
			cp_ch(ch);
		    charback(ch);
		    return(end_token(graph));
		}
	    else if (ch==percent) 		/* % - eol comment */
	    {
		while((ch=charin)!='\n')
		    if (ch==EOF)
			goto tkrestart;
		if ((ch=charin)==bra) 
		{
		    charback(ch);
		    ch=space;
		}
		goto tkrestart;
	    }
	/* 
         * The use of '|' as alternative to ';' gets confused
         * when ';' has precedence less than 1000.  So this 
         * alternative use is stopped.
	 *  else if (ch==bar) 		/* bars are mapped to semicolons
	 *  {
	 *	cp_ch(semicolon);
	 *	return(end_token((char_t)semicolon));
	 *  }
	 */
	    else if (ch==comma) 
	    {
		cp_ch(comma);
		return(end_token((char_t)comma));
	    }
	    else if (ch==brace)			/* starting a brace pair */
		brace_depth++;
	    else if (ch==endbrace)		/* closing a brace pair */
		if (brace_depth)
		    brace_depth--;

	    return(single_tok(ch));

	case solo:
	    cp_ch(ch);
	    return(end_token(solo));

	case graph: 
	{
	    char initial = ch;
	    ch=charin;

	    if (initial==slash && ch==star) 	/* we have a comment */
	    {
		for(;TRUE;) 
		{
		    while((ch=charin)!=star)
			if (ch==EOF)
			    goto tkrestart;
		    if ((ch=charin)==slash)
			break;
		    else charback(ch);
		}
		if ((ch=charin)==bra) 
		{
		    charback(ch);
		    ch=space;
		}
		goto tkrestart;
	    }
	    else
		cp_ch(initial);			/* start of a graph token */
	    while(chtype(ch)==graph||ch==dot) 	/* read in the graph token */
	    {
		cp_ch(ch);
		ch=charin;
	    }
	    charback(ch);		/* step back from extra char */

	    return(end_token(graph));
	}

	case upper: 
	{
	    register char_t tt;
	     
	    last_char_1 = last_char;
	    cp_ch(ch);
	    while((tt=chtype(ch=charin))==upper || tt==under || tt==lower
		  || tt==number)
		cp_ch(ch);
	    charback(ch);
	    return(end_token(upper));
	}

	case under:
	{
	    register char_t tt;
	     
	    last_char_1 = last_char;
	    cp_ch(ch);
	    while((tt=chtype(ch=charin))==upper || tt==under || tt==lower
		  || tt==number)
		cp_ch(ch);
	    charback(ch);
	    return(end_token(under));
	}

	case lower: 
	{
	    register char_t tt;
	    last_char_1 = last_char;
	     
	    cp_ch(ch);
	    while((tt=chtype(ch=charin))==upper || tt==under || tt==lower
		  || tt==number)
		cp_ch(ch);
	    charback(ch);
	    return(end_token(lower));
	}

	case number: 
	{
	    register fourBytes n = ch - '0';
	    FLOAT fl_num;
	     
	    cp_ch(ch);
	    while (chtype(ch=charin)==number) 
	    {
		n = n * 10 + ch - '0';
		cp_ch(ch);
	    }
            
	    if (ch == quote) 
	    {
		int base = n;
		if (base == 0) 	/* 0'<char> notation */
		{
		    while ((ch=charin)==backslash)
			if ((ch=escape_char())!=LAYOUT)
			    break;

		    if (ch==EOF)
			syntax_error(13, &tokaddr);
		    else 
		        return(number_token((fourBytes)ch));
		}
		else if (base <= 36) 
		{
		    boolean ok = FALSE;
		    n = 0;
		    for (;;) 
		    {
			int digit;
			ch = charin;

			if (ch >= '0' && ch <= '9')
			    digit = ch - '0';
			else if (ch >= 'a' && ch <= 'z')
			    digit = ch - 'a' + 10;
			else if (ch >= 'A' && ch <= 'Z')
			    digit = ch - 'A' + 10;
			else break;
			if (digit >= base)
			    break;
			n = n * base + digit;
			ok = TRUE;
		    }
		    charback(ch);
		    if (n==0 & !ok) 
		    {
			charback(quote);
			return(number_token((fourBytes)base));
		    }
		    else 
		        return(number_token(n));
		}
	    }
	    else if (ch == 'e' || ch == 'E') 
	    {
		get_exponent();
		return(end_token(floating));
	    }
	    else if (ch == dot) 
            {
		if (chtype((ch=charin))==number) 
		{
		    cp_ch(dot);
		    cp_ch(ch);
		    while (chtype((ch=charin))==number)
			cp_ch(ch);
		    if (ch == 'e' || ch == 'E')
			get_exponent();
		    else 
		        charback(ch);
		    return(end_token(floating));
		}
		else 
		{
		    charback(ch);
		    ch = dot;
		}
	    }

	    charback(ch);

	    /* test for overflow */
	    cp_ch('\0');
	    (void)sscanf(tk.buff, "%lg", &fl_num);
	    last_char--;
	    if (fl_num == n)
		return(number_token(n));
	    else 
	        return(end_token(floating));
	}

	case string:
more_string:
	    while((ch=charin)!=dquote) 
	    {
		if (ch==backslash) 
		{
		    if ((ch=escape_char())==LAYOUT)
			continue;
		}
		else if (ch == '\n') 
		{
		    (void)end_token(string);
		    syntax_error(9, &tokaddr);
		}

		if (ch==EOF) 
		{
		    (void)end_token(string);
		    syntax_error(10, &tokaddr);
		}
		else cp_ch(ch);
	    }

	    if ((ch=charin)==dquote) 
	    {
		cp_ch(dquote);
		goto more_string;
	    }
	    else 
	        charback(ch);
	    return(end_token(string));

	case quoted:
more_quoted:
	    while((ch=charin)!=quote) 
	    {
		if (ch==backslash) 
		{
		    if ((ch=escape_char())==LAYOUT)
			continue;
		}
		else if (ch == '\n') 
		{
		    (void)end_token(quoted);
		    syntax_error(11, &tokaddr);
		}

		if (ch == EOF) 
		{
		    (void)end_token(quoted);
		    syntax_error(12, &tokaddr);
		}
		else 
		    cp_ch(ch);
	    }

	    if ((ch=charin)==quote) 
	    {
		cp_ch(quote);
		goto more_quoted;
	    }
	    else 
	        charback(ch);
	    return(end_token(quoted));

	default:
	    return(single_tok((toktype)eof));
    }
}

      /* packaged up tokeniser to allow for token look ahead */

/*------------------------------------------------------------------------------
nextoken(tok,skip)   -    get the next token 

    Consumes one token.  If a token was already read ahead (have_token) it
    is the one delivered and the look-ahead flag is cleared; otherwise a
    fresh token is read.  With skip non-zero, space tokens are passed over.
    *tok is set to the address of the global token; the token type is
    returned.
------------------------------------------------------------------------------*/
toktype nextoken(token **tok, int skip)
{
    toktype type;

    if (have_token) {
        type = tk.tt;
        have_token = FALSE;
    }
    else {
        type = nxtoken();
    }

    if (skip) {				/* skipping space tokens? */
        while (type == space)
            type = nxtoken();
    }

    *tok = &tk;

    return type;
}

/*-----------------------------------------------------------------------------
toktype hedtoken(tok,skip)   -       look ahead one token  

    Like nextoken(), but the token stays pending: have_token is set when a
    token has to be read, so that the following nextoken() delivers the
    same token again.  With skip non-zero, space tokens are passed over.
    *tok is set to the address of the global token; the token type is
    returned.
-----------------------------------------------------------------------------*/
toktype hedtoken(token **tok, int skip)
{
    toktype type;

    if (have_token) {
        type = tk.tt;
    }
    else {
        type = nxtoken();
        have_token = TRUE;
    }

    for (; skip && type == space; )	/* skipping space tokens? */
        type = nxtoken();

    *tok = &tk;

    return type;
}

/*------------------------------------------------------------------------------
hedchar()      -        look at character following next token 

    Makes sure a token has been read ahead, then reads the character that
    follows it, pushes that character straight back and returns it.
------------------------------------------------------------------------------*/
CHARTYPE hedchar(void)
{
    CHARTYPE following;

    if (!have_token) {
        (void) nxtoken();
        have_token = TRUE;
    }

    following = charin;
    charback(following);

    return following;
}
