/** here are some utilities for reading in the input file and so forth **/
/* author: Avrim Blum, 1995.  */
#include<stdlib.h>
#include<stdio.h>
#include<ctype.h>
#include<string.h>
#include<strings.h>
#include "header.h"
/* Event-name prefixes that mark an example we want to read, the number of
   leading characters of each prefix that is compared, and how many
   prefixes there are.  The three definitions must be kept in step. */
char *desired_event[] = {
  "add-event",
  "copy-event"
};
int desired_len[] = {
  9,     /* length of "add-event"  */
  10     /* length of "copy-event" */
};
int num_desired_events = 2;
/* Name of the current example.  find_next_desired_event() stores the most
   recently read event name here and returns a pointer to this buffer.
   Room for 99 characters plus the terminating '\0'. */
char example_name[100];   /* the name of the current example */
/* Example buffer.  Within this file it is referenced only by oldmain(). */
example_t the_example[MAX_FEATURES]; /* NO LONGER IN USE */

/* Feature names for the "day-of-week set" entry of the_feature_set.
   Order matters: when this array is handed to read_current_example(),
   the value of entry i is stored in slot i of the example. */
char *day_of_week_features[] = {
  "most-common-time-these-attendees",
  "day-of-week-of-last-meeting-with-these-attendees",
  "department-attendees",
  "percent-friday-used-last-60-days",
  "cmu-attendees?",
  "req-event-type",
  "req-course-name",
  "req-speakers",
  "group-attendees?",
  "single-person?",
  "historical-immediacy",
  "req-seminar-type",
  "action",
  "position-attendees",
  "known-attendees?",
  "day-of-week-of-next-meeting-with-these-attendees"
};
/* Feature names for the "req-start-time set" entry of the_feature_set.
   Order matters: when this array is handed to read_current_example(),
   the value of entry i is stored in slot i of the example. */
char *req_start_time_features[] = {
  "day-of-week-of-last-meeting-with-these-attendees",
  "req-event-type",
  "req-course-name",
  "req-speakers",
  "single-person?",
  "cmu-attendees?",
  "group-attendees?",
  "position-attendees",
  "department-attendees",
  "sponsor-attendees",
  "known-attendees?",
  "action",
  "day-of-week",
  "req-seminar-type",
  "most-common-time-these-attendees-last-60-days"
};
/* Feature names for the "req-duration set" entry of the_feature_set.
   Order matters: when this array is handed to read_current_example(),
   the value of entry i is stored in slot i of the example. */
char *req_duration_features[] = {
  "req-seminar-type",
  "known-attendees?",
  "sponsor-attendees",
  "department-attendees",
  "position-attendees",
  "group-attendees?",
  "cmu-attendees?",
  "req-course-name",
  "group-name",
  "single-person?",
  "req-event-type"
};
/* Feature names for the "req-location set" entry of the_feature_set.
   Order matters: when this array is handed to read_current_example(),
   the value of entry i is stored in slot i of the example. */
char *req_location_features[] = {
  "sponsor-attendees",
  "req-seminar-type",
  "department-attendees",
  "position-attendees",
  "group-attendees?",
  "req-course-name",
  "department-speakers",
  "group-name",
  "lunchtime?",
  "single-person?",
  "number-of-person",
  "req-event-type"
};
/* Feature names for the "big set" entry of the_feature_set.
   Order matters: when this array is handed to read_current_example(),
   the value of entry i is stored in slot i of the example. */
char *bigset_features[] = {
  "req-seminar-type",
  "known-attendees?",
  "sponsor-attendees",
  "department-attendees",
  "position-attendees",
  "group-attendees?",
  "cmu-attendees?",
  "req-course-name",
  "department-speakers",
  "req-speakers",
  "group-name",
  "lunchtime?",
  /* disabled entry: "dow-percent-morning-used-last-60-days" */
  "single-person?",
  "number-of-person",
  "time-until-next-meeting-with-these-attendees",
  "time-since-last-meeting-with-these-attendees",
  "third-most-common-time-last-60-days-this-meeting-type",
  "third-most-common-time-last-60-days",
  "second-most-common-time-last-60-days-this-meeting-type",
  "second-most-common-time-last-60-days",
  "most-common-time-these-attendees-last-60-days",
  "most-common-time-these-attendees",
  "most-common-time-last-60-days-this-meeting-type",
  "most-common-time-last-60-days",
  "most-common-day-these-attendees-last-60-days",
  "most-common-day-these-attendees",
  "historical-immediacy-last-60-days",
  "historical-immediacy",
  "duration-of-next-meeting-with-these-attendees",
  "duration-of-last-meeting-with-these-attendees",
  "day-of-week-of-next-meeting-with-these-attendees",
  "day-of-week-of-last-meeting-with-these-attendees",
  "req-sorted-attendees",
  "req-event-type"
};

/* Feature names for the "Dayne & Rich set" entry of the_feature_set.
   Order matters: when this array is handed to read_current_example(),
   the value of entry i is stored in slot i of the example. */
char *DayneRich_features[] = {
  "time-until-next-meeting-with-these-attendees",
  "time-since-last-meeting-with-these-attendees",
  "third-most-common-time-last-60-days-this-meeting-type",
  "third-most-common-time-last-60-days",
  "second-most-common-time-last-60-days-this-meeting-type",
  "second-most-common-time-last-60-days",
  "most-common-time-these-attendees-last-60-days",
  "most-common-time-these-attendees",
  "most-common-time-last-60-days-this-meeting-type",
  "most-common-time-last-60-days",
  "most-common-day-these-attendees-last-60-days",
  "most-common-day-these-attendees",
  "historical-immediacy-last-60-days",
  "historical-immediacy",
  "duration-of-next-meeting-with-these-attendees",
  "duration-of-last-meeting-with-these-attendees",
  "day-of-week-of-next-meeting-with-these-attendees",
  "day-of-week-of-last-meeting-with-these-attendees",
  "req-location",
  "req-sorted-attendees",
  "req-event-type",
  "req-seminar-type",
  "req-course-name",
  "req-speakers",
  "single-person?",
  "action",
  "cmu-attendees?",
  "group-attendees?",
  "position-attendees",
  "department-attendees",
  "sponsor-attendees",
  "known-attendees?"
};


/* Table of the available feature sets.  Each entry gives a display name,
   the array of feature names, and the number of names in that array.
   The counts are computed from the arrays themselves so they cannot get
   out of step when a feature is added to or removed from a list. */
feature_set the_feature_set[] = {
  {"req-location set", req_location_features,
   (int) (sizeof req_location_features / sizeof req_location_features[0])},
  {"req-duration set", req_duration_features,
   (int) (sizeof req_duration_features / sizeof req_duration_features[0])},
  {"req-start-time set", req_start_time_features,
   (int) (sizeof req_start_time_features / sizeof req_start_time_features[0])},
  {"day-of-week set", day_of_week_features,
   (int) (sizeof day_of_week_features / sizeof day_of_week_features[0])},
  {"big set", bigset_features,
   (int) (sizeof bigset_features / sizeof bigset_features[0])},
  {"Dayne & Rich set", DayneRich_features,
   (int) (sizeof DayneRich_features / sizeof DayneRich_features[0])}
};

/* Number of entries in the_feature_set (also derived, not hand-counted). */
int num_feature_sets =
  (int) (sizeof the_feature_set / sizeof the_feature_set[0]);

/* Name of the feature we want to predict.  read_current_example() compares
   every feature name it reads against this string to locate the answer. */
char to_predict[100];  /* This is the name of  what we want to predict */


/*** Read the current example from fp, looking for the desired features and
  storing their values into the argument "example": the value of
  feature[i] goes into example[i].  The features are not assumed to be in
  any specific order in the input.  The first character of every name
  token is skipped when comparing (it is the item's opening paren).

  This routine also finds the value of the feature we want to predict
  (named by the global "to_predict") and puts it into the global
  "correct_ans".  to_predict may or may not be one of the features in
  the set.

  Returns:
    -1        if a name token could not be read (EOF);
    NORESULT  if the "result" slot is *novalue* or *inferred.novalue*
              (the rest of the example is then skipped);
    FMISSING  if the end of the example was reached without seeing the
              to_predict feature;
    0         otherwise.

  NOTE: features from "feature" that never show up are not reported;
        their example[] slots are simply left untouched.
  NOTE: "result" cannot be a feature in set.
  NOTE: the end of the example is only detected right after an item that
        is neither in "feature" nor the to_predict feature.
 ***/
int 
read_current_example(FILE *fp, char *feature[], int num, example_t example[])
{
  char name[100], junk[30];
  int i, returnval = 0, found_answer = 0;

  while(1) {
    /* feature name; the field width keeps an over-long token from
       running past the end of name[] */
    if (fscanf(fp,"%99s",name) != 1) {
      returnval = -1;
      break;
    }
    if (strcmp(name+1,"result") == SAME) { /* check for novalue */
      read_next_object(fp,junk,(int) sizeof junk);
      if (strcmp(junk,"*novalue*") == SAME || 
	  strcmp(junk,"*inferred.novalue*") == SAME) {
	returnval = NORESULT;
	read_to_end_of_list(fp);  /* scan past rest of example */
	break;
      }
      else continue;
    }
    /* find feature in "feature" if any */
    for(i=0; i < num; ++i)
      if (strcmp(name+1, feature[i]) == SAME) break; /*(stripping off paren)*/
      
    if (i != num) { /* it's a useful one: its value goes in the same slot */
      read_next_object(fp, example[i], INPUT_LEN);
    }
    if (strcmp(name+1,to_predict) == SAME) { /* feature to predict */
      /* if it was also one of the desired features its value has already
         been consumed from the file, so copy it instead of reading again */
      if (i==num) read_next_object(fp, correct_ans, INPUT_LEN);
      else strcpy(correct_ans, example[i]);
      found_answer = 1;
    } else if (i==num) {                          /* no match */
      read_to_end_of_list(fp);                    /* so junk rest of item */
      if (getc(fp) == ')') break;                 /* hit end of example */
    }
  }
  if (returnval == 0 && !found_answer) returnval = FMISSING;
  return returnval;
}
    
/*** Consume characters from fp until one more closing paren than opening
     paren has been seen, i.e. until the enclosing list is closed.  The
     closing paren itself is consumed.  Stops quietly at EOF. ***/
void read_to_end_of_list(FILE *fp)
{
  int depth = 0;   /* open parens seen minus close parens seen */
  int ch;

  for (ch = getc(fp); ch != EOF; ch = getc(fp)) {
    switch (ch) {
    case '(':
      ++depth;
      break;
    case ')':
      if (--depth < 0)
	return;    /* this paren closed the list we started inside */
      break;
    default:
      break;
    }
  }
}

/* Read the next object from fp into str.  An "object" is a string or a
   list.  Leading whitespace is skipped.  Parentheses are dropped and each
   whitespace character inside the object is replaced by a '+' sign, so a
   list comes out as its elements joined with '+'.
   At most len-1 characters are stored in str, which is always terminated
   with '\0'; anything beyond that is read but discarded.
   Reading stops after the first unmatched closing paren (which is
   consumed) or at EOF.  Returns the last value read: ')' or EOF.
 */
int read_next_object(FILE *fp, char str[], int len)
{
  int depth = 0;      /* nesting level of parens inside the object */
  int stored = 0;     /* characters written to str so far */
  int ch;
  char *out = str;

  /* skip to the first non-space character and push it back */
  do {
    ch = getc(fp);
  } while (isspace(ch));
  ungetc(ch, fp);

  while ((ch = getc(fp)) != EOF) {
    if (ch == '(') {
      ++depth;
    } else if (ch == ')') {
      if (--depth < 0)
	break;          /* the paren that closes the enclosing item */
    } else if (stored < len - 1) {
      *out++ = isspace(ch) ? '+' : (char) ch;
      ++stored;
    }
  }
  *out = '\0';
  return ch;
}

/* Scan forward in fp for the next event whose name starts with one of the
   prefixes in "desired_event".  Events with other names are skipped by
   reading to the end of their list.
   On success the rest of the current line is consumed, the event name is
   left in the global "example_name", and a pointer to that buffer is
   returned.  Returns NULL on EOF.
 */
char * find_next_desired_event(FILE *fp)
{
  int letter, i;
  while(1) {
    /* read up to first real letter */
    while(!isalnum(letter = getc(fp)) && letter != EOF);
    if (letter == EOF) break;
    ungetc(letter,fp);
    /* width 99 matches example_name[100]; a longer token is cut short
       rather than written past the end of the buffer */
    if (fscanf(fp,"%99s",example_name) != 1) break;
    for(i=0; i < num_desired_events; ++i) {
      if (strncmp(example_name,desired_event[i], desired_len[i]) == SAME) {
	/* read through newline; also stop at EOF so that a file whose
	   last line has no newline cannot make this spin forever */
	while ((letter = getc(fp)) != '\n' && letter != EOF);
	return example_name;
      }
    }
    read_to_end_of_list(fp);
  }
  return NULL;
}


/* Read up to the desired date: skip desired events until one is found
   whose name, from its first dash on, has the form
   "-event-<month>-<day>-<year>" with a date on or after month/day/year.
   Returns that event's name (as returned by find_next_desired_event), or
   NULL if the input runs out first.  Events whose name does not contain
   a parsable date are skipped.
 */
char *read_upto_date(FILE *fp, int month, int day, int year)
{
  char *line, *dash;
  int curmonth, curday, curyear;

  while((line = find_next_desired_event(fp)) != NULL) {
    dash = strchr(line, '-');            /* go to first dash */
    if (dash == NULL) continue;
    /* now get month,day,year; all three must be present, otherwise the
       comparison below would read uninitialized values */
    if (sscanf(dash,"-event-%d-%d-%d",&curmonth, &curday, &curyear) != 3)
      continue;
    if ((curyear > year) ||
	(curyear == year && curmonth > month) ||
	(curyear == year && curmonth == month && curday >= day))
      return line;
  }
  return NULL;   /* ran out of events before reaching the date */
}

/* Interactive test driver: asks for a file name and a date, skips to the
   first desired event on or after that date, then repeatedly asks for a
   feature set number and reads the next example with that set.
   Exits with status 1 if the file name or date cannot be read or the
   file cannot be opened.  Returns 0 when the events or the user's input
   run out.
 */
int oldmain(void)
{
  char filename[150];
  char *first;
  FILE *fp;
  int month,day,year, i;

  printf("Name of file to read: ");
  /* width 149 matches filename[150] */
  if (scanf("%149s",filename) != 1) {
    printf("no file name given.\n");
    exit(1);
  }
  if ((fp = fopen(filename,"r")) == NULL) {
    printf("can't open file '%s'.\n", filename);
    exit(1);
  }
  printf("what date to skip up to (month,day,year)? ");
  if (scanf("%d%d%d",&month,&day,&year) != 3) {
    printf("could not read a date.\n");
    fclose(fp);
    exit(1);
  }
  /* read_upto_date gives NULL when no event qualifies; do not hand that
     to printf's %s */
  first = read_upto_date(fp,month,day,year);
  if (first == NULL) {
    printf("no event on or after that date.\n");
    fclose(fp);
    return 0;
  }
  printf("%s\n",first);
  while(1) {
    printf("avaliable feature sets: ");
    for(i=0; i < num_feature_sets; ++i) 
      printf("%s (%d), ", the_feature_set[i].name, i);
    printf("\nenter desired feature set number: ");
    if (scanf("%d",&i) != 1) break;      /* EOF or not a number: stop */
    if (i < 0 || i >= num_feature_sets) {
      /* keep the index inside the_feature_set and ask again */
      printf("no such feature set: %d\n", i);
      continue;
    }
    printf("next example (using %s) is:\n", the_feature_set[i].name);
    if (read_current_example(fp, the_feature_set[i].features,
			     the_feature_set[i].num_features,
			     the_example) == -1)
      printf("example %s not in correct order.\n", example_name);
    if (find_next_desired_event(fp) == NULL) break;   /* no more events */
  }
  fclose(fp);
  return 0;
}
    
/********** some more utilities ***********/

/* Given an array of len integers, takes the best choice by majority vote:
   the value that occurs most often wins, and ties are broken in favour of
   the value whose first occurrence has the lowest index.  Returns that
   value.  If len <= 0 there is nothing to vote on and 0 is returned.
 */
int best_choice(int array[],int len)
{
  /* bestchoice starts at 0 so the result is defined for an empty array */
  int i,j,votes, maxv=0, bestchoice=0;
  for(i=0; i < len; ++i)  {
    /* count array[i] itself plus every later copy; for the first
       occurrence of a value this is its total number of votes */
    for(votes=1, j=i+1; j < len; ++j) 
      if (array[i] == array[j]) ++votes;
    /* strict '>' keeps the earliest value when counts are tied */
    if (votes > maxv) {
      maxv = votes;
      bestchoice = array[i];
    }
  }
  return(bestchoice);
}

