;;; -*- Mode: LISP; Package: ID3; Syntax: Common-lisp;                   -*-
;;;
;;; ************************************************************************
;;;
;;; PORTABLE AI LAB - UNI ZH
;;;
;;; ************************************************************************
;;;
;;; Filename:   id3.cl
;;; Short Desc: Main routine for ID3 algorithm
;;; Version:    2.0
;;; Status:     Review
;;; Last Mod:   5.2.91 TW
;;; Author:     Thomas Wehrle
;;;
;;; Copyright (c) 1992 Istituto Dalle Molle (IDSIA), University of
;;; Zurich, Swiss Federal Institute of Technology Lausanne.
;;;
;;; Permission is granted to any individual or institution to use, copy,
;;; modify, and distribute this software, provided that this complete
;;; copyright and permission notice is maintained, intact, in all
;;; copies and supporting documentation.
;;;
;;; IDSIA provides this software "as is" without express or implied
;;; warranty.  
;;;

;;; --------------------------------------------------------------------------
;;; Change History: 
;;; 
;;;	
;;; --------------------------------------------------------------------------

;;; ==========================================================================
;;; DESCRIPTION
;;; ==========================================================================


;;; The basic ID3 algorithm.  ID3 takes as input a set of examples,
;;; represented as lists of attributes and their values.  It computes
;;; a decision tree which has the shortest possible expected search
;;; depth.  This is done by computing the information content
;;; (entropy) of each attribute, and selecting the most informative
;;; attribute to test next.  More details can be found in the Module
;;; Description for Machine Learning accompanying the Portable AI Lab.
;;;
;;; This version was coded from the following article:
;;;    Thompson, B. & Thompson, W. (1986). Finding Rules in Data.
;;;    BYTE, 11, 149 - 158.


;;; ==========================================================================
;;; PACKAGE DECLARATIONS
;;; ==========================================================================


(in-package :id3)

(export '(classify  *accept-clashes* *precision*))


;;; ==========================================================================
;;; GLOBAL VARIABLE DECLARATIONS
;;; ==========================================================================


(defvar *accept-clashes* nil)           ; handle with clashes

(defvar *precision* 'double-float)	; arithmetic precision



;;; ==========================================================================
;;; ENTROPY calculation
;;; ==========================================================================


;;; p computes the conditioned probability that the target-attribute
;;; has value t-value, given that a particular attribute has a value.
;;; This is computed for a particular set of data by actually
;;; computing the frequency with which the target-attribute takes the
;;; value t-value when the attribute has the specified value, divided
;;; by the overall frequency with which the attribute has value.  This
;;; is written as p(Ci|Aj) in Thompson, where t-value is the i'th
;;; value of target-attribute, and value is the j'th value of
;;; attribute.


(defmethod p (target-attribute t-value attribute value (data table))
  (/ (count-occurences-if target-attribute t-value attribute value data)
     (coerce (count-occurences attribute value data) *precision*)))


;;; H is the conditioned entropy of the target-attribute, given that
;;; attribute has a particular value.  It is computed from the
;;; conditional probabilities defined above.

(defmethod H (target-attribute attribute value (data table))
  (- (reduce (function +)
             (mapcar (function 
		      (lambda (t-value)
			(let ((probability 
			       (p target-attribute t-value attribute value data)))
			  (* probability (log2 probability)))))
		     (get-possible-values target-attribute data)))))


;;; The total entropy of an attribute, with respect to a target
;;; attribute.  This is a weighted sum of the conditioned entropies
;;; (H, above), given all possible values of the attribute.  It
;;; represents the expected amount of information left to be
;;; determined after the value for attribute has been specified.  This
;;; value is minimum for the best attribute to be tested next.

(defmethod entropy (target-attribute attribute (data table))
  (reduce (function +)
          (mapcar (function 
		   (lambda (value)
		     (let ((HH (H target-attribute attribute value data)))
		       (* HH (/ (count-occurences attribute value data)
				(number-of-rows data))))))
                  (get-possible-values attribute data))))


;;; The best attribute to select next is the one with the lowest
;;; entropy.  The entropy of each attributes in the table is computed,
;;; and the lowest one selected.

(defmethod best-attribute (target-attribute (data table))
  (format-display *verbose* "~%===========================================================")
  (let ((*readable* t))
    (format-display *verbose* "~%Try to find best splitting attribute for ~a in ~a" 
		    target-attribute data))

  (let* ((entropies (mapcar 
		     (function 
		      (lambda (attribute)
			(cons attribute (entropy target-attribute attribute data))))
		     (remove target-attribute (attributes data))))
	 (minimum (do ((pair-list entropies (cdr pair-list))
		       (entr most-positive-fixnum)
		       (pair nil))
		      ((null pair-list) pair)
		    (when (< (cdr (car pair-list)) entr)
		      (setf entr (cdr (car pair-list))
			    pair (car pair-list))))))

    (format-display *verbose* "~%   Entropies  : ~a" entropies)
    (format-display *verbose* "~%   Choosing   : ~a~%~%" 
		    (if (null minimum)
			"<unable to get entropy>"
		      (car minimum)))

    (cond ((null minimum) nil)
	  (t (car minimum)))))



;;; ==========================================================================
;;; DECISION TREE CONSTRUCTION
;;; ==========================================================================


;;; Determines if in the data table, the target attribute has exactly
;;; one value.  This is used to detect successful bottom of recursion
;;; in classify.

(defmethod one-value-p (target-attribute (data table))
  (= (length (get-possible-values target-attribute data)) 1))


;;; Constructs a decision tree for the target-attribute from a data
;;; table.  

(defmethod classify (target-attribute (data table))
  (if (one-value-p target-attribute data) ; if the target attribute
					; has only one value, then
					; the decision tree is
					; trivial. 
      (progn
	(let ((*readable* t))
	  (format-display *verbose* " <no further splitting>~%~a~%" data))
	(make-instance 'decision-tree
	  :content target-attribute 
	  :descendants 
	  (list (make-instance 'decision-tree
		  :content (get-nth-value target-attribute 0 data)))))
					; otherwise, select the best
					; splitting attribute.
    (let ((split-attribute (best-attribute target-attribute data)))
      (if (null split-attribute)
	  (if *accept-clashes* 
	      (make-instance 'decision-tree
		:content target-attribute
		:descendants
		(list 
		 (make-instance 'decision-tree
		   :content (get-possible-values target-attribute data))))
	    (progn
	      (format-display *verbose* "~%Too few attributes or bad data. Cannot find rule~%~%")
	      (make-instance 'decision-tree
		:content 'classification
		:descendants 
		(list (make-instance 'decision-tree
			:content "*impossible*")))))
	(make-instance 'decision-tree	; construct a tree with the
					; chosen splitting attribute
					; at its root, with its
					; descendants given by a
					; recursive call to id3 on the
					; subtables with the splitting
					; attribute taking each
					; possible value. 
	  :content split-attribute 
	  :descendants 
	  (mapcar
	   (function 
	    (lambda (value)
	      (format-display *verbose* "~%For ~a = ~a:" split-attribute value)
	      (make-instance 'decision-tree
		:content value
		:descendants 
		(list (classify 
		       target-attribute 
		       (get-subtable split-attribute value data))))))
	   (get-possible-values split-attribute data)))))))


;;; ========================================================================
;;; END OF FILE
;;; ========================================================================
