;;; installed-scm-file

;;;; 	Copyright (C) 1996 Free Software Foundation, Inc.
;;;; 
;;;; This program is free software; you can redistribute it and/or modify
;;;; it under the terms of the GNU General Public License as published by
;;;; the Free Software Foundation; either version 2, or (at your option)
;;;; any later version.
;;;; 
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;;; GNU General Public License for more details.
;;;; 
;;;; You should have received a copy of the GNU General Public License
;;;; along with this software; see the file COPYING.  If not, write to
;;;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
;;;; 




(define-module #/lang/lr0
  :use-module #/lang/grammar
  :use-module #/ice-9/common-list
  :use-module #/ice-9/hcons)

;;; {LR(0) DFA Construction}
;;;
;;; A grammar (see grammar.scm) implies a set of "items" which can be interpreted as
;;; states in an NFA.  An item corresponds to a choice of one
;;; production from the grammar, and a position within that production.
;;;
;;; For example, if a production is:
;;;
;;;  A -> B C D			aka 	(a (b c d) . <reduction rule>)
;;;
;;; Then the items are:
;;;
;;;  A -> * B C D		aka 	((a (b c d) . <reduction rule>) b c d)
;;;  A -> B * C D		aka 	((a (b c d) . <reduction rule>) c d)
;;;  A -> B C * D		aka 	((a (b c d) . <reduction rule>) d)
;;;  A -> B C D	*		aka 	((a (b c d) . <reduction rule>))
;;;
;;; An empty production has exactly one item:
;;;
;;;  A -> *			aka 	((a () . <reduction rule>))
;;;
;;; If an item has nothing to the left of the star, it is called an "initial item".
;;; If an item has nothing to the right of the star, it is called an "final item".
;;;
;;; An item says what we think we are parsing (an A, in the above examples)
;;; and what we expect the next bit of input to be (for example, in
;;; the item A -> * B C D, we next expect to parse a B).
;;;
;;; These items are states in an NFA, the LR(0) NFA, with edges defined
;;; between them.
;;;
;;; The first item in the grammar is conventionally the start state, although
;;; any initial item can be used for this purpose.
;;;
;;; Final states are items with no more expansion to the right of the *.
;;; A final state implies that an entire expansion has been seen and now
;;; a reduction of that expansion is possible.   Final states have no 
;;; outward edges.
;;;
;;; An NFA state (aka an item) has two kinds of edges corresponding to whether 
;;; the symbol to the right of the star is a terminal or non-terminal.
;;;
;;; Terminal edges indicate that a particular type of input token is expected.
;;; If the lookahead is of that type, it can be shifted onto the parse stack
;;; whie crossing the terminal edge to a new state.
;;; 
;;; Non-terminal edges indicate a point where a nested construct should be parsed.
;;; If in state I the symbol following the star is non-terminal B, then there is an epsilon
;;; edge to all initial items associated with productions of B.   In case one of
;;; those epsilon edges leads to a correct parse, there is also an edge labeled
;;; B that can be used to shift the B generated by a reduction.
;;;
;;; For example, in the state:
;;;
;;;	A -> w * B u
;;;
;;; it is legal to shift the non-terminal B (this is sometimes called a "goto" move). 
;;; It is also legal to make an epsilon transition to states:
;;;
;;; 	B -> * w
;;;     B -> * u
;;;     ...
;;;
;;; presuming the corresponding productions exist.  Informally: you're
;;; allowed to absorb the next burst of input as a reduction to B,
;;; therefore, you are also allowed at this point to begin working on
;;; a parse of B.
;;;
;;; The rule applies transitively so that if A -> w * B u epsilon
;;; transitions to B -> * C v, that in turn epsilon transitions
;;; to C -> * z.
;;;
;;; To summarize:
;;;
;;; The lr(0) NFA has final states which specify reductions,
;;; terminal transitions, which specify permissible input,
;;; non-terminal transitions, which specify permissible feedback from
;;; the parsing of nested constructs, and epsilon edges, which specify
;;; points at which nested constructs may begin.  
;;;
;;;
;;; The above describes all the states, designates the start and final
;;; states and defines the transition function of a non-deterministic
;;; automata.  The epsilon edges associated with non-terminal transitions
;;; prevent the automata from being deterministic.
;;;
;;; But the lr(1) parser requires a deterministic lr(0) automata
;;; So the code in this file not only must construct the lr(0) NFA,
;;; but it must convert that NFA into a DFA.
;;;
;;; To parse deterministically, we apply a few stern measures.
;;; First, we combine all states accessible by epsilon transitions
;;; into superstates.   Second, when components of a superstate disagree
;;; about whether to shift some input or reduce because of it, we favor
;;; shifting.   Third, we only permit reductions if we can guarantee
;;; that after a series of reductions, the look-ahead token will be 
;;; shifted.   Fourth, if the components of a superstate disagree 
;;; about which reduction rule to apply, we pick whichever one we
;;; discover first (clearly this could be improved). Fifth and finally,
;;; if only some components of a superstate think the look-ahead 
;;; token is an error, we ignore them (if all components agree the look-ahead
;;; is an error, then so do we).
;;;
;;; Its useful to prove that a particular grammar has the property that
;;; components of an achievable superstate will never disagree about
;;; what reduction rule to apply.  This can be proven automatically.
;;; One convenient technique for lalr(1) grammars is to convert them 
;;; to yacc syntax and run a table generator on them.  Some day, grammar
;;; debugging tools will be added to this library.
;;;
;;;
;;; In the usual way, we can construct a DFA from an LR(0) NFA, combining
;;; multiple NFA states accessible along epsilon paths into singular DFA
;;; superstates.
;;;
;;; item-set-start-kernel g -> item-set
;;;	return the kernel of the starting superstate.
;;;
;;;	Only the initial items of start-symbol productions
;;;	are included in this set.   Compose with item-set-closure 
;;;	to also obtain all items reachable by epsilon transitions.
;;;
;;;
;;; item-set-successor-kernel g is s -> item-set|#f
;;;	return is', is after shifting s.  
;;;
;;;	Only the items which are direct successors of items
;;;	in IS are present in the successor kernel.  To also
;;;	include all items reachable by epsilon transitions,
;;;	is item-set-closure.
;;;
;;;
;;; item-set-closure g is
;;;	Return the epsilon transition closure of is.
;;;
;;; 	In the LR(0) NFA, if we are at the state:
;;;
;;;		A -> w * B v
;;;
;;; 	Then we are also an epsilon transition (not to be confused
;;; 	with an empty reduction) away from also being in the items:
;;;
;;;		B -> * u
;;;		B -> * r
;;;		B -> * s
;;;		...
;;;
;;; 	So if a superstate contains A->w*Bv, then it should also contain
;;; 	the inital states of B.  This rule applies transitively.
;;; 
;;;	If IS was returned by item-set-start-kernel. 
;;;	then this procedure returns the DFA start state.
;;;	If IS was returned by item-set-successor-kernel, and
;;;	the set argument to item-set-successor-kernel was a
;;;	DFA state, then this procedure returns a DFA state.
;;;
;;;
;;; item-set-reductions g is
;;;	Return the set of all productions that can be 
;;;	reduced from is, disregarding the context and
;;;	look-ahead.
;;;


(define-public item-set-start-kernel
  (lambda (g)
    (map (lambda (production)
	   (make-item g production (production-expansion production)))
	 (symbol-productions g (grammar-start-symbol g)))))


(define-public (item-set-successor-kernel g is symbol)
  (pick-item-set-mappings g
			  (lambda (item)
			    (let ((pos (item-position g item)))
			      (and pos
				   (eq? symbol (car pos))
				   (make-item g
					      (item-production g item)
					      (cdr pos)))))
			  is))

(define-public item-set-reductions
  (lambda (g is)
    (pick-item-set-mappings g
			    (lambda (item)
			      (let ((pos (item-position g item)))
				(and (not pos)
				     (item-production g item))))
			    is)))

(define-public item-set-closure
  (lambda (g is)
    (apply item-set-union g (item-set-map g (lambda (i) (item-closure g i)) is))))

(define item-closure
  (let (;; A memo of closures already computed.
	;;
	(memo (make-weak-key-hash-table 1031))
	
	;; symbol-item-set-kernel g s
	;;   Return all the initial items for non-terminal S.
	;;
	(symbol-item-set-kernel-members (lambda (g s)
					  (map (lambda (production)
						 (make-item g production (production-expansion production)))
					       (symbol-productions g s)))))


    ;; This is the digraph algorithm from "Efficient Construction of LALR(1) Lookahead Sets" 
    ;; by F. DeRemer  and T. Pennello, in ACM TOPLS Vol 4 No 4, October 1982. 
    ;; They credit J. Eve and R. Kurki Suonio, "On Computing the transitive closure
    ;; of a relation."  Acta Inf. 8 1977.
    ;;
    ;; It is used here to compute epsilon closures of individual items.
    ;;

    (lambda (g it)
      (cond
       ((hashq-ref memo it) => id)

       (else (let traverse ((x it)
			    (approximations '()))
	       (let* ((depth (if approximations
				 (+ 1 (cadar approximations))
				 1))
		      (next-symbol (and (item-position g x) (car (item-position g x))))
		      (next-states (symbol-item-set-kernel-members g next-symbol))
		      (approximate-x-closure (apply item-set g x next-states))
		      (new-approximations (acons x (cons depth approximate-x-closure) approximations)))

		 (let next-loop ((pos next-states)
				 (current-approx new-approximations))
		      (if pos
			  (let* ((y (car pos))
				 (x-approx (and=> (assq x current-approx) cdr))
				 (y-approx (or (and=> (assq y current-approx) cdr)
					       (and=> (hashq-ref memo y) (lambda (v) (cons (car x-approx) v)))))
				 (better-approx (if y-approx
						    current-approx
						    (traverse y current-approx)))
				 (better-y-approx (or y-approx
						      (and=> (assq y better-approx) cdr)
						      (and=> (hashq-ref memo y) (lambda (v) (cons (car x-approx) v)))))
				 (improved-x-approx (cons (min (car x-approx) (car better-y-approx))
							  (append (cdr x-approx) (cdr better-y-approx)))))
			    ;; We could get by without the side effect here, in case
			    ;; anybody is counting.
			    ;;
			    (set-cdr! (assq x better-approx) improved-x-approx)
			    (next-loop (cdr pos) better-approx))

			  (if (not (= depth (cadr (assq x current-approx))))
			      current-approx
			      (let ((actual-x-value (apply item-set g (cddr (assq x current-approx)))))
				(let unify-component-loop ((stack current-approx))
				  (and stack
				       (begin
					 (hashq-set! memo (caar stack) actual-x-value)
					 (if (not (eq? x (caar stack)))
					     (unify-component-loop (cdr stack))
					     (cdr stack)))))))))))
	     (hashq-ref memo it))))))
				 



;;; {Item and Item Set Operations}
;;;
;;; Items and items sets are an abstract data type whose
;;; representation is hidden from the definitions of the five basic
;;; superstate functions, item-set-successor, item-set-start-kernel,
;;; item-set-reductions, item-set-closure, and item-set-nullable?.
;;;
;;; The abstract data type is encapsulated in the definitions
;;; listed below:
;;;
;;;
;;; make-item g production right-hand-expansion
;;;	Return an item for production with the * before RIGHT-HAND-EXPANSION.
;;;	RIGHT-HAND-EXPANSION must be eq? to some tail of the expansion
;;;	part of PRODUCTION.
;;;
;;; item-production g item
;;;	Return the production corresponding to ITEM
;;;
;;; item-position g item
;;;	Return the right-hand-expansion corresponding to ITEM.
;;;
;;; item-set g . items
;;;	Construct an item set from the item arguments.
;;;
;;; item-set-map g f is
;;;	Call f repeatedly, passing each item in IS, returning a list of results.
;;;
;;; pick-item-set-mappings g f is
;;;	Call f repeatedly, passing each item of IS, returning a list of the non-#f results.
;;;
;;; item-set-union g isa isb isc ...
;;; item-set-empty? g is
;;;	Standard set operations on item sets.
;;;
;;;


;;; {Item Functions}
;;;

;; grammar-item-table g
;;   Return the item table for G, making it if necessary.
;; 
;; Every grammar is given an item table: a vector of all items
;; associated with the grammar.  An item's position in this table
;; is called the item's "index".
;;
;; The extent of the table is tied to the extent of G, and the extent
;; of items to the table.  For a given production, and position,
;; make-item (see below) will always return eq? items from the item-table
;; of G.
;;
;;

(define-public (grammar-item-table g)
  (define (production-items g p)
    (let loop ((pos (production-expansion p))
	       (so-far '()))
      (if (null? pos)
	  (cons (make-item g p '()) so-far)
	  (loop (cdr pos) (cons (make-item g p pos) so-far)))))

  (define (precompute-item-indexes g)
    (let* ((table (grammar-item-table g))
	   (bound (vector-length table)))
      (let loop ((n 0))
	(if (< n bound)
	    (begin
	      (assert-item-index g (vector-ref table n) n)
	      (loop (+ 1 n)))))))

  (define (assert-item-index g i n) (set-object-property! i 'item-index n))

  (or (hashq-ref (grammar-cache g) 'grammar-item-table)
      (let ((table (hashq-set! (grammar-cache g) 'grammar-item-table
			       (list->vector (apply append! (map (lambda (p) (production-items g p)) g))))))
	(precompute-item-indexes g)
	table)))


;; item-index g i
;;
(define (item-index g i)
  (or (object-property i 'item-index)
      (begin
	(grammar-item-table g)
	(object-property i 'item-index))))

;; make-item g production position
;;   Construct and return the indicated item.
;;
;;   Note that POSITION must be eq? to some tail of the expansion
;;   of PRODUCTION and that PRODUCTION must not share any cons pairs
;;   with any other grammar production.
;;
(define (make-item g p pos)
  (hashq-cons (grammar-cache g) p pos))
  
(define (item-production g x) (car x))
(define (item-position g x) (cdr x))

;; item-<? g a b
;;    Compute an ordering of items, based on item indexes.
;;
(define (item-<? g itema itemb) (< (item-index g itema) (item-index g itemb)))




;;; {Item Set Functions}
;;;
;;; Item sets are represented as lists of items.   Sets
;;; constructed by these functions have the property that
;;; equal? sets are eq?.
;;;


(define (item-set-empty? g is) (null? is))
(define (item-set-map g f is) (map f is))
(define (pick-item-set-mappings g f is) (pick-mappings f is))

;; item-set g . elts
;;   Build an item set from its members.
;;
;;   This implementation effectively builds a "bit vector" and then
;;   reads the set off from that.
;;
(define (item-set g . elts)
  (index-list->item-set g (item-set->index-list g elts)))

(define (item-set-union g . sets) (apply item-set g (apply append sets)))

(define (item-set->index-list g il)
  (map (lambda (i) (item-index g i)) il))

;; index-list->item-set g il
;; Compute an item set (as a list of items) from a list of item indexes.
;;
;; For equal? index lists and eq? g, eq? sets will be returned.
;;
(define (index-list->item-set g il)
  (let* ((table (grammar-item-table g))
	 (cache (grammar-cache g))
	 (flag-table (make-string (vector-length table) #\000)))
    (for-each (lambda (n) (string-set! flag-table n #\001)) il)
    (let loop ((s flag-table)
	       (base 0)
	       (set '()))
      (let ((n (string-index s #\001 base)))
	(if (not n)
	    set
	    (loop s (+ n 1) (hashq-cons cache (vector-ref table n) set)))))))



