module

#include "texpt.t"

export lextexp;

-- lextexp : List Char -> List parse_texpLextype
--       convert a string to a list of lexical tokens

local

  -- idchar c: true when c may appear inside an alphanumeric name,
  -- i.e. c is an underscore, a single quote, or satisfies isalnum.
  idchar c = (c = '_') | (c = '\'') | isalnum c
  
  and
 
  -- symbchar c: true when c may appear inside a symbolic (operator) name.
  -- The accepted characters are
  --     !   $ % & '   + , - . /   < = > ? @   [ \ ] ^ _ `   |   ~
  -- The multi-character groups are tested as open intervals of the
  -- character ordering, so the two bounds themselves are excluded.
  symbchar c =
	  let between lo hi = lo < c & c < hi
	  in    c = '!'			--  !
	      | between '#' '('		--  $ % & '
	      | between '*' '0'		--  + , - . /
	      | between ';' 'A'		--  < = > ? @
	      | between 'Z' 'a'		--  [ \ ] ^ _ `
	      | c = '|'			--  |
	      | c = '~'			--  ~

  -- for simplicity, I assume that the following characters cannot 
  -- occur in symbolic names  " # * : ; { }
  -- Thus, I can handle fewer symbolic names than the LML compiler can.

in

  -- lextexp cs: scan the character list cs from left to right and return
  -- the list of tokens found.  The clauses are tried in order, so the
  -- specific cases ('-' followed by '>', a leading '#', '*' or '_') must
  -- stay ahead of the general identifier / symbol clauses at the bottom.
  --
  -- Throughout, `take p xs` is used as "split xs into the leading run of
  -- elements satisfying p and whatever follows" (inferred from how its
  -- result is destructured here; confirm against the library definition).
  --
  -- Fails (via `fail`) on a character that starts no token and on a '{'
  -- that has no matching '}'.
  rec
      lextexp [] = []

      -- '{' ... '}' : everything up to and including the next '}' is
      -- discarded and produces no token.  A missing '}' is reported
      -- explicitly instead of surfacing as a pattern-match failure.
  ||  lextexp ('{'.tail) =
	  case take (~= '}') tail
	  in  _, '}'.rest : lextexp rest
	  ||  _, _        : fail ("Unterminated { ... } section\n")
	  end

      -- White space separates tokens but yields none.
  ||  lextexp (' '.tail)  = lextexp tail
  ||  lextexp ('\n'.tail) = lextexp tail
  ||  lextexp ('\t'.tail) = lextexp tail

      -- Fixed punctuation.
  ||  lextexp ('('.tail) = parse_texpCLPAR . lextexp tail
  ||  lextexp (')'.tail) = parse_texpCRPAR . lextexp tail
  ||  lextexp ('-'.'>'.tail) = parse_texpCARROW . lextexp tail

      -- A bare '#' is the CTIMES token; '#' directly followed by digits
      -- is a constant whose name keeps the leading '#'.
  ||  lextexp ('#'.tail) = 
	  case take isdigit tail
	  in  [], rest 	      : parse_texpCTIMES . lextexp rest
	  || (ds as _._),rest : parse_texpCCONST ('#'.ds) . lextexp rest
	  end

      -- '*' introduces a variable; the '*' itself is not part of the
      -- name carried by the token.
  ||  lextexp ('*'.tail) = 
	  let i,rest = take idchar tail
	  in parse_texpCVAR i . lextexp rest

      -- After '_' the name may continue either alphanumerically or
      -- symbolically.  Both readings are tried and the strictly longer
      -- alphanumeric one wins; on a tie the symbolic reading is used.
      -- The leading '_' is kept in the constant's name.
  ||  lextexp ('_'.tail) = 
	  let    alphaname, rest_a = take idchar tail
	  in let symbname, rest_s  = take symbchar tail
	     in  if length alphaname > length symbname
	         then parse_texpCCONST ('_'.alphaname) . lextexp rest_a
	         else parse_texpCCONST ('_'.symbname) . lextexp rest_s

      -- Alphanumeric constant: starts with a letter, extends over idchars.
  ||  lextexp (id as c._) & (isalpha c) =
	  let i,rest   = take idchar id
	  in parse_texpCCONST i . lextexp rest

      -- Symbolic constant: a maximal run of symbchars.
  ||  lextexp (id as c._) & (symbchar c) =
	  let i,rest   = take symbchar id
	  in parse_texpCCONST i . lextexp rest

      -- Anything else cannot start a token.
  ||  lextexp (x.tail) = fail ("Illegal character: " @ [x] @ "\n")

end -- local

end -- module

