(*************************************************************************)
(*                                                                       *)
(*                     Projet      Formel                                *)
(*                                                                       *)
(*                            CAML                                       *)
(*                                                                       *)
(*************************************************************************)
(*                                                                       *)
(*                            Inria                                      *)
(*                      Domaine de Voluceau                              *)
(*                      78150  Rocquencourt                              *)
(*                            France                                     *)
(*                                                                       *)
(*************************************************************************)

(* gram         The syntax of grammar definitions                        *)
(*		Michel Mauny						 *)

#standard arith true;;
#fast arith true;;

system module Gram ;;

let alpha = <:Caml:Type<'a>>;;

let parse_warning s =
 warning ("line "^string_of_int !line_counter^" "^s);;

let default_header =
 {String_delimitor="\""; Comment_delimitor="%"; Precedences=[]};;

let default_sc = "\"","%";;

let check_delimitor kind s =
 match explode s with
    [s'] -> s'
  | _ -> raise parse kind^" delimitor must be single character";;

let mk_gheader ((s,c),pl) =
 let s' = check_delimitor "string" s
 and c' = check_delimitor "comment" c in
  if s' = c'
   then raise parse
         "you cannot use same characters as string and comment delimitors"
   else {String_delimitor=s'; Comment_delimitor=c'; Precedences=pl};;

let check_precedences (tok1,toks as tokl) =
 let check = function
     Keyword _ -> ()
   | Non_terminal s ->
       raise parse
        "non terminal "^s^" is not allowed in precedences declarations"
   | Predef_tok s -> ()
   | _ -> raise parse "Illegal token in precedences declarations" in
  check tok1;do_list check toks;tokl;;

let check_box = function
    "hov" -> ()
  | "h" -> ()
  | "v" -> ()
  | "hv" -> ()
  | s -> raise parse s^": illegal box type";;

let eval_macro_expr_Token e =
 match eval_prag_syntax e with
    (dynamic (t:Token)) -> t
  | d ->
      ill_typed_macro
       e d (Gconsttype (#<:Caml:Expr<#(MLconst (mlsystyp"Token"))>>,[]));;

grammar for values Gram = 
delimitor
  string is "\""
  comment is "%"
  ;
precedences
  nonassoc BOOL Literal "it";

rule entry Token = 
    parse ext_token tok -> tok
        
and entry Rule_body = 
    parse cases cl -> prefix :: cl
        
and entry Rules = 
    parse grule_list rl -> rev rl
        
and entry Decl = 
    parse top_decl d; Literal ";;" -> d
        
and top_decl = 
    parse Literal "for"; Literal "values"; Gname name; Literal "=";
          header h; grules rl
          -> MLgrammar (Grammar_values {Name=name; Header=h; Rules=rl})
        | Literal "for"; Literal "programs"; Gname name; Literal "=";
          header h; grules rl
          -> MLgrammar
              (Grammar_programs {Name=name; Header=h; Rules=rl})
          
        | Gname name; Literal "="; header h; grules rl
          -> MLgrammar
              (Grammar_programs {Name=name; Header=h; Rules=rl})
          
        | Literal "for"; Macro e; Gname name; Literal "="; header h;
          grules rl
          -> (let gram =
               match eval_prag_syntax e with
                  (dynamic ("values":string)) -> Grammar_values
                | (dynamic ("programs":string)) -> Grammar_programs
                | (dynamic (s:string)) ->
                    raise parse
                     s^
                     ": illegal grammar type (should be values or programs)"
                | d ->
                    ill_typed_macro
                     e d 
                     (Gconsttype
                      (#<:Caml:Expr<#(MLconst (mlsystyp"string"))>>,[])) in
               MLgrammar (gram {Name=name; Header=h; Rules=rl}))
          
        | Gname name; Literal "=="; Gname old
          -> MLsyn_grammar (name,old)
        
and header = 
    parse Literal "rule" -> default_header
        | -> (parse_warning "forgotten keyword \"rule\", continuing...";
              default_header)
            
        | defined_header h; Literal "rule" -> h
        
and defined_header = 
    parse delim_kwd _; string_or_comment sc; Literal ";";
          Literal "precedences"; prec_list pl; Literal ";"
          -> mk_gheader (sc,rev pl)
        | Literal "precedences"; prec_list pl; Literal ";";
          delim_kwd _; string_or_comment sc; Literal ";"
          -> mk_gheader (sc,rev pl)
        | Literal "precedences"; prec_list pl; Literal ";"
          -> mk_gheader (default_sc,rev pl)
        | delim_kwd _; string_or_comment sc; Literal ";"
          -> mk_gheader (sc,[])
        
and delim_kwd = 
    parse Literal "delimitors" -> ()
        | Literal "delimitor" -> ()
        
and string_or_comment = 
    parse Literal "string"; is_kwd _; STRING s -> (s,"%")
        | Literal "comment"; is_kwd _; STRING c -> ("\"",c)
        | Literal "string"; is_kwd _; STRING s; Literal "comment";
          is_kwd _; STRING c -> (s,c)
        | Literal "comment"; is_kwd _; STRING c; Literal "string";
          is_kwd _; STRING s -> (s,c)
        
and prec_list = 
    parse prec_list hl; Literal ";"; prec_elem he -> he::hl
        | prec_elem he -> [he]
        
and prec_elem = 
    parse Literal "right"; (+ (parse prec_token tok -> tok
                                   )) tokl;
          {check_precedences tokl} _ -> Right_toks tokl
        | Literal "left"; (+ (parse prec_token tok -> tok
                                  )) tokl;
          {check_precedences tokl} _ -> Left_toks tokl
        | Literal "nonassoc"; (+ (parse prec_token tok -> tok
                                      )) tokl;
          {check_precedences tokl} _ -> Nonassoc_toks tokl
        | Literal "precedence"; IDENT v -> Prec_def v
        
and grules = 
    parse grule_list rl -> uncons (rev rl)
        
and is_kwd = 
    parse Literal "is" -> ()
        | -> parse_warning "forgotten keyword \"is\", continuing..."
        
and Gname = 
    parse IDENT name -> name
        | Macro e
          -> (match eval_prag_syntax e with
                 (dynamic (s:string)) -> s
               | d ->
                   ill_typed_macro
                    e d 
                    (Gconsttype
                     (#<:Caml:Expr<#(MLconst (mlsystyp"string"))>>,[])))
          
        
and prec_token = 
    parse predef_token s -> Predef_tok s
        | literal s -> Keyword s
        | IDENT s -> Non_terminal s
        
and grule_list = 
    parse grule_list rl; Literal "and"; grule r -> r::rl
        | grule r -> [r]
        | grule_list rl; Literal "#and"; Caml_Expr0 e
          -> (match eval_prag_syntax e with
                 (dynamic (rl1:Grammar_rule list)) -> rev_append rl1 rl
               | d ->
                   ill_typed_macro
                    e d 
                    (Gconsttype
                     (#<:Caml:Expr<#(MLconst (mlsystyp"list"))>>,
                      [Gconsttype
                       (#<:Caml:Expr<#(MLconst (mlsystyp"Grammar_rule"))>>,
                        [])])))
          
        
and grule = 
    parse kind k; typed_ident v; Literal "="; parsing_rule cs
          -> {Kind=k; Rule_name=Rule_name v; Cases=cs}
        
and kind = 
    parse Literal "entry" -> Entry
        | -> Non_exported
        
and typed_ident = 
    parse IDENT name -> (name,alpha)
        | Literal "("; IDENT name; Literal ":"; Caml_Type t;
          Literal ")" -> (name,t)
        
and parsing_rule = 
    parse parse_kwd _; cases cs -> cs
        
and parse_kwd = 
    parse Literal "parse" -> ()
        | Literal "parser" -> ()
        
and cases = 
    parse case_list cl -> uncons (rev cl)
        
and case_list = 
    parse case_list cl; Literal "|"; case c -> c::cl
        | case_list cl; Macro_case mc -> mc cl
        | case c -> [c]
        
and Macro_case = 
    parse Literal "#|"; Caml_Expr0 e
          -> (match eval_prag_syntax e with
                 (dynamic (cl1:Rule_case list)) -> rev_append cl1
               | d ->
                   ill_typed_macro
                    e d 
                    (Gconsttype
                     (#<:Caml:Expr<#(MLconst (mlsystyp"list"))>>,
                      [Gconsttype
                       (#<:Caml:Expr<#(MLconst (mlsystyp"Rule_case"))>>,
                        [])])))
          
        
and case = 
    parse left_elem_list lml; with_prec wp; rule_action a;
          {let left_mem = match wp with [] -> lml | _::_ -> lml@wp in
            {Left_member=left_mem; Bindings=[]; Action=a}} c
          -> c
        | rule_action a -> {Left_member=[]; Bindings=[]; Action=a}
        
and left_elem_list = 
    parse left_elem l;
          ( * (parse Literal ";"; left_elem l -> l
                   )) lml
          -> l::lml
        
and left_elem = 
    parse dollar_binding b -> Dollar_binding b
        | wild_binding tok -> Token tok
        | Literal "-" -> Space_annot
        | Literal "\\" -> Break_annot (0,0)
        | Literal "\\\\" -> Newline_annot
        | Literal "\\-" -> Break_annot (1,0)
        | Literal "\\"; Literal "("; INT n; Literal ","; INT m;
          Literal ")" -> Break_annot (n,m)
        | Literal "["; pp_offset po; {check_box (fst po)} _;
          left_elem_list ll; Literal "]" -> Box_annot (po,ll)
        | Literal "("; left_elem e; Literal ")" -> e
        
and dollar_binding = 
    parse token tok; {parse_caml_pat0 ()} p -> (p,tok)
        | literal_as_binding b -> b
        
and wild_binding = 
    parse literal s -> Keyword s
        
and ext_token = 
    parse literal s -> Keyword s
        | token tok -> tok
        
and literal_as_binding = 
    parse Literal "Literal"; Literal "("; literal_as_binding b;
          Literal ")" -> b
        | STRING lit; Literal "as"; IDENT name
          -> (MLvarpat name,Keyword lit)
        
and token = 
    parse predef_token s -> Predef_tok s
        | IDENT s -> Non_terminal s
        | Literal "{"; Caml_Expr esc; Literal "}"
          -> Escape_tok {Esc_bindings=[]; Escape_expr=esc}
        | Literal "("; regular r; Literal ")" -> r
        | Macro e -> eval_macro_expr_Token e
        
and literal = 
    parse Literal "Literal"; STRING s -> s
        | STRING s -> s
        
and regular = 
    parse parsing_rule cs -> Regular cs
        | Literal "*"; Literal "("; regular r; Literal ")" -> Star r
        | Literal "+"; Literal "("; regular r; Literal ")"
          -> Concat (r,Star r)
        
and rule_action = 
    parse Literal "accept"; Caml_Expr e -> Exit_action e
        | Literal "->"; Caml_Expr e -> Regular_action e
        
and with_prec = 
    parse -> []
        | Literal "with"; Literal "precedence"; prec_token tok
          -> [With_prec tok]
        
and predef_token = 
    parse Literal ("NUM" as s) -> s
        | Literal ("BOOL" as s) -> s
        | Literal ("IDENT" as s) -> s
        | Literal ("STRING" as s) -> s
        | Literal ("INT" as s) -> s
        | Literal ("INFIX" as s) -> s
        | Literal ("FLOAT" as s) -> s
        | Literal ("CHAR" as s) -> s
        | Literal ("EOF" as s) -> s
        | Literal ("BIGINT" as s) -> s
        | Literal ("RATIO" as s) -> s
        | Literal ("DYN" as s) -> s
        | Literal "Num" -> "NUM"
        | Literal "Bool" -> "BOOL"
        | Literal "Ident" -> "IDENT"
        | Literal "String" -> "STRING"
        | Literal "Int" -> "INT"
        | Literal "Infix" -> "INFIX"
        | Literal "Float" -> "FLOAT"
        | Literal "Char" -> "CHAR"
        | Literal "Eof" -> "EOF"
        | Literal "Bigint" -> "BIGINT"
        | Literal "Ratio" -> "RATIO"
        | Literal "Dyn" -> "DYN"
        
and pp_offset = 
    parse -> ("hov",0)
        | Literal "<"; name_of_box s; offset_box n; Literal ">"
          -> (s,n)
        
and offset_box = 
    parse INT n -> n
        | -> 0
        
and name_of_box = 
    parse IDENT s -> s
        | INFIX s -> s
        
and Caml_Type = 
    parse {parse_caml_typ ()} t -> t
        
and Caml_Expr0 = 
    parse {parse_caml_expr0 ()} e -> e
        
and Caml_Expr = 
    parse {parse_caml_expr ()} e -> e
        
and Macro = 
    parse Literal "#"; Caml_Expr0 e -> e
        
;;

let parse_decl = (Gram "Decl").Parse_raw;;

let parse_grammar_decl () =
 try
  match sys_eval_syntax (parse_decl ()) with
     (dynamic (s:MLsyntax)) -> s
   | _ -> system_error "parse_grammar_decl"
 with reraise ->
        error_message "(while parsing grammar declaration)";
        raise reraise;;

end module with value parse_grammar_decl;;
