%{
(* START HEADER *)

open AST

(* END HEADER *)
%}


(* Entry points *)

%start program
%type <AST.t> program

%%

(* RULES *)

(* This parser leverages Menhir-specific features, in particular
   parametric rules, rule inlining and primitives to get the source
   locations of tokens from the lexer engine generated by ocamllex.

     We define below two rules, [reg] and [oreg]. The former parses
   its argument and returns its synthesised value together with its
   region in the source code (that is, start and end positions --- see
   module [Region]). The latter discards the value and only returns
   the region: this is mostly useful for parsing keywords, because
   those can be easily deduced from the AST node and only their source
   region has to be recorded there.
*)

%inline reg(X):
  X { let start  = Pos.from_byte $symbolstartpos
      and stop   = Pos.from_byte $endpos in
      let region = Region.make ~start ~stop
      in Region.{region; value=$1} }

%inline oreg(X):
  reg(X) { $1.Region.region }

(* Keywords, symbols, literals and virtual tokens *)

kwd(X) : oreg(X)     { $1 }
sym(X) : oreg(X)     { $1 }
ident  : reg(Ident)  { $1 }
constr : reg(Constr) { $1 }
string : reg(Str)    { $1 }
eof    : oreg(EOF)   { $1 }

(* The rule [sep_or_term(item,sep)] ("separated or terminated list")
   parses a non-empty list of items separated by [sep], and optionally
   terminated by [sep]. *)

sep_or_term_list(item,sep):
  nsepseq(item,sep) {
    $1, None
  }
| nseq(item sep {$1,$2}) {
    let (first,sep), tail = $1 in
    let rec trans (seq, prev_sep as acc) = function
      [] -> acc
    | (item,next_sep)::others ->
        trans ((prev_sep,item)::seq, next_sep) others in
    let list, term = trans ([],sep) tail
    in (first, List.rev list), Some term }

(* Compound constructs *)

par(X): sym(LPAR) X sym(RPAR) { {lpar=$1; inside=$2; rpar=$3} }

brackets(X): sym(LBRACKET) X sym(RBRACKET) {
  {lbracket=$1; inside=$2; rbracket=$3} }

(* Sequences

   Series of instances of the same syntactical category have often to
   be parsed, like lists of expressions, patterns etc. The simplest of
   all is the possibly empty sequence (series), parsed below by
   [seq]. The non-empty sequence is parsed by [nseq]. Note that the
   latter returns a pair made of the first parsed item (the parameter
   [X]) and the rest of the sequence (possibly empty). This way, the
   OCaml typechecker can keep track of this information along the
   static control-flow graph. The rule [sepseq] parses possibly empty
   sequences of items separated by some token (e.g., a comma), and
   rule [nsepseq] is for non-empty such sequences. See module [Utils]
   for the types corresponding to the semantic actions of those
   rules.
*)

(* Possibly empty sequence of items *)

seq(item):
  (**)           {     [] }
| item seq(item) { $1::$2 }

(* Non-empty sequence of items *)

nseq(item):
  item seq(item) { $1,$2 }

(* Non-empty separated sequence of items *)

nsepseq(item,sep):
  item                       {                        $1, [] }
| item sep nsepseq(item,sep) { let h,t = $3 in $1, ($2,h)::t }

(* Possibly empy separated sequence of items *)

sepseq(item,sep):
  (**)              {    None }
| nsepseq(item,sep) { Some $1 }

(* Helpers *)

type_name   : ident  { $1 }
field_name  : ident  { $1 }
module_name : constr { $1 }
struct_name : Ident { $1 }

(* Non-empty comma-separated values (at least two values) *)

tuple(item):
  item sym(COMMA) nsepseq(item,sym(COMMA)) { let h,t = $3 in $1,($2,h)::t }

(* Possibly empty semicolon-separated values between brackets *)

list_of(item):
  sym(LBRACKET) sepseq(item,sym(SEMI)) sym(RBRACKET) {
   {opening    = LBracket $1;
    elements   = $2;
    terminator = None;
    closing    = RBracket $3} }

(* Main *)

program:
  nseq(declaration) eof                           { {decl=$1; eof=$2} }

declaration:
  reg(kwd(Let)          let_bindings     {$1,$2})       {      Let $1 }
| reg(kwd(LetEntry)     let_binding      {$1,$2})       { LetEntry $1 }
| reg(type_decl)                                        { TypeDecl $1 }

(* Type declarations *)

type_decl:
  kwd(Type) type_name sym(EQ) type_expr {
    {kwd_type=$1; name=$2; eq=$3; type_expr=$4} }

type_expr:
  cartesian                                              {   TProd $1 }
| reg(sum_type)                                          {    TSum $1 }
| reg(record_type)                                       { TRecord $1 }

cartesian:
  reg(nsepseq(fun_type, sym(TIMES)))                             { $1 }

fun_type:
  core_type                                                 {      $1 }
| reg(arrow_type)                                           { TFun $1 }

arrow_type:
  core_type sym(ARROW) fun_type                            { $1,$2,$3 }

core_type:
  reg(path) {
    let {module_proj; value_proj} = $1.value in
    let selection_to_string = function
      Name ident -> ident.value
    | Component {value={inside;_}; _} ->
        fst inside.value in
    let module_str =
      match module_proj with
        None -> ""
      | Some (constr,_) -> constr.value ^ "." in
    let value_str =
      Utils.nsepseq_to_list value_proj
      |> List.map selection_to_string
      |> String.concat "." in
    let alias = module_str ^ value_str
    in TAlias {$1 with value=alias}
  }
| reg(core_type type_constr {$1,$2}) {
    let arg, constr = $1.value in
    let lpar, rpar = Region.ghost, Region.ghost in
    let arg = {lpar; inside=arg,[]; rpar} in
    TApp Region.{$1 with value = constr, arg}
  }
| reg(type_tuple type_constr {$1,$2}) {
    let arg, constr = $1.value in
    TApp Region.{$1 with value = constr, arg}
  }
| reg(par(cartesian)) {
    let Region.{region; value={lpar; inside=prod; rpar}} = $1 in
    TPar Region.{region; value={lpar; inside = TProd prod; rpar}} }

type_constr:
  type_name { $1 }
| kwd(Set)  { Region.{value="set";  region=$1} }
| kwd(Map)  { Region.{value="map";  region=$1} }
| kwd(List) { Region.{value="list"; region=$1} }

type_tuple:
  par(tuple(type_expr)) { $1 }

sum_type:
  ioption(sym(VBAR)) nsepseq(reg(variant), sym(VBAR)) { $2 }

variant:
  constr kwd(Of) cartesian { {constr=$1; args = Some ($2,$3)} }
| constr                   { {constr=$1; args=None} }

record_type:
  sym(LBRACE) sep_or_term_list(reg(field_decl),sym(SEMI))
  sym(RBRACE) {
    let elements, terminator = $2 in {
      opening = LBrace $1;
      elements = Some elements;
      terminator;
      closing = RBrace $3} }

field_decl:
  field_name sym(COLON) type_expr {
    {field_name=$1; colon=$2; field_type=$3} }

(* Non-recursive definitions *)

let_bindings:
  nsepseq(let_binding, kwd(And)) { $1 }

let_binding:
  ident nseq(sub_irrefutable) option(type_annotation) sym(EQ) expr {
    let let_rhs = EFun (norm $2 $4 $5) in
    {pattern = PVar $1; lhs_type=$3; eq = Region.ghost; let_rhs}
  }
| irrefutable option(type_annotation) sym(EQ) expr {
    {pattern=$1; lhs_type=$2; eq=$3; let_rhs=$4} }

type_annotation:
  sym(COLON) type_expr { $1,$2 }

(* Patterns *)

irrefutable:
  reg(tuple(sub_irrefutable))                            {  PTuple $1 }
| sub_irrefutable                                        {         $1 }

sub_irrefutable:
  ident                                                  {    PVar $1 }
| sym(WILD)                                              {   PWild $1 }
| unit                                                   {   PUnit $1 }
| reg(par(closed_irrefutable))                           {    PPar $1 }

closed_irrefutable:
  reg(tuple(sub_irrefutable))                           {   PTuple $1 }
| sub_irrefutable                                       {          $1 }
| reg(constr_pattern)                                   {  PConstr $1 }
| reg(typed_pattern)                                    {   PTyped $1 }

typed_pattern:
  irrefutable sym(COLON) type_expr {
    {pattern=$1; colon=$2; type_expr=$3} }

pattern:
  reg(sub_pattern sym(CONS) tail {$1,$2,$3})       {   PList (PCons $1) }
| reg(tuple(sub_pattern))                                {  PTuple $1 }
| core_pattern                                           {         $1 }

sub_pattern:
  reg(par(tail))                                         {    PPar $1 }
| core_pattern                                           {         $1 }

core_pattern:
  ident                                                  {    PVar $1 }
| sym(WILD)                                              {   PWild $1 }
| unit                                                   {   PUnit $1 }
| reg(Int)                                               {    PInt $1 }
| kwd(True)                                              {   PTrue $1 }
| kwd(False)                                             {  PFalse $1 }
| string                                                 { PString $1 }
| reg(par(ptuple))                                       {    PPar $1 }
| reg(list_of(tail))                               { PList (Sugar $1) }
| reg(constr_pattern)                                    { PConstr $1 }
| reg(record_pattern)                                    { PRecord $1 }

record_pattern:
  sym(LBRACE)
    sep_or_term_list(reg(field_pattern),sym(SEMI))
  sym(RBRACE) {
    let elements, terminator = $2 in
    {opening = LBrace $1;
     elements = Some elements;
     terminator;
     closing = RBrace $3} }

field_pattern:
  field_name sym(EQ) sub_pattern {
    {field_name=$1; eq=$2; pattern=$3} }

constr_pattern:
  constr sub_pattern                                   {  $1, Some $2 }
| constr                                               {  $1, None    }

ptuple:
  reg(tuple(tail))                                       {  PTuple $1 }

unit:
  reg(sym(LPAR) sym(RPAR) {$1,$2})                       {         $1 }

tail:
  reg(sub_pattern sym(CONS) tail {$1,$2,$3})         { PList (PCons $1) }
| sub_pattern                                        {             $1 }

(* Expressions *)

expr:
  base_cond__open(expr)                                   {        $1 }
| match_expr(base_cond)                                   { EMatch $1 }

base_cond__open(x):
  base_expr(x)
| conditional(x)                                                 { $1 }

base_cond:
  base_cond__open(base_cond)                                     { $1 }

base_expr(right_expr):
  let_expr(right_expr)
| fun_expr(right_expr)
| disj_expr_level                                         {        $1 }
| reg(tuple(disj_expr_level))                             { ETuple $1 }

conditional(right_expr):
  if_then_else(right_expr)
| if_then(right_expr)                                    {   ECond $1 }

if_then(right_expr):
  reg(kwd(If) expr kwd(Then) right_expr {$1,$2,$3,$4})   {  IfThen $1 }

if_then_else(right_expr):
  reg(kwd(If) expr kwd(Then) closed_if kwd(Else) right_expr {
    $1,$2,$3,$4,$5,$6 })                              { IfThenElse $1 }

base_if_then_else__open(x):
  base_expr(x)                                             {       $1 }
| if_then_else(x)                                          { ECond $1 }

base_if_then_else:
  base_if_then_else__open(base_if_then_else)               {       $1 }

closed_if:
  base_if_then_else__open(closed_if)                       {       $1 }
| match_expr(base_if_then_else)                            { EMatch $1 }

match_expr(right_expr):
  reg(kwd(Match) expr kwd(With)
        option(sym(VBAR)) cases(right_expr) {
          $1,$2,$3, ($4, Utils.nsepseq_rev $5) })
| reg(match_nat(right_expr))                                     { $1 }

match_nat(right_expr):
  kwd(MatchNat) expr kwd(With)
    option(sym(VBAR)) cases(right_expr) {
    let open Region in
    let cast_name = Name {region=ghost; value="assert_pos"} in
    let cast_path = {module_proj=None; value_proj=cast_name,[]} in
    let cast_fun  = Path {region=ghost; value=cast_path} in
    let cast      = ECall {region=ghost; value=cast_fun,$2}
    in $1, cast, $3, ($4, Utils.nsepseq_rev $5) }

cases(right_expr):
  case(right_expr)                                           { $1, [] }
| cases(base_cond) sym(VBAR) case(right_expr) {
    let h,t = $1 in $3, ($2,h)::t }

case(right_expr):
  pattern sym(ARROW) right_expr                            { $1,$2,$3 }

let_expr(right_expr):
  reg(kwd(Let) let_bindings kwd(In) right_expr {$1,$2,$3,$4}) {
    ELetIn $1 }

fun_expr(right_expr):
  reg(kwd(Fun) nseq(irrefutable) sym(ARROW) right_expr {$1,$2,$3,$4}) {
    let Region.{region; value = kwd_fun, patterns, arrow, expr} = $1
    in EFun (norm ~reg:(region, kwd_fun) patterns arrow expr) }

disj_expr_level:
  reg(disj_expr)                          { ELogic (BoolExpr (Or $1)) }
| conj_expr_level                                                { $1 }

bin_op(arg1,op,arg2):
  arg1 op arg2                            { {arg1=$1; op=$2; arg2=$3} }

un_op(op,arg):
  op arg                                            { {op=$1; arg=$2} }

disj_expr:
  bin_op(disj_expr_level, sym(BOOL_OR), conj_expr_level)
| bin_op(disj_expr_level, kwd(Or),      conj_expr_level)         { $1 }

conj_expr_level:
  reg(conj_expr)                         { ELogic (BoolExpr (And $1)) }
| comp_expr_level                        {                         $1 }

conj_expr:
  bin_op(conj_expr_level, sym(BOOL_AND), comp_expr_level)        { $1 }

comp_expr_level:
  reg(lt_expr)                         { ELogic (CompExpr (Lt $1))    }
| reg(le_expr)                         { ELogic (CompExpr (Leq $1))   }
| reg(gt_expr)                         { ELogic (CompExpr (Gt $1))    }
| reg(ge_expr)                         { ELogic (CompExpr (Geq $1))   }
| reg(eq_expr)                         { ELogic (CompExpr (Equal $1)) }
| reg(ne_expr)                         { ELogic (CompExpr (Neq $1))   }
| cat_expr_level                       {                           $1 }

lt_expr:
  bin_op(comp_expr_level, sym(LT), cat_expr_level)  { $1 }

le_expr:
  bin_op(comp_expr_level, sym(LE), cat_expr_level)  { $1 }

gt_expr:
  bin_op(comp_expr_level, sym(GT), cat_expr_level)  { $1 }

ge_expr:
  bin_op(comp_expr_level, sym(GE), cat_expr_level)  { $1 }

eq_expr:
  bin_op(comp_expr_level, sym(EQ), cat_expr_level)  { $1 }

ne_expr:
  bin_op(comp_expr_level, sym(NE), cat_expr_level) { $1 }

cat_expr_level:
  reg(cat_expr)                                   {  EString (Cat $1) }
| reg(append_expr)                                { EList (Append $1) }
| cons_expr_level                                 {                $1 }

cat_expr:
  bin_op(cons_expr_level, sym(CAT), cat_expr_level)              { $1 }

append_expr:
  cons_expr_level sym(APPEND) cat_expr_level               { $1,$2,$3 }

cons_expr_level:
  reg(cons_expr)                                    { EList (Cons $1) }
| add_expr_level                                    {              $1 }

cons_expr:
  bin_op(add_expr_level, sym(CONS), cons_expr_level)             { $1 }

add_expr_level:
  reg(plus_expr)                                    { EArith (Add $1) }
| reg(minus_expr)                                   { EArith (Sub $1) }
| mult_expr_level                                   {              $1 }

plus_expr:
  bin_op(add_expr_level, sym(PLUS), mult_expr_level) { $1 }

minus_expr:
  bin_op(add_expr_level, sym(MINUS), mult_expr_level) { $1 }

mult_expr_level:
  reg(times_expr)                                 {  EArith (Mult $1) }
| reg(div_expr)                                   {   EArith (Div $1) }
| reg(mod_expr)                                   {   EArith (Mod $1) }
| unary_expr_level                                {                $1 }

times_expr:
  bin_op(mult_expr_level, sym(TIMES), unary_expr_level)          { $1 }

div_expr:
  bin_op(mult_expr_level, sym(SLASH), unary_expr_level)          { $1 }

mod_expr:
  bin_op(mult_expr_level, kwd(Mod), unary_expr_level)            { $1 }

unary_expr_level:
  reg(uminus_expr)                       {            EArith (Neg $1) }
| reg(not_expr)                          { ELogic (BoolExpr (Not $1)) }
| call_expr_level                        {                         $1 }

uminus_expr:
  un_op(sym(MINUS), core_expr)                                   { $1 }

not_expr:
  un_op(kwd(Not), core_expr)                                     { $1 }

call_expr_level:
  reg(call_expr)                                           {  ECall $1 }
| core_expr                                                {       $1 }

call_expr:
  call_expr_level core_expr                                   { $1,$2 }

core_expr:
  reg(Int)                                          { EArith (Int $1) }
| reg(Mtz)                                          { EArith (Mtz $1) }
| reg(Nat)                                          { EArith (Nat $1) }
| reg(path)                                               {   Path $1 }
| string                                        { EString (String $1) }
| unit                                                    {  EUnit $1 }
| kwd(False)                          {  ELogic (BoolExpr (False $1)) }
| kwd(True)                           {  ELogic (BoolExpr ( True $1)) }
| reg(list_of(expr))                                { EList (List $1) }
| reg(par(expr))                                         {    EPar $1 }
| constr                                                 { EConstr $1 }
| reg(sequence)                                          {    ESeq $1 }
| reg(record_expr)                                       { ERecord $1 }

path:
  reg(struct_name) sym(DOT) nsepseq(selection,sym(DOT)) {
    let head, tail = $3 in
    let seq = Name $1, ($2,head)::tail
    in {module_proj=None; value_proj=seq}
  }
| module_name sym(DOT) nsepseq(selection,sym(DOT)) {
    {module_proj = Some ($1,$2); value_proj=$3}
  }
| ident {
    {module_proj = None; value_proj = Name $1, []} }

selection:
  ident              {      Name $1 }
| reg(par(reg(Int))) { Component $1 }

record_expr:
  sym(LBRACE)
    sep_or_term_list(reg(field_assignment),sym(SEMI))
  sym(RBRACE) {
    let elements, terminator = $2 in
    {opening = LBrace $1;
     elements = Some elements;
     terminator;
     closing = RBrace $3} }

field_assignment:
  field_name sym(EQ) expr {
    {field_name=$1; assignment=$2; field_expr=$3} }

sequence:
  kwd(Begin) sep_or_term_list(expr,sym(SEMI)) kwd(End) {
    let elements, terminator = $2 in
    {opening = Begin $1;
     elements = Some elements;
     terminator;
     closing = End $3} }