From 9512992d2ba37db263f81ce27e8236285e2c7bf4 Mon Sep 17 00:00:00 2001
From: Christian Rinderknecht
Date: Wed, 18 Dec 2019 21:21:39 +0100
Subject: [PATCH] Generating .msg files and extracting from them LIGO source files.

---
 vendors/ligo-utils/simple-utils/cover.sh    | 258 ++++++++++++++++++++
 vendors/ligo-utils/simple-utils/messages.sh | 222 +++++++++++++++++
 2 files changed, 480 insertions(+)
 create mode 100755 vendors/ligo-utils/simple-utils/cover.sh
 create mode 100755 vendors/ligo-utils/simple-utils/messages.sh

diff --git a/vendors/ligo-utils/simple-utils/cover.sh b/vendors/ligo-utils/simple-utils/cover.sh
new file mode 100755
index 000000000..e4717b5ca
--- /dev/null
+++ b/vendors/ligo-utils/simple-utils/cover.sh
@@ -0,0 +1,258 @@
+#!/bin/sh
+
+# This script extracts the error states of an LR automaton produced by
+# Menhir and generates minimal inputs that cover all of them and only
+# them.
+
+#set -x
+
+# ====================================================================
+# General Settings and wrappers
+
+script=$(basename "$0")
+
+# Report the error $1 on stderr and exit with status 1.
+fatal_error () {
+  echo "$script: fatal error:" 1>&2
+  echo "$1" 1>&2
+  exit 1
+}
+
+# Print "FAILED" in red, immediately followed by the suffix $1.
+failed () {
+  printf '\033[31mFAILED%s\033[0m\n' "$1"
+}
+
+# ====================================================================
+# Parsing loop
+#
+# The settings are reset first, so that variables inherited from the
+# environment are not mistaken for repeated options.
+#
+par_tokens=; lex_tokens=; ext=; dir=; unlexer=; parser=; help=; no_eq=
+
+while : ; do
+  case "$1" in
+    "") break;;
+    --par-tokens=*)
+      if test -n "$par_tokens"; then
+        fatal_error "Repeated option --par-tokens."; fi
+      par_tokens=${1#*=}
+      ;;
+    --par-tokens)
+      no_eq=$1
+      break
+      ;;
+    --lex-tokens=*)
+      if test -n "$lex_tokens"; then
+        fatal_error "Repeated option --lex-tokens."; fi
+      lex_tokens=${1#*=}
+      ;;
+    --lex-tokens)
+      no_eq=$1
+      break
+      ;;
+    --ext=*)
+      if test -n "$ext"; then
+        fatal_error "Repeated option --ext."; fi
+      ext=${1#*=}
+      ;;
+    --ext)
+      no_eq=$1
+      break
+      ;;
+    --dir=*)
+      if test -n "$dir"; then
+        fatal_error "Repeated option --dir."; fi
+      dir=${1#*=}
+      ;;
+    --dir)
+      no_eq=$1
+      break
+      ;;
+    --unlexer=*)
+      if test -n "$unlexer"; then
+        fatal_error "Repeated option --unlexer."; fi
+      unlexer=${1#*=}
+      ;;
+    --unlexer)
+      no_eq=$1
+      break
+      ;;
+      # Help
+      #
+    -h | --help | -help)
+      help=yes
+      ;;
+      # Invalid option
+      #
+    -*)
+      fatal_error "Invalid option \"$1\"."
+      ;;
+      # Parser specification (the only positional argument)
+      #
+    *)
+      if test -n "$parser"; then
+        fatal_error "Only one Menhir specification allowed."; fi
+      parser=$1
+  esac
+  shift
+done
+
+# ====================================================================
+# Help
+#
+usage () {
+  cat <<EOF
+Usage: $script [-h|--help]
+                --par-tokens=PAR_TOKENS.mly
+                --lex-tokens=LEX_TOKENS.mli
+                --unlexer=UNLEXER
+                --ext=EXT
+                --dir=PATH
+                PARSER.mly
+
+Generates in directory PATH a set of LIGO source files with
+extension EXT covering all erroneous states of the LR
+automaton produced by Menhir from PARSER.mly, PAR_TOKENS.mly,
+LEX_TOKENS.mli and PARSER.msg (see script "messages.sh" for
+generating the latter). The LIGO files will be numbered with their
+corresponding state number in the automaton. The executable UNLEXER
+reads a line on stdin of tokens and produces a line of corresponding
+lexemes.
+
+The following options, if given, must be given only once.
+
+Display control:
+  -h, --help                  display this help and exit
+
+Mandatory options:
+      --lex-tokens=NAME.mli   the lexical tokens
+      --par-tokens=NAME.mly   the syntactical tokens
+      --ext=EXT               Unix file extension for the
+                              generated LIGO files
+                              (no starting period)
+      --dir=PATH              directory to store the generated
+                              LIGO files (no trailing slash)
+      --unlexer=UNLEXER       from tokens to lexemes (one line on stdin)
+EOF
+  exit 1
+}
+
+if test "$help" = "yes"; then usage; fi
+
+# ====================================================================
+# Checking the command-line options and arguments and applying some of
+# them.
+
+# It is a common mistake to forget the "=" in GNU long-option style.
+
+if test -n "$no_eq"; then
+  fatal_error "Long option style $no_eq must be followed by \"=\"."
+fi
+
+# Checking options
+
+if test -z "$unlexer"; then
+  fatal_error "Unlexer binary not found (use --unlexer)."; fi
+
+if test -z "$parser"; then
+  fatal_error "No parser specification."; fi
+
+if test -z "$par_tokens"; then
+  fatal_error "No syntactical tokens specification (use --par-tokens)."; fi
+
+if test -z "$lex_tokens"; then
+  fatal_error "No lexical tokens specification (use --lex-tokens)."; fi
+
+if test ! -e "$parser"; then
+  fatal_error "Parser specification \"$parser\" not found."; fi
+
+if test ! -e "$lex_tokens"; then
+  fatal_error "Lexical tokens specification \"$lex_tokens\" not found."; fi
+
+if test ! -e "$par_tokens"; then
+  fatal_error "Syntactical tokens specification \"$par_tokens\" not found."; fi
+
+case "$parser" in
+  *.mly) ;;
+  *) fatal_error "Parser specification must have extension \".mly\"."
+esac
+
+case "$par_tokens" in
+  *.mly) ;;
+  *) fatal_error "Syntactical tokens specification must have extension \".mly\"."
+esac
+
+case "$lex_tokens" in
+  *.mli) ;;
+  *) fatal_error "Lexical tokens specification must have extension \".mli\"."
+esac
+
+mly=$parser
+parser_base=$(basename "$mly" .mly)
+lex_tokens_base=$(basename "$lex_tokens" .mli)
+
+# Checking the output directory
+
+if test -z "$dir"; then
+  fatal_error "No output directory (use --dir)."; fi
+
+if test ! -d "$dir"; then
+  fatal_error "Output directory \"$dir\" not found."; fi
+
+# Checking the LIGO extension
+
+if test -z "$ext"; then
+  fatal_error "No LIGO extension (use --ext)."; fi
+
+case "$ext" in
+  .*) fatal_error "LIGO extensions must not start with a period."
+esac
+
+# Checking the presence of the messages
+
+msg=$parser_base.msg
+if test ! -e "$msg"; then
+  fatal_error "File $msg not found."; fi
+
+# ====================================================================
+# Menhir's flags (expanded unquoted below, to be split into words)
+
+flags="--table --strict --external-tokens $lex_tokens_base \
+       --base $parser_base $par_tokens"
+
+# ====================================================================
+# Producing erroneous sentences from Menhir's error messages
+
+raw=$msg.raw
+printf 'Making %s from %s... ' "$raw" "$msg"
+if ! menhir --echo-errors "$msg" $flags "$mly" > "$raw.tmp" 2>/dev/null
+then
+  rm -f "$raw.tmp"
+  failed "."
+  fatal_error "Menhir failed to echo the sentences of $msg."
+fi
+# Only the sentence is kept, that is, what follows the last ": ".
+sed -e 's/^.*: \(.*\)$/\1/' "$raw.tmp" > "$raw"
+rm -f "$raw.tmp"
+printf 'done.\n'
+
+# ====================================================================
+# Converting Menhir's minimal erroneous sentences to concrete syntax
+
+printf 'Unlexing the erroneous sentences... '
+states=$msg.states
+map=$msg.map
+# One state number per erroneous sentence, in the order of $msg.
+sed -n 's/.* state: \([0-9][0-9]*\)./\1/p' "$msg" > "$states"
+# Each line of $map reads STATE:SENTENCE.
+paste -d ':' "$states" "$raw" > "$map"
+# The sentences are appended below, so stale LIGO files must go first.
+rm -f "$dir"/*."$ext"
+while IFS=: read -r state sentence; do
+  test -n "$state" || continue
+  filename=$(printf '%s/%04d.%s' "$dir" "$state" "$ext")
+  printf '%s\n' "$sentence" | "$unlexer" >> "$filename"
+done < "$map"
+printf 'done.\n'
diff --git a/vendors/ligo-utils/simple-utils/messages.sh b/vendors/ligo-utils/simple-utils/messages.sh
new file mode 100755
index 000000000..c9e0034e7
--- /dev/null
+++ b/vendors/ligo-utils/simple-utils/messages.sh
@@ -0,0 +1,222 @@
+#!/bin/sh
+
+# This script uses Menhir to generate the exhaustive list of errors
+# for a given parser specification. The generated file has to be
+# filled with the error messages. The script must be called in the
+# same directory where the parser specification and external token
+# specifications are located, in accordance with the convention of the
+# LIGO compiler source code.
+
+#set -x
+
+# ====================================================================
+# General Settings and wrappers
+
+script=$(basename "$0")
+
+# Report the error $1 on stderr and exit with status 1.
+fatal_error () {
+  echo "$script: fatal error:" 1>&2
+  echo "$1" 1>&2
+  exit 1
+}
+
+# Print "FAILED" in red, immediately followed by the suffix $1.
+failed () {
+  printf '\033[31mFAILED%s\033[0m\n' "$1"
+}
+
+# Print $1 in red.
+emphasise () {
+  printf '\033[31m%s\033[0m\n' "$1"
+}
+
+# ====================================================================
+# Parsing loop
+#
+# The settings are reset first, so that variables inherited from the
+# environment are not mistaken for repeated options.
+#
+par_tokens=; lex_tokens=; parser=; help=; no_eq=
+
+while : ; do
+  case "$1" in
+    "") break;;
+    --par-tokens=*)
+      if test -n "$par_tokens"; then
+        fatal_error "Repeated option --par-tokens."; fi
+      par_tokens=${1#*=}
+      ;;
+    --par-tokens)
+      no_eq=$1
+      break
+      ;;
+    --lex-tokens=*)
+      if test -n "$lex_tokens"; then
+        fatal_error "Repeated option --lex-tokens."; fi
+      lex_tokens=${1#*=}
+      ;;
+    --lex-tokens)
+      no_eq=$1
+      break
+      ;;
+    -h | --help | -help)
+      help=yes
+      ;;
+      # Invalid option
+      #
+    -*)
+      fatal_error "Invalid option \"$1\"."
+      ;;
+      # Parser specification (the only positional argument)
+      #
+    *)
+      if test -n "$parser"; then
+        fatal_error "Only one Menhir specification allowed."; fi
+      parser=$1
+  esac
+  shift
+done
+
+# ====================================================================
+# Help
+#
+usage () {
+  cat <<EOF
+Usage: $script [-h|--help] --lex-tokens=LEX_TOKENS.mli
+                --par-tokens=PAR_TOKENS.mly PARSER.mly
+
+Generates in place PARSER.msg, the form containing the exhaustive
+list of errors for the LR automaton generated by Menhir from
+PARSER.mly, PAR_TOKENS.mly and LEX_TOKENS.mli. The file
+PARSER.msg is meant to be edited and filled with the error messages.
+
+The following options, if given, must be given only once.
+
+Display control:
+  -h, --help                  display this help and exit
+Mandatory options:
+      --lex-tokens=NAME.mli   the lexical tokens
+      --par-tokens=NAME.mly   the syntactical tokens
+EOF
+  exit 1
+}
+
+if test "$help" = "yes"; then usage; fi
+
+# ====================================================================
+# Checking the command-line options and arguments and applying some of
+# them.
+
+# It is a common mistake to forget the "=" in GNU long-option style.
+
+if test -n "$no_eq"; then
+  fatal_error "Long option style $no_eq must be followed by \"=\"."
+fi
+
+# Checking the parser and tokens
+
+if test -z "$parser"; then
+  fatal_error "No parser specification."; fi
+
+if test -z "$par_tokens"; then
+  fatal_error "No syntactical tokens specification (use --par-tokens)."; fi
+
+if test -z "$lex_tokens"; then
+  fatal_error "No lexical tokens specification (use --lex-tokens)."; fi
+
+if test ! -e "$parser"; then
+  fatal_error "Parser specification \"$parser\" not found."; fi
+
+if test ! -e "$lex_tokens"; then
+  fatal_error "Lexical tokens specification \"$lex_tokens\" not found."; fi
+
+if test ! -e "$par_tokens"; then
+  fatal_error "Syntactical tokens specification \"$par_tokens\" not found."; fi
+
+case "$parser" in
+  *.mly) ;;
+  *) fatal_error "Parser specification must have extension \".mly\"."
+esac
+
+case "$par_tokens" in
+  *.mly) ;;
+  *) fatal_error "Syntactical tokens specification must have extension \".mly\"."
+esac
+
+case "$lex_tokens" in
+  *.mli) ;;
+  *) fatal_error "Lexical tokens specification must have extension \".mli\"."
+esac
+
+mly=$parser
+parser_base=$(basename "$mly" .mly)
+lex_tokens_base=$(basename "$lex_tokens" .mli)
+
+# ====================================================================
+# Menhir's flags (expanded unquoted below, to be split into words)
+
+flags="--table --strict --external-tokens $lex_tokens_base \
+       --base $parser_base $par_tokens"
+
+# ====================================================================
+# Generating error messages with Menhir
+
+msg=$parser_base.msg
+err=.$msg.err                # diagnostics kept after a failure
+out=.$parser_base.mly.out    # stderr of Menhir; no directory part
+
+# Put back the messages saved in $msg.old, if there are any.
+restore () {
+  if test -f "$msg.old"; then
+    mv -f "$msg.old" "$msg"
+    echo "Restored $msg."
+  fi
+}
+
+if test -e "$msg"; then mv -f "$msg" "$msg.old"; echo "Saved $msg."; fi
+
+printf 'Making new %s from %s... ' "$msg" "$mly"
+if menhir --list-errors $flags "$mly" > "$msg" 2> "$out"; then
+  sentences=$(grep -c "YOUR SYNTAX ERROR MESSAGE HERE" "$msg")
+  if test "$sentences" = "0"; then printf 'done.\n'
+  else
+    spurious=$(grep -c WARNING "$msg")
+    printf 'done:\n'
+    printf 'There are %s error sentences, %s with spurious reductions.\n' \
+           "$sentences" "$spurious"; fi
+  if test -s "$out"; then cat "$out"; fi
+  if test -f "$msg.old"; then
+    printf 'Checking inclusion of mappings (new in old)... '
+    if menhir --compare-errors "$msg" \
+              --compare-errors "$msg.old" \
+              $flags "$mly" 2> "$out"; then
+      if test -s "$out"; then
+        printf 'done:\n'
+        cat "$out"
+      else printf 'done.\n'; fi
+      rm -f "$out"
+      printf 'Updating %s... ' "$msg"
+      if menhir --update-errors "$msg.old" \
+                $flags "$mly" > "$msg" 2> "$err"; then
+        printf 'done:\n'
+        emphasise "Warning: The LR items may have changed."
+        emphasise "> Check your error messages again."
+        rm -f "$err"
+      else failed "."
+        touch "$err"
+        restore; fi
+    else failed ":"
+      mv -f "$out" "$err"
+      sed -e "s/\.msg/.msg.new/g" -e "s/\.new\.old//g" "$err" > "$err.tmp"
+      mv -f "$err.tmp" "$err"
+      mv -f "$msg" "$msg.new"
+      emphasise "See $err and update $msg."
+      echo "The default messages are in $msg.new."
+      restore; fi; fi
+else
+  failed ":"
+  mv -f "$out" "$err"
+  emphasise "> See $err."
+  restore
+fi