#!/bin/sh

# This script extracts the error states of an LR automaton produced by
# Menhir and generates minimal inputs that cover all of them and only
# them.
#
# Inputs (see usage below): a Menhir parser specification, the two
# token specifications, an "unlexer" executable, an output directory
# and a file extension. The file <parser>.msg must already exist in
# the current directory.
#
# Outputs: <parser>.msg.raw, <parser>.msg.states and <parser>.msg.map
# in the current directory, and one file per error state in the output
# directory, named after the zero-padded state number.

# set -x

# ====================================================================
# General Settings and wrappers

script=$(basename "$0")

# Option variables start empty so that values inherited from the
# environment cannot be mistaken for options given on the command line
# (which would trigger spurious "Repeated option" errors below).
par_tokens=
lex_tokens=
ext=
dir=
unlexer=
parser=
help=
no_eq=

# Prints $1 followed by a newline, unless $quiet is "yes".
print_nl () { test "$quiet" != "yes" && echo "$1"; }

# Prints $1 (used as a printf format), unless $quiet is "yes".
print () { test "$quiet" != "yes" && printf "$1"; }

# Prints the message $1 on stderr and aborts the script with status 1.
fatal_error () {
  echo "$script: fatal error:" 1>&2
  echo "$1" 1>&2
  exit 1
}

# Prints the warning $1, unless $quiet is "yes".
warn () {
  print_nl "$script: warning:"
  print_nl "$1"
}

# Prints "FAILED" followed by $1, in red.
failed () {
  printf "\033[31mFAILED$1\033[0m\n"
}

# Prints $1 in red.
emphasise () {
  printf "\033[31m$1\033[0m\n"
}

# ====================================================================
# Parsing loop
#
# Long options must be written "--option=value". An option given
# without "=" is recorded in $no_eq and reported after the loop.

while : ; do
  case "$1" in
    "") break;;
    --par-tokens=*)
      if test -n "$par_tokens"; then
        fatal_error "Repeated option --par-tokens."; fi
      par_tokens=${1#*=}
      ;;
    --par-tokens)
      no_eq=$1
      break
      ;;
    --lex-tokens=*)
      if test -n "$lex_tokens"; then
        fatal_error "Repeated option --lex-tokens."; fi
      lex_tokens=${1#*=}
      ;;
    --lex-tokens)
      no_eq=$1
      break
      ;;
    --ext=*)
      if test -n "$ext"; then
        fatal_error "Repeated option --ext."; fi
      ext=${1#*=}
      ;;
    --ext)
      no_eq=$1
      break
      ;;
    --dir=*)
      if test -n "$dir"; then
        fatal_error "Repeated option --dir."; fi
      dir=${1#*=}
      ;;
    --dir)
      no_eq=$1
      break
      ;;
    --unlexer=*)
      if test -n "$unlexer"; then
        fatal_error "Repeated option --unlexer."; fi
      unlexer=${1#*=}
      ;;
    --unlexer)
      no_eq=$1
      break
      ;;
      # Help
      #
    -h | --help | -help)
      help=yes
      ;;
      # Invalid option
      #
    -*)
      fatal_error "Invalid option \"$1\"."
      ;;
      # Invalid argument
      #
    *)
      if test -n "$parser"; then
        fatal_error "Only one Menhir specification allowed."; fi
      parser=$1
      ;;
  esac
  shift
done

# ====================================================================
# Help

# Prints the help text on stdout and exits with status 1.
usage () {
  cat <<EOF
Usage: $script [-h|--help]
                 --par-tokens=<par_tokens>.mly
                 --lex-tokens=<lex_tokens>.mli
                 --unlexer=<binary>
                 --ext=<extension>
                 --dir=<path>
                 <parser>.mly

Generates in directory <path> a set of LIGO source files with
extension <extension> covering all erroneous states of the LR
automaton produced by Menhir from <parser>.mly, <par_tokens>.mly,
<lex_tokens>.mli and <parser>.msg (see script messages.sh for
generating the latter). The LIGO files will be numbered with their
corresponding state number in the automaton. The executable <binary>
reads a line on stdin of tokens and produces a line of corresponding
lexemes.

The following options, if given, must be given only once.

Display control:
  -h, --help  display this help and exit

Mandatory options:
      --lex-tokens=<name>.mli the lexical tokens
      --par-tokens=<name>.mly the syntactical tokens
      --ext=EXT               Unix file extension for the
                              generated LIGO files
                              (no starting period)
      --dir=PATH              directory to store the generated
                              LIGO files (no trailing slash)
      --unlexer=<binary>      from tokens to lexemes (one line on stdin)
EOF
  exit 1
}

if test "$help" = "yes"; then usage; fi

# ====================================================================
# Checking the command-line options and arguments and applying some of
# them.

# It is a common mistake to forget the "=" in GNU long-option style.

if test -n "$no_eq"
then
  fatal_error "Long option style $no_eq must be followed by \"=\"."
fi

# Checking options

if test -z "$unlexer"; then
  fatal_error "Unlexer binary not found (use --unlexer)."; fi

if test -z "$parser"; then
  fatal_error "No parser specification."; fi

if test -z "$par_tokens"; then
  fatal_error "No syntactical tokens specification (use --par-tokens)."; fi

if test -z "$lex_tokens"; then
  fatal_error "No lexical tokens specification (use --lex-tokens)."; fi

if test ! -e "$parser"; then
  fatal_error "Parser specification \"$parser\" not found."; fi

if test ! -e "$lex_tokens"; then
  fatal_error "Lexical tokens specification \"$lex_tokens\" not found."; fi

if test ! -e "$par_tokens"; then
  fatal_error "Syntactical tokens specification \"$par_tokens\" not found."; fi

case "$parser" in
  *.mly) ;;
  *) fatal_error "Parser specification must have extension \".mly\"." ;;
esac

case "$par_tokens" in
  *.mly) ;;
  *) fatal_error "Syntactical tokens specification must have extension \".mly\"." ;;
esac

case "$lex_tokens" in
  *.mli) ;;
  *) fatal_error "Lexical tokens specification must have extension \".mli\"." ;;
esac

mly=$parser
parser_base=$(basename "$mly" .mly)
par_tokens_base=$(basename "$par_tokens" .mly)
lex_tokens_base=$(basename "$lex_tokens" .mli)

# Checking the output directory

if test -z "$dir"; then
  fatal_error "No output directory (use --dir)."; fi

if test ! -d "$dir"; then
  fatal_error "Output directory \"$dir\" not found."; fi

# Checking the LIGO extension

if test -z "$ext"; then
  fatal_error "No LIGO extension (use --ext)."; fi

case "$ext" in
  .*) fatal_error "LIGO extensions must not start with a period." ;;
esac

# Checking the presence of the messages

msg=$parser_base.msg

if test ! -e "$msg"; then
  fatal_error "File $msg not found."; fi

# ====================================================================
# Menhir's flags
#
# Kept as a single string and expanded unquoted below, so that the
# shell splits it into separate arguments.

flags="--table --strict --external-tokens $lex_tokens_base \
       --base $parser_base $par_tokens"

# ====================================================================
# Producing erroneous sentences from Menhir's error messages

raw=$parser_base.msg.raw

printf 'Making %s from %s... ' "$raw" "$msg"
# Menhir's diagnostics are discarded, as in the original script. The
# sed filter keeps only what follows the last ": " on each line; it is
# applied in a pipeline because "sed -i" is not portable.
# shellcheck disable=SC2086 # $flags is split into words on purpose
menhir --echo-errors "$msg" $flags "$mly" 2>/dev/null \
  | sed -e 's/^.*: \(.*\)$/\1/g' > "$raw"
printf "done.\n"

# ====================================================================
# Converting Menhir's minimal erroneous sentences to concrete syntax

printf "Unlexing the erroneous sentences... "
states=$msg.states
map=$msg.map

# One state number per line, taken from the " state: <number>" lines
# of the messages file.
sed -n 's/.* state: \([0-9][0-9]*\)./\1/p' "$msg" > "$states"

# Each line of the map is "<state>:<sentence>".
paste -d ':' "$states" "$raw" > "$map"

rm -f -- "$dir"/*."$ext"

while read -r line; do
  state=${line%%:*}
  sentence=${line#*:}
  # Skip lines without a numeric state (e.g. when the two pasted files
  # do not have the same number of lines).
  case "$state" in
    '' | *[!0-9]*) continue ;;
  esac
  filename=$(printf '%s/%04d.%s' "$dir" "$state" "$ext")
  # $unlexer is left unquoted so that a command with arguments still
  # works, as it did before.
  # shellcheck disable=SC2086
  printf '%s\n' "$sentence" | $unlexer >> "$filename"
done < "$map"

printf "done.\n"