diff --git a/.gitignore b/.gitignore index b81a3e8de..e8553c903 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,10 @@ .DS_Store +__pycache__ -/_build +/docs/introduction/readme.rst + +_build *.install /tezos-node diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a31bd4cb5..c7a522ed6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -458,3 +458,21 @@ deploy:bootstrap6: # - ./scripts/delete_ci_image.sh gitlab-ci-token "${CI_BUILD_TOKEN}" # "${CI_PROJECT_PATH}/build" # "${CI_BUILD_REF}" + +pages: + stage: publish + image: ${CI_REGISTRY_IMAGE}/build:${CI_BUILD_REF} + only: + - master@tezos/tezos + artifacts: + paths: + - public + script: + - cd /home/opam/tezos + - sudo apk update && sudo apk upgrade + - sudo apk add py3-sphinx py3-sphinx_rtd_theme + - sudo ln -s /usr/bin/sphinx-build-3 /usr/bin/sphinx-build + - opam install odoc + - make doc-html && sudo mv docs/_build "${CI_PROJECT_DIR}"/public + dependencies: + - build diff --git a/CHANGES.alphanet b/CHANGES.alphanet index b308b0fa5..606b333f4 100644 --- a/CHANGES.alphanet +++ b/CHANGES.alphanet @@ -1,6 +1,9 @@ -For the next reset +Alphanet changelog ================== +For the next reset +------------------ + [Alpha] - Do not allow revealing the same endorsement twice. @@ -34,7 +37,7 @@ For the next reset Reset 2017-11-20 -================== +------------------ [Alphanet] @@ -104,7 +107,7 @@ Reset 2017-11-20 - Add `SIZE` on lists. Reset 2017-11-17 -================ +---------------- [Node] @@ -112,7 +115,7 @@ Reset 2017-11-17 - Irmin: restore usage `git-repack`... (mistakenly removed) Reset 2017-10-13 -================ +---------------- [Client] @@ -134,7 +137,7 @@ Reset 2017-10-13 prevent an error raised when using an unrevealed key. Reset 2017-09-21 -================ +---------------- [Node] @@ -156,7 +159,7 @@ Reset 2017-09-21 prevent an error raised when using an unrevealed key. Reset 2017-08-10 -================ +---------------- This update includes changes in the on-disk state of the node and in the format of blocks and operations. It thus requires a chain reset. diff --git a/Makefile b/Makefile index ae65297b9..4096aa7fd 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,10 @@ all: doc-html: @jbuilder build @doc ${DEV} + @mkdir -p $$(pwd)/docs/_build/api/odoc + @rm -rf $$(pwd)/docs/_build/api/odoc/* + @cp -r $$(pwd)/_build/default/_doc/* $$(pwd)/docs/_build/api/odoc/ + @make -C docs build-test: @jbuilder build @buildtest ${DEV} @@ -28,5 +32,6 @@ docker-image: clean: @-jbuilder clean @-rm -f tezos-node tezos-client tezos-protocol-compiler + @-make -C docs clean .PHONY: all test build-deps docker-image clean diff --git a/README.md b/README.md deleted file mode 100644 index daa165e5c..000000000 --- a/README.md +++ /dev/null @@ -1,413 +0,0 @@ -TEZOS -===== - -Tezos is a distributed consensus platform with meta-consensus capability. Tezos -not only comes to consensus about state, like BTC or ETH. It also comes to -consensus about how the protocol and the nodes should adapt and upgrade. - -See https://www.tezos.com/ for more information about the project. - - -Build instructions ------------------- - -To compile Tezos, you need an OCaml compiler (version 4.04.2) and all the -libraries listed in the various `tezos-*.opam` files. - -The best way to install all dependencies is by first installing -[OPAM](https://opam.ocaml.org/), the OCaml package manager. - -Then, you need to create a new switch alias for Tezos. A switch is -your own version of the OPAM configuration, including the OCaml -compiler, all packages, and package manager configuration related to -your project. This is necessary so that the project doesn't conflict -with other OCaml projects or other versions of Tezos. - -``` -opam update -opam switch "tezos" --alias-of 4.04.2 -``` - -Note that if you previously created a switch named `tezos` but with an -older OCaml version you need to remove the switch with `opam switch -remove "tezos"`. - -When the switch is ready, you need to activate it: - -``` -eval `opam config env` -``` - -Install the libraries which Tezos depends on: - -``` -make build-deps -``` - -While building the dependencies, `opam` is able to handle correctly -the OCaml libraries but it is not always able to handle all external C -libraries we depend on. On most system, it is able to suggest a call -to the system package manager but it currently does not handle version -check. In particular, the `libsodium-dev` packages on Ubuntu is too -old for building Tezos, we rely on version `1.0.11` at least. - -At last, compile the project: - -``` -make -``` - -This should produce three binaries: - -* `tezos-node`: the tezos daemon itself; -* `tezos-client`: a minimal command-line client; -* `tezos-protocol-compiler`: a protocol compiler used for developing new version of the economic protocol. - -Currently Tezos is being developed for Linux only. It should work on mac OS, -but it has not been tested recently. A Windows port is in progress. - - -Note that, when executing `make build-deps`, OPAM will detect if -required system dependencies are installed. However, it is not able to -detect which versions you actually have. Typically, `make` will -probably fail if you have an libsodium < 1.0.11. In this case, make -sure you have a recent version of libsodium and libsodium-dev, or -download and install them from, eg, -https://pkgs.org/download/libsodium18 and -https://pkgs.org/download/libsodium-dev - -Running the node ----------------- - -So far there is no official Tezos network being run, but you might run a local -test network (the development team is running its own testnet, if you're interested -in joining this network, please make a request on our slack channel. We have -limited support abilities at the moment but we'll try to help you best we can). - -Use the following command to run a node that will accept incoming -connections: - -``` -./tezos-node identity generate 24. -``` - -This will first generate a new node identity and compute the -associated stamp of proof-of-work. Then, the node will listen to -connections coming in on `[::]:9732`. All used data is stored at -`$HOME/.tezos-node/`. For example, the default configuration file is -at `$HOME/.tezos-node/config.json`. - -To run multiple nodes on the same machine, you can duplicate and edit -`$HOME/.tezos-node/config.json` while making sure they don't share paths to the -database or any other data file (cf. options `db.store` ; `db.context` ; -`db.protocol`, `net.peers-metadata` and `net.identity`). - -You could also let Tezos generate a config file by specifying options on the -command line. For instance, if `$dir/config.json` does not exist, the following -command will generate it and replace the default values with the values from -the command line: - -``` -./tezos-node run --data-dir "$dir" --net-addr localhost:9733 -``` - -The Tezos server has a built-in mechanism to discover peers on the local -network (using UDP packets broadcasted on port 7732). - -If this mechanism is not sufficient, one can provide Tezos with a list of -initial peers, either by editing the option `net.bootstrap-peers` in the -`config.json` file, or by specifying a command line parameter: - -``` -./tezos-node run \ - --data-dir "$dir" --net-addr localhost:2023 \ - --peer localhost:2021 --peer localhost:2022 -``` - -If `"$dir"/config.json` exists, the command line options override those -read in the config file. By default, Tezos won't modify the content of an -existing `"$dir"/config.json` file. But, you may explicit ask the node -to reset or to update the file according to the command line parameters -with the following commands line: - -``` -./tezos-node config reset --data-dir "$dir" --net-addr localhost:9733 -./tezos-node config update --data-dir "$dir" --net-addr localhost:9734 -``` - -Running the node in a sandbox ------------------------------ - -To run a 'localhost-only' instance of a Tezos network, we provide two -helper scripts: - -- `./bin_node/tezos-sandboxed-node.sh` -- `./bin_client/tezos-init-sandboxed-client.sh` - -For instance, if you want to run local network with two nodes, in a -first terminal, the following command will initialize a node listening -for peers on port `19731` and listening for RPC on port `18731`. - -``` -./bin_node/tezos-sandboxed-node.sh 1 -``` - -This node will store its data in a temporary directory which will be -removed when the node is killed. - -To launch the second node, just run the following command, it will -listen on port `19739` and `18739`: - -``` -./bin_node/tezos-sandboxed-node.sh 9 -``` - -You might replace `1` or `9` by any number in between if you want to -run more than two nodes. But, if you intend to run a single node -network, you might remove the spurious "Too few connections" warnings -by lowering the number of expected connection, by running the -following command instead: - -``` -./bin_node/tezos-sandboxed-node.sh 1 --connections 0 -``` - -Once your node(s) is/are running, open a new terminal and initialize -the "sandboxed" client data: - -``` -eval `./bin_client/tezos-init-sandboxed-client.sh 1` -``` - -It will initialize the client data in a temporary directory. It will -also defines in the current shell session an alias `tezos-client` -preconfigured for communicating the same-numbered node. For instance: - -``` -$ tezos-client rpc call blocks/head/hash -{ "hash": "BLockGenesisGenesisGenesisGenesisGenesisGeneskvg68z" } -``` - -When you bootstrap a new network, the network is initialized with a -dummy economic protocol, called "genesis". If you want to run the same -protocol than the alphanet, `init-sandboxed-client` also defines an -alias `tezos-activate-alpha`, that you need to execute once for -activating the whole network. For instance: - -``` -$ tezos-client rpc call blocks/head/protocol -{ "protocol": "ProtoGenesisGenesisGenesisGenesisGenesisGenesk612im" } -$ tezos-activate-alpha -Injected BMBcK869jaHQDc -$ tezos-client rpc call blocks/head/protocol -{ "protocol": "ProtoALphaALphaALphaALphaALphaALphaALphaALphaDdp3zK" } -``` - -Configuration options ---------------------- - -Here is an example configuration file with all parameters -specified. Most of the time it uses default values, except for cases -where the default is not explanatory enough (i.e. "bootstrap-peers" is -an empty list by default). Comments are not allowed in JSON, so this -configuration file would not parse. They are just provided here to -help writing your own configuration file if needed. - - -``` -{ - - /* Location of the data dir on disk. */ - - "data-dir": "/home/tezos/my_data_dir" - - /* Configuration of net parameters */ - - "net": { - - /* Floating point number between 0 and 256 that represents a - difficulty, 24 signifies for example that at least 24 leading - zeroes are expected in the hash. */ - - "expected-proof-of-work": 24.5, - - /* List of hosts. Tezos can connect to both IPv6 and IPv4 - hosts. If the port is not specified, default port 9732 will be - assumed. */ - - "bootstrap-peers": ["::1:10732", "::ffff:192.168.1.3:9733", "mynode.tezos.com"], - - /* Specify if the network is closed or not. A closed network - allows only peers listed in "bootstrap-peers". */ - - "closed": false, - - /* Network limits */ - - "limits": { - - /* Delay granted to a peer to perform authentication, in - seconds. */ - - "authentication-timeout": 5, - - /* Strict minimum number of connections (triggers an urgent - maintenance). */ - - "min-connections": 50, - - /* Targeted number of connections to reach when bootstraping / - maintaining. */ - - "expected-connections": 100, - - /* Maximum number of connections (exceeding peers are - disconnected). */ - - "max-connections": 200, - - /* Number above which pending incoming connections are - immediately rejected. */ - - "backlog": 20, - - /* Maximum allowed number of incoming connections that are - pending authentication. */ - - "max-incoming-connections": 20, - - /* Max download and upload speeds in KiB/s. */ - - "max-download-speed": 1024, - "max-upload-speed": 1024, - - /* Size of the buffer passed to read(2). */ - - "read-buffer-size": 16384, - } - }, - - /* Configuration of rpc parameters */ - - "rpc": { - - /* Host to listen to. If the port is not specified, the default - port 8732 will be assumed. */ - - "listen-addr": "localhost:8733", - - /* Cross Origin Resource Sharing parameters, see - https://en.wikipedia.org/wiki/Cross-origin_resource_sharing. */ - - "cors-origin": [], - "cors-headers": [], - - /* Certificate and key files (necessary when TLS is used). */ - - "crt": "tezos-node.crt", - "key": "tezos-node.key" - }, - - /* Configuration of log parameters */ - - "log": { - - /* Output for the logging function. Either "stdout", "stderr" or - the name of a log file . */ - - "output": "tezos-node.log", - - /* Verbosity level: one of 'fatal', 'error', 'warn', 'notice', - 'info', 'debug'. */ - - "level": "info", - - /* Fine-grained logging instructions. Same format as described in - `tezos-node run --help`, DEBUG section. In the example below, - sections "net" and all sections starting by "client" will have - their messages logged up to the debug level, whereas the rest of - log sections will be logged up to the notice level. */ - - "rules": "client* -> debug, net -> debug, * -> notice", - - /* Format for the log file, see - http://ocsigen.org/lwt/dev/api/Lwt_log_core#2_Logtemplates. */ - - "template": "$(date) - $(section): $(message)" - }, - - /* Configuration for the validator and mempool parameters */ - - "shell": { - - /* The number of peers to synchronize with - before declaring the node 'bootstrapped'. */ - - "bootstrap_threshold": 4 - - } -} -``` - -Debugging ---------- - -It is possible to set independant log levels for different logging -sections in Tezos, as well as specifying an output file for -logging. See the description of log parameters above as well as -documentation under the DEBUG section diplayed by `tezos-node run ---help'. - - -JSON/RPC interface ------------------- - -The Tezos node provides a JSON/RPC interface. Note that it is an RPC, and it is -JSON based, but it does not follow the "JSON-RPC" protocol. It is not active by -default and it must be explicitely activated with the `--rpc-addr` option. -Typically, if you are not trying to run a local network and just want to -explore the RPC, you would run: - -``` -./tezos-node run --rpc-addr localhost -``` - -The RPC interface is self-documented and the `tezos-client` executable is able -to pretty-print the RPC API. For instance, to see the API provided by the Tezos -Shell: - -``` -./tezos-client rpc list -``` - -To get API attached to the "genesis" block, including the remote procedures -provided by the associated economic protocol version: - -``` -./tezos-client rpc list /blocks/genesis/ -``` - -You might also want the JSON schema describing the expected input and output of -a RPC. For instance: - -``` -./tezos-client rpc schema /blocks/genesis/hash -``` - -Note: you can get the same information, but as a raw JSON object, with a simple -HTTP request: - -``` -wget --post-data '{ "recursive": true }' -O - http://localhost:8732/describe -wget --post-data '{ "recursive": true }' -O - http://localhost:8732/describe/blocks/genesis -wget -O - http://localhost:8732/describe/blocks/genesis/hash -``` - - - -The minimal CLI client ----------------------- - -Work in progress. - -See `./tezos-client -help` for available commands. diff --git a/README.rst b/README.rst new file mode 100644 index 000000000..69d0e5522 --- /dev/null +++ b/README.rst @@ -0,0 +1,409 @@ +TEZOS +===== + +Tezos is a distributed consensus platform with meta-consensus +capability. Tezos not only comes to consensus about state, like BTC or +ETH. It also comes to consensus about how the protocol and the nodes +should adapt and upgrade. + +See https://www.tezos.com/ for more information about the project. + +Build instructions +------------------ + +To compile Tezos, you need an OCaml compiler (version 4.04.2) and all +the libraries listed in the various ``tezos-*.opam`` files. + +The best way to install all dependencies is by first installing +`OPAM `__, the OCaml package manager. + +Then, you need to create a new switch alias for Tezos. A switch is your +own version of the OPAM configuration, including the OCaml compiler, all +packages, and package manager configuration related to your project. +This is necessary so that the project doesn’t conflict with other OCaml +projects or other versions of Tezos. + +:: + + opam update + opam switch "tezos" --alias-of 4.04.2 + +Note that if you previously created a switch named ``tezos`` but with an +older OCaml version you need to remove the switch with +``opam switch remove "tezos"``. + +When the switch is ready, you need to activate it: + +:: + + eval `opam config env` + +Install the libraries which Tezos depends on: + +:: + + make build-deps + +While building the dependencies, ``opam`` is able to handle correctly +the OCaml libraries but it is not always able to handle all external C +libraries we depend on. On most system, it is able to suggest a call to +the system package manager but it currently does not handle version +check. In particular, the ``libsodium-dev`` packages on Ubuntu is too +old for building Tezos, we rely on version ``1.0.11`` at least. + +At last, compile the project: + +:: + + make + +This should produce three binaries: + +- ``tezos-node``: the tezos daemon itself; +- ``tezos-client``: a minimal command-line client; +- ``tezos-protocol-compiler``: a protocol compiler used for developing + new version of the economic protocol. + +Currently Tezos is being developed for Linux only. It should work on mac +OS, but it has not been tested recently. A Windows port is in progress. + +Note that, when executing ``make build-deps``, OPAM will detect if +required system dependencies are installed. However, it is not able to +detect which versions you actually have. Typically, ``make`` will +probably fail if you have an libsodium < 1.0.11. In this case, make sure +you have a recent version of libsodium and libsodium-dev, or download +and install them from, eg, https://pkgs.org/download/libsodium18 and +https://pkgs.org/download/libsodium-dev + +Running the node +---------------- + +So far there is no official Tezos network being run, but you might run a +local test network (the development team is running its own testnet, if +you’re interested in joining this network, please make a request on our +slack channel. We have limited support abilities at the moment but we’ll +try to help you best we can). + +Use the following command to run a node that will accept incoming +connections: + +:: + + ./tezos-node identity generate 24. + +This will first generate a new node identity and compute the associated +stamp of proof-of-work. Then, the node will listen to connections coming +in on ``[::]:9732``. All used data is stored at ``$HOME/.tezos-node/``. +For example, the default configuration file is at +``$HOME/.tezos-node/config.json``. + +To run multiple nodes on the same machine, you can duplicate and edit +``$HOME/.tezos-node/config.json`` while making sure they don’t share +paths to the database or any other data file (cf. options ``db.store`` ; +``db.context`` ; ``db.protocol``, ``net.peers-metadata`` and +``net.identity``). + +You could also let Tezos generate a config file by specifying options on +the command line. For instance, if ``$dir/config.json`` does not exist, +the following command will generate it and replace the default values +with the values from the command line: + +:: + + ./tezos-node run --data-dir "$dir" --net-addr localhost:9733 + +The Tezos server has a built-in mechanism to discover peers on the local +network (using UDP packets broadcasted on port 7732). + +If this mechanism is not sufficient, one can provide Tezos with a list +of initial peers, either by editing the option ``net.bootstrap-peers`` +in the ``config.json`` file, or by specifying a command line parameter: + +:: + + ./tezos-node run \ + --data-dir "$dir" --net-addr localhost:2023 \ + --peer localhost:2021 --peer localhost:2022 + +If ``"$dir"/config.json`` exists, the command line options override +those read in the config file. By default, Tezos won’t modify the +content of an existing ``"$dir"/config.json`` file. But, you may +explicit ask the node to reset or to update the file according to the +command line parameters with the following commands line: + +:: + + ./tezos-node config reset --data-dir "$dir" --net-addr localhost:9733 + ./tezos-node config update --data-dir "$dir" --net-addr localhost:9734 + +Running the node in a sandbox +----------------------------- + +To run a ‘localhost-only’ instance of a Tezos network, we provide two +helper scripts: + +- ``./bin_node/tezos-sandboxed-node.sh`` +- ``./bin_client/tezos-init-sandboxed-client.sh`` + +For instance, if you want to run local network with two nodes, in a +first terminal, the following command will initialize a node listening +for peers on port ``19731`` and listening for RPC on port ``18731``. + +:: + + ./bin_node/tezos-sandboxed-node.sh 1 + +This node will store its data in a temporary directory which will be +removed when the node is killed. + +To launch the second node, just run the following command, it will +listen on port ``19739`` and ``18739``: + +:: + + ./bin_node/tezos-sandboxed-node.sh 9 + +You might replace ``1`` or ``9`` by any number in between if you want to +run more than two nodes. But, if you intend to run a single node +network, you might remove the spurious “Too few connections” warnings by +lowering the number of expected connection, by running the following +command instead: + +:: + + ./bin_node/tezos-sandboxed-node.sh 1 --connections 0 + +Once your node(s) is/are running, open a new terminal and initialize the +“sandboxed” client data: + +:: + + eval `./bin_client/tezos-init-sandboxed-client.sh 1` + +It will initialize the client data in a temporary directory. It will +also defines in the current shell session an alias ``tezos-client`` +preconfigured for communicating the same-numbered node. For instance: + +:: + + $ tezos-client rpc call blocks/head/hash + { "hash": "BLockGenesisGenesisGenesisGenesisGenesisGeneskvg68z" } + +When you bootstrap a new network, the network is initialized with a +dummy economic protocol, called “genesis”. If you want to run the same +protocol than the alphanet, ``init-sandboxed-client`` also defines an +alias ``tezos-activate-alpha``, that you need to execute once for +activating the whole network. For instance: + +:: + + $ tezos-client rpc call blocks/head/protocol + { "protocol": "ProtoGenesisGenesisGenesisGenesisGenesisGenesk612im" } + $ tezos-activate-alpha + Injected BMBcK869jaHQDc + $ tezos-client rpc call blocks/head/protocol + { "protocol": "ProtoALphaALphaALphaALphaALphaALphaALphaALphaDdp3zK" } + +Configuration options +--------------------- + +Here is an example configuration file with all parameters specified. +Most of the time it uses default values, except for cases where the +default is not explanatory enough (i.e. “bootstrap-peers” is an empty +list by default). Comments are not allowed in JSON, so this +configuration file would not parse. They are just provided here to help +writing your own configuration file if needed. + +:: + + { + + /* Location of the data dir on disk. */ + + "data-dir": "/home/tezos/my_data_dir" + + /* Configuration of net parameters */ + + "net": { + + /* Floating point number between 0 and 256 that represents a + difficulty, 24 signifies for example that at least 24 leading + zeroes are expected in the hash. */ + + "expected-proof-of-work": 24.5, + + /* List of hosts. Tezos can connect to both IPv6 and IPv4 + hosts. If the port is not specified, default port 9732 will be + assumed. */ + + "bootstrap-peers": ["::1:10732", "::ffff:192.168.1.3:9733", "mynode.tezos.com"], + + /* Specify if the network is closed or not. A closed network + allows only peers listed in "bootstrap-peers". */ + + "closed": false, + + /* Network limits */ + + "limits": { + + /* Delay granted to a peer to perform authentication, in + seconds. */ + + "authentication-timeout": 5, + + /* Strict minimum number of connections (triggers an urgent + maintenance). */ + + "min-connections": 50, + + /* Targeted number of connections to reach when bootstraping / + maintaining. */ + + "expected-connections": 100, + + /* Maximum number of connections (exceeding peers are + disconnected). */ + + "max-connections": 200, + + /* Number above which pending incoming connections are + immediately rejected. */ + + "backlog": 20, + + /* Maximum allowed number of incoming connections that are + pending authentication. */ + + "max-incoming-connections": 20, + + /* Max download and upload speeds in KiB/s. */ + + "max-download-speed": 1024, + "max-upload-speed": 1024, + + /* Size of the buffer passed to read(2). */ + + "read-buffer-size": 16384, + } + }, + + /* Configuration of rpc parameters */ + + "rpc": { + + /* Host to listen to. If the port is not specified, the default + port 8732 will be assumed. */ + + "listen-addr": "localhost:8733", + + /* Cross Origin Resource Sharing parameters, see + https://en.wikipedia.org/wiki/Cross-origin_resource_sharing. */ + + "cors-origin": [], + "cors-headers": [], + + /* Certificate and key files (necessary when TLS is used). */ + + "crt": "tezos-node.crt", + "key": "tezos-node.key" + }, + + /* Configuration of log parameters */ + + "log": { + + /* Output for the logging function. Either "stdout", "stderr" or + the name of a log file . */ + + "output": "tezos-node.log", + + /* Verbosity level: one of 'fatal', 'error', 'warn', 'notice', + 'info', 'debug'. */ + + "level": "info", + + /* Fine-grained logging instructions. Same format as described in + `tezos-node run --help`, DEBUG section. In the example below, + sections "net" and all sections starting by "client" will have + their messages logged up to the debug level, whereas the rest of + log sections will be logged up to the notice level. */ + + "rules": "client* -> debug, net -> debug, * -> notice", + + /* Format for the log file, see + http://ocsigen.org/lwt/dev/api/Lwt_log_core#2_Logtemplates. */ + + "template": "$(date) - $(section): $(message)" + }, + + /* Configuration for the validator and mempool parameters */ + + "shell": { + + /* The number of peers to synchronize with + before declaring the node 'bootstrapped'. */ + + "bootstrap_threshold": 4 + + } + } + +Debugging +--------- + +It is possible to set independant log levels for different logging +sections in Tezos, as well as specifying an output file for logging. See +the description of log parameters above as well as documentation under +the DEBUG section diplayed by \`tezos-node run –help’. + +JSON/RPC interface +------------------ + +The Tezos node provides a JSON/RPC interface. Note that it is an RPC, +and it is JSON based, but it does not follow the “JSON-RPC” protocol. It +is not active by default and it must be explicitely activated with the +``--rpc-addr`` option. Typically, if you are not trying to run a local +network and just want to explore the RPC, you would run: + +:: + + ./tezos-node run --rpc-addr localhost + +The RPC interface is self-documented and the ``tezos-client`` executable +is able to pretty-print the RPC API. For instance, to see the API +provided by the Tezos Shell: + +:: + + ./tezos-client rpc list + +To get API attached to the “genesis” block, including the remote +procedures provided by the associated economic protocol version: + +:: + + ./tezos-client rpc list /blocks/genesis/ + +You might also want the JSON schema describing the expected input and +output of a RPC. For instance: + +:: + + ./tezos-client rpc schema /blocks/genesis/hash + +Note: you can get the same information, but as a raw JSON object, with a +simple HTTP request: + +:: + + wget --post-data '{ "recursive": true }' -O - http://localhost:8732/describe + wget --post-data '{ "recursive": true }' -O - http://localhost:8732/describe/blocks/genesis + wget -O - http://localhost:8732/describe/blocks/genesis/hash + +The minimal CLI client +---------------------- + +Work in progress. + +See ``./tezos-client -help`` for available commands. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..f55d93bb0 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,25 @@ +# You can set these variables from the command line. +SPHINXOPTS = -aE -n +SPHINXBUILD = sphinx-build +SPHINXPROJ = Tezos +SOURCEDIR = . +BUILDDIR = _build + +all: html linkcheck + +linkcheck: + $(SPHINXBUILD) -b linkcheck "$(SOURCEDIR)" "$(BUILDDIR)" + +introduction/readme.rst: ../README.rst + sed 's/TEZOS/How to build and run/' $< > $@ + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +html: Makefile introduction/readme.rst + @$(SPHINXBUILD) -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) + +clean: + @-rm -Rf "$(BUILDDIR)" + @-rm introduction/readme.rst diff --git a/docs/README.alphanet b/docs/README.alphanet deleted file mode 100644 index e4feeeb59..000000000 --- a/docs/README.alphanet +++ /dev/null @@ -1,321 +0,0 @@ -Tezos (alphanet) -================ - -Welcome to the Tezos alphanet, which is a pre-release network for the -Tezos blockchain. Currently, the chain is reset every few weeks. - -For news and support about the alphanet, please join IRC (`#tezos` on -freenode). Please, report bugs related to the alphanet on the IRC -channel before filling Github issues. - -For more information about the project in general, see: - - https://www.tezos.com/ - - -How to join the alphanet ? -========================== - -We provide two ways of joining the alphanet : - -- use `docker` and prebuilt binaries - (recommended way, tested on windows/mac/linux) -- manual compilation and installation - (linux and mac only) - -The `alphanet.sh` script ------------------------- - -The recommended way for running an up-to-date Tezos node connected to -the alphanet is to use `scripts/alphanet.sh`. Its only requirement is a -working installation of Docker: - - https://www.docker.com/community-edition - -First, you need to download the script: - -``` -wget https://raw.githubusercontent.com/tezos/tezos/alphanet/scripts/alphanet.sh -chmod +x alphanet.sh -``` - -You are now one step away from a working node: - -``` -./alphanet.sh start -``` - -This will launch a docker container running the various daemons that -form a working tezos node. The first launch might take a few minutes -to synchronize the chain. - -On first launch the script will also create a cryptographic identity -(nicknamed `my_identity`) and provide you with free tezzies on a fresh -account (nicknamed `my_account`). You might check your balance with: - -``` -./alphanet.sh client get balance for my_account -``` - -On some circumstances the account creation might fail. If so, see -section "Known issues" below on how to force the account creation. - -See `./alphanet.sh --help` for more informations about the script. -In particular see `./alphanet.sh client --help` and -`scripts/README.master` for more information about the client. - -Every call to `alphanet.sh` will check for updates of the node and -will fail if your node is not up-to-date. For updating the node, -simply run: - -``` -./alphanet.sh restart -``` - -If you prefer to temporarily disable automatic updates, you just -have to set an environment variable: - -``` -export TEZOS_ALPHANET_DO_NOT_PULL=yes -``` - - -Compilation from sources ------------------------- - -The `alphanet` branch in the tezos git repository will always contain -the up-to-date sources of the tezos-node required for running the -alphanet. See `docs/README.master` on how to compile it. - -Once built, you might launch the a node by running: - -``` -./tezos-node identity generate 24. -./tezos-node run --rpc-addr localhost -``` - -By default this instance will store its data in `$HOME/.tezos-node` -and will listen to incoming peers on port 9732. It will also listen -to RPC requests on port 8732 (only from `localhost`). You might find -more options by running `./tezos-node config --help`. - -If you want to stake (see below for more details), you will also have -to run: - -``` -./tezos-client launch daemon -``` - -That's all. For the rest of the document, to execute the example -commands, you will have to replace `./alphanet.sh client` by -`./tezos-client`. - - -How to observe the network ? -============================ - -The alphanet script provides a basic command `./alhpanet.sh head` that -allows you to see if your own node is synchronized. - -The Tezos client also offers a lot of commands to introspect the state of -the node, and also to list and call the RPCs of the nodes. - -Enthusiastic Tezos adopter fredcy has also developed a nice block explorer -for the alphanet. See [https://github.com/fredcy/tezos-client]. - -In an upcoming version, we will also provide an opt-in tool for node runners -that will allow us to provide a global monitoring panel of the alphanet. - - -How to obtain free Tez from the faucet contract ? -================================================= - -The alphanet contains an ad-hoc faucet contract, that will generate -new tezzies for you to test. Obviously, this contract will not be -available outside of the test network. - -First, if you don't have any cryptographic identity yet, you need to -generate one (replace `my_identity` with any name that suits you -best): - -``` -./alphanet.sh client gen keys "my_identity" -``` - -Then, you have to generate a new "free" account (replace `my_account` -with any name that suits you best and `my_identity` by the name used -in the previous command): - -``` -./alphanet.sh client originate free account "my_account" for "my_identity" -``` - -That's all. You might check your balance: - -``` -./alphanet.sh client get balance for "my_account" -``` - -If you want MORE tezzies, you need to generate as many free accounts as -you need (you should receive ꜩ100.000 per account) and then transfer -the tezzies into a single account. For instance: - -``` -./alphanet.sh client originate free account "my_alt_account" for "my_identity" -./alphanet.sh client transfer 100,000.00 from "my_alt_account" to "my_account" -fee 0.00 -./alphanet.sh client forget contract "my_alt_account" -``` - -Note that the test network is kind enough to accept transactions -without fees... - - - -How to play with smart-contracts ? -================================== - -An advanced documentation of the smart contract language is in - - `/docs/language.md` - -Some test contracts are in - - `/tests/contracts/` - -For details and examples, see: - - http://www.michelson-lang.com/ - - - -How to stake on the alphanet ? -============================== - -By default, the faucet of the alphanet (the one behind `./alphanet.sh -originate free account "my_account" for "my_identity"`) creates -contracts which are managed by `my_identity` but whose staking rights -are delegated to the baker of the block including the -origination. That way we are sure that staking rights are attributed -to an active baker. - -But, nonetheless, you might claim your staking rights! - -The following command returns the current delegate of a contract: - -``` -./alphanet.sh client get delegate for "my_account" -``` - -If it is one the following, it is indeed one of our "bootstrap" -contracts! - -- `tz1YLtLqD1fWHthSVHPD116oYvsd4PTAHUoc` -- `tz1irovm9SKduvL3npv8kDM54PSWY5VJXoyz` -- `tz1UsgSSdRwwhYrqq7iVp2jMbYvNsGbWTozp` -- `tz1TwYbKYYJxw7AyubY4A9BUm2BMCPq7moaC` -- `tz1QWft73Zhj5VSA1sCuEi9HhDDJqywE6BtC` - -You might change the delegate of a contract with a single command: - -``` -./alphanet.sh client set delegate for "my_account" to "my_identity" - -``` - -You now have staking rights! - -Well, almost. - -You should wait. - -A little bit. - -At most two cycles. Which, on the alphanet is 128 blocks (something -around 2 hours). On the mainnet, this will be between 2 weeks and a -month. - - -But, to enforce your right a last step is required. When baking or -endorsing a block, a bond is taken out of the default account -associated to the public key of the delegate. Hence, in order to -stake, you must be provisioning for bond deposit. - -``` -./alphanet.sh client transfer 50,000.00 from "my_account" to "my_identity" -``` - -On the alphanet, a bond is ꜩ1000. Hence, with the previous command you -provisioned 50 bonds. If you want more, see section "How to obtain -free Tez from the faucet contract ?". - -Now, you are settled. The `alphanet` docker image runs a baker daemon -and a endorser daemon, by default for all your keys. - -To know if you staked, just run: - -``` -./alphanet.sh baker log -./alphanet.sh endorser log -``` - -You should see lines such as: - -``` -Injected block BLxzbB7PBW1axq for bootstrap5 after BLSrg4dXzL2aqq (level 1381, slot 0, fitness 00::0000000000005441, operations 21) -``` - -Or: - -``` -Injected endorsement for block 'BLSrg4dXzL2aqq' (level 1381, slot 3, contract bootstrap5) 'oo524wKiEWBoPD' -``` - -On the alphanet, rewards for staking are credited after 24 hours. The -reward for baking or endorsing a block is ꜩ150. The safety bond is -returned together with the reward. - -To know when you will be allowed to stake in the current cycle, you -might try the following RPCs, where you replaced `tz1iFY8ads...` by -the appropriate value: - -``` -$ ./alphanet.sh client list known identities -my_identity: tz1iFY8aDskx9QGbgBy68SNAGgkc7AE2iG9H (public key known) (secret key known) -$ ./alphanet.sh client rpc call /blocks/head/proto/helpers/rights/baking/delegate/tz1iFY8aDskx9QGbgBy68SNAGgkc7AE2iG9H with '{}' -{ "ok": - [ { "level": 1400.000000, "priority": 2.000000, - "timestamp": "2017-05-19T03:21:52Z" }, - ... ] } -``` - - -Known issues -============ - -Missing account `my_account` ----------------------------- - -The chain synchronization has not been optimized yet and the -`alphanet.sh` script might misdetect the end of the synchronization -step. If so, it will try to create your free account in an outdated -context and your new account will never be included in the chain. - -To fix this, just wait for your node to be synchronized: for that run -the following command, in the middle of a (raw) json object, it should -display the date of the last block (which should not be too far in the -past): - -``` -./alphanet.sh head -``` - -Please note that the printed date is GMT, don't forget the time shift. - -Then, you need to remove from the client state the non-existant -contract and regenerate a new one: - -``` -./alphanet.sh client forget contract "my_account" -./alphanet.sh client originate free account "my_account" for "my_identity" -``` - diff --git a/docs/README.rst b/docs/README.rst new file mode 100644 index 000000000..b14fb7960 --- /dev/null +++ b/docs/README.rst @@ -0,0 +1,40 @@ +****************************** +Building documentation locally +****************************** + +Building instructions +--------------------- + +To build the documentaion, you can use the main Makefile target ``doc-html`` + +.. code:: bash + + make doc-html + +The documentation is built by Sphinx, and uses the Read The Docs theme. + +On a debian system, you can install the needed dependencies with: + +.. code:: bash + + sudo apt install \ + python3-recommonmark \ + python3-sphinx \ + python3-sphinx-rtd-theme + +OCaml documentation +------------------- + +Odoc is used for OCaml API generation, that you can install with: + +.. code:: bash + + opam install odoc + +Tezos generates the API documentation for all libraries in HTML format. The +generated HTML pages in ``_build//_doc``. It creates one sub-directory +per public library and generates an ``index.html`` file in each sub-directory. + +The documentation is not installed on the system by Tezos. It is meant to be +read locally while developing and then published on the www when releasing +packages. diff --git a/docs/README.zeronet b/docs/README.zeronet deleted file mode 100644 index ac518fb81..000000000 --- a/docs/README.zeronet +++ /dev/null @@ -1,10 +0,0 @@ -Tezos (zeronet) -=============== - -The Tezos zeronet is a development network that might be broken -multiple times a day. - -If you wish to experiment with Tezos we recommend you to test the `alphanet`, -which is a more stable test network. - -In particular, we offer no support about the zeronet. diff --git a/docs/TUTORIAL.md b/docs/TUTORIAL.md deleted file mode 100644 index aecac1651..000000000 --- a/docs/TUTORIAL.md +++ /dev/null @@ -1,65 +0,0 @@ -# Tezos Code Tutorial - -## Introduction - -The purpose of this document is to help contributors get started with the Tezos -codebase. The code is organized in several layers in a way which largely reflects the philosophy of the project. It creates a very strict separation between the "node", which implements the network protocol described in the white paper and between the protocols themselves. Of course the seed protocol itself is a very important part of the Tezos project and it follows a similar organization. The economic protocol sits on top of a few layers of abstractions dealing primarily with storing and retrieving data from the current context. - -## Overview of the source - -This section presents a brief overview of the layout of the source files and their significance. - -### node -The network shell -#### node/db -Persistent data structures used by the shell to store its state. -#### note/net -Connectivity for the gossip network and primitives to create RPC services -#### node/shell -The shell itself -#### node/updater -Manage on-the-fly updates to the protocol - -### proto -This is where the protocols live -#### proto/environment - -#### proto/current - -### utils - -### compiler - -### client -### client/embedded - -### Node, the network shell - -### Storing the context - -## Irmin - -Tezos needs to store potentially different version of its context, corresponding to potentially different branches of the blockchain. This also implies the ability to roll back the changes made by any single block, and to make atomic changes to the structure on disk for eac block to avoid corruption. - -To that extent, Tezos borrows from the MirageOS project a module called [Irmin](https://github.com/mirage/irmin "Irmin") - -> Irmin is a library to persist and synchronize distributed data structures both on-disk and in-memory. It enables a style of programming very similar to the Git workflow, where distributed nodes fork, fetch, merge and push data between each other. The general idea is that you want every active node to get a local (partial) copy of a global database and always be very explicit about how and when data is shared and migrated. - -Caveat: although Tezos **is** a distributed ledger, it does **not** rely on Irmin's distributed store capabilities. For our purposes we only use Irmin as a local storage. The git structure is particularly well suited to represent the versionning implicit in the state of a blockchain based ledger. In fact, the context of Tezos can be observed by running "git checkout" in the data directory. - -## Netbits and Jsont - -Netbits and Jsont are modules which allow for the typesafe serialization of OCaml objects in a binary format and in Json (respectively). Importantly, it does not make of the potentially brittle representation created by the compiler and access through the Obj.magic function. Serializers are created using a set of type constructors defined in a GADT. - - -## (MISC STUFF TO BE ORGANIZED IN THE DOCUMENT) - -The "Main" module represents the fixed interface between the economic protocol and the shell. - -A protocol consists of several .ml and .mli files and a file name TEZOS_PROTOCOL which lists the modules in order of inclusion for the compilation. Lower level modules sit below high level modules. - - -What are \\_repr modules? - -These modules handle the low level representation of a type, in particular its serialization in Json and in binary format. A module based on this type will often exist on top of it and provide functionality around that type. This provides finely grained layers of encapsulation to mininize the visible interfaces. - diff --git a/docs/_extensions/tezos_custom_roles.py b/docs/_extensions/tezos_custom_roles.py new file mode 100644 index 000000000..31d1ded1a --- /dev/null +++ b/docs/_extensions/tezos_custom_roles.py @@ -0,0 +1,16 @@ +from docutils import nodes +import os +import os.path + +def setup(app): + app.add_role('package', package_role) + +def package_role(name, rawtext, text, lineno, inliner, options={}, content=[]): + rel_lvl = inliner.document.current_source.replace(os.getcwd(),'').count('/') + if not os.path.exists('_build/api/odoc/'+text): + raise ValueError('opam package ' + text + ' does not exist in the odoc') + url = "api/api-inline.html#" + text + '/index.html' + for i in range(1,rel_lvl): + url = '../' + url + node = nodes.reference(rawtext, text, refuri=url, **options) + return [node], [] diff --git a/docs/api/api-inline.rst b/docs/api/api-inline.rst new file mode 100644 index 000000000..a09f0e9bf --- /dev/null +++ b/docs/api/api-inline.rst @@ -0,0 +1,19 @@ +************************** +Online OCaml Documentation +************************** + +.. raw:: html + + + + diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 000000000..2d531de3f --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Tezos documentation build configuration file, created by +# sphinx-quickstart on Wed Jan 17 18:04:32 2018. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# sys.path.insert(0, os.path.abspath('.')) + +import os +import sys +sys.path.insert(0, os.path.abspath('.') + '/_extensions') + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.extlinks', 'tezos_custom_roles'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'Tezos' +copyright = '2017, Dynamic Ledger Solutions, Inc. ' +author = 'Dynamic Ledger Solutions, Inc. ' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.0.1' +# The full version, including alpha/beta/rc tags. +release = '0.0.1' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# Deactivate syntax highlighting +# - http://www.sphinx-doc.org/en/stable/markup/code.html#code-examples +# - http://www.sphinx-doc.org/en/stable/config.html#confval-highlight_language +highlight_language = 'none' +# TODO write a Pygments lexer for Michelson +# cf. http://pygments.org/docs/lexerdevelopment/ and http://pygments.org/docs/lexers/ + + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +html_theme_options = {'logo_only': True} +html_logo = "logo.svg" +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# This is required for the alabaster theme +# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +# html_sidebars = { +# '**': [ +# 'relations.html', # needs 'show_related': True theme option to display +# 'searchbox.html', +# ] +# } + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Tezosdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'Tezos.tex', 'Tezos Documentation', + 'Dynamic Ledger Solutions, Inc. \\textless{}contact@tezos.com\\textgreater{}', 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'tezos', 'Tezos Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'Tezos', 'Tezos Documentation', + author, 'Tezos', 'One line description of project.', + 'Miscellaneous'), +] diff --git a/docs/data_encoding.org b/docs/data_encoding.org deleted file mode 100644 index 5276b136b..000000000 --- a/docs/data_encoding.org +++ /dev/null @@ -1,172 +0,0 @@ -* The data_encoding library -Throughout the Tezos protocol, data is serialized so that it can be used via RPC, -written to disk, or placed in a block. This serialization/deserialization is handled -via the [[../lib_data_encoding/data_encoding.mli][data_encoding library]] -by providing a set primitive encodings and a variety of combinators. - -** Examples/Tutorial -*** Encoding an integer - -Integers are defined as other concrete data types with a generic encoding type =type 'a encoding=. -This means that it is an encoding to/from type =int=. There are a variety of ways to encode an integer, -depending on what binary serialization you want to achieve: -- =Data_encoding.int8= -- =Data_encoding.uint8= -- =Data_encoding.int16= -- =Data_encoding.uint16= -- =Data_encoding.int31= -- =Data_encoding.int32= -- =Data_encoding.int64= - -For example, an encoding that represents a 31 bit integer has type -=Data_encoding.int31 = int Data_encoding.encoding=. - -#+BEGIN_SRC ocaml -let int31_encoding = Data_encoding.int31 -#+END_SRC - - -*** Encoding an object -Encoding a single integer is fairly uninteresting. The Data_encoding library provides a number of -combinators that can be used to build more complicated objects. Consider the type that represents an -interval from the first number to the second: - -#+BEGIN_SRC ocaml -type interval = int64 * int64 -#+END_SRC - -We can define an encoding for this type as: - -#+BEGIN_SRC ocaml -let interval_encoding = - Data_encoding.(obj2 (req "min" int64) (req "max" int64)) -#+END_SRC - -In the example above we construct a new value =interval_encoding= by combining -two int64 integers using the =obj2= constructor. - -The library provides diffrent constructors, i.e. for objects -that have no data (=Data_encoding.empty=), constructors for object up to 10 fields, -contructors for tuples, list, etc. - -These are serialized to binary by converting each internal object to binary and -placing them in the order of the original object and to JSON as a JSON object with field names. - -*** Lists, arrays, and options -List, Arrays and options types can by built on top of ground data types. - -#+BEGIN_SRC ocaml -type interval_list = interval list - -type interval_array = interval array - -type interval_option = interval option -#+END_SRC - -And the encoders for these types as - -#+BEGIN_SRC ocaml -let interval_list_encoding = Data_encoding.list interval_encoding -let interval_array_encoding = Data_encoding.array interval_encoding -let interval_option_encoding = Data_encoding.option interval_encoding -#+END_SRC - -*** Union types -The Tezos codebase makes heavy use of variant types. Consider the following -variant type: - -#+BEGIN_SRC ocaml -type variant = B of bool - | S of string -#+END_SRC - -Encoding for this types can be expressed as: - -#+BEGIN_SRC ocaml -let variant_encoding = - Data_encoding.(union ~tag_size:`Uint8 - [ case - bool - (function B b -> Some b | _ -> None) - (fun b -> B b) ; - case - string - (function S s -> Some s | _ -> None) - (fun s -> S s) ]) -#+END_SRC - -This variant encoding is a bit more complicated. Let's look at the parts of the type: -- We include an optimization hint to the binary encoding to inform it of the number of elements we expect in the tag. - In most cases, we can use =`Uint8=, which allows you to have up to 256 possible cases (default). -- We provide a function to wrap the datatype. The encoding works by repeatedly trying to - decode the datatype using these functions until one returns =Some payload=. This payload - is then encoded using the data_encoding specified. -- We specify a function from the encoded type to the actual datatype. - -Since the library does not provide an exhaustivity check on these constructors, -the user must be careful when constructucting unin types to avoid unfortunate runtime failures. - -** How the Data_encoding module works - -This section is 100% optional. You do not need to understand this section to use the library. - -The library uses GADTs to provide type-safe serialization/deserialization. From there, -a runtime representation of JSON objects is parsed into the typesafe version. - -First we define an untyped JSON AST: - -#+BEGIN_SRC ocaml -type json = - [ `O of (string * json) list - | `Bool of bool - | `Float of float - | `A of json list - | `Null - | `String of string ] -#+END_SRC - -This is then parsed into a typed AST ( we eliminate several cases for clarity): - -#+BEGIN_SRC ocaml -type 'a desc = - | Null : unit desc - | Empty : unit desc - | Bool : bool desc - | Int64 : Int64.t desc - | Float : float desc - | Bytes : Kind.length -> MBytes.t desc - | String : Kind.length -> string desc - | String_enum : Kind.length * (string * 'a) list -> 'a desc - | Array : 'a t -> 'a array desc - | List : 'a t -> 'a list desc - | Obj : 'a field -> 'a desc - | Objs : Kind.t * 'a t * 'b t -> ('a * 'b) desc - | Tup : 'a t -> 'a desc - | Union : Kind.t * tag_size * 'a case list -> 'a desc - | Mu : Kind.enum * string * ('a t -> 'a t) -> 'a desc - | Conv : - { proj : ('a -> 'b) ; - inj : ('b -> 'a) ; - encoding : 'b t ; - schema : Json_schema.schema option } -> 'a desc - | Describe : - { title : string option ; - description : string option ; - encoding : 'a t } -> 'a desc - | Def : { name : string ; - encoding : 'a t } -> 'a desc -#+END_SRC - -- The first set of constructures define all ground types. -- The constructors for =Bytes=, =String= and =String_enum= includes a length fields in order to provide safe binary serialization. -- The constructors for =Array= and =List= are used by the combinators we saw earlier. -- The =Obj= and =Objs= constructors create JSON objects. - These are wrapped in the =Conv= constructor to remove nesting that results when these constructors are used naively. -- The =Mu= constructor is used to create self-referential definitions. -- The =Conv= constructor allows you to clean up a nested definition or compute another type from an existing one. -- The =Describe= and =Def= constructors are used to add documentation - -The library also provides various wrappers and convenience functions to make constructing these objects easier. -Reading the documentation in the [[../src/minutils/data_encoding.mli][mli file]] should orient -you on how to use these functions and their purposes. - diff --git a/docs/error_monad.org b/docs/error_monad.org deleted file mode 100644 index e5c190a7d..000000000 --- a/docs/error_monad.org +++ /dev/null @@ -1,276 +0,0 @@ -* The Error Monad -This has been adapted from a blog post on [[www.michelson-lang.com][michelson-lang.com]]. - -If you're not familiar with monads, go take a few minutes and read a tutorial. I personally got a lot out of this [[http://homepages.inf.ed.ac.uk/wadler/papers/marktoberdorf/baastad.pdf][paper]] by Philip Wadler, but there are a ton of others available online. Find one that works for you. The error monad isn't terribly scary as Monads go, so once you feel like you understand the gist, come on back and see if you can understand what's going on. - -I'm going to omit some convenience operations that a lot of monads provide in the examples below. If you want to add them, they're not difficult. - -** Why you want the error monad -In Tezos, we don't want to have the node be crashable by an improper input. To avoid this possibility, it was decided that the system should not use exceptions for error handling. Instead, it uses an error monad. This design forces errors to be handled or carried through before an output can be used. Exceptions are still occasionally used, but this is mostly in the client and only for internal errors. - -We also mix in the Lwt library, which we use for concurrency. This is combined with the error monad and is once again used pervasively throughout the codebase. The Lwt monad is a lot like promises in other languages. - -Without further ado, let's write an error monad. - -** A simple version of the error monad -Here's a very simple error monad. - -#+BEGIN_SRC ocaml -module Error : sig - type 'a t - (* Create a value of type t *) - val return : 'a -> 'a t - (* For when a computation fails *) - val error : 'a t - (* Apply an operation to a value in the error monad *) - val (>>?) : 'a t -> ('a -> 'b t) -> 'b t (* bind *) -end = struct - type 'a t = Ok of 'a | Error - let return x = Ok x - let error = Error - let (>>?) value func = - match value with - | Ok x -> func x - | Error -> Error -end -#+END_SRC - -So, is this what Tezos uses? We actually already have a lot of the structure that we'll use later. The basic idea is that you return a value that's correct and return an error if the operation failed. Outside of the error module, you can't actually introspect an error value. You can only dispatch on the correctness/incorrectness of the value using bind. - -What's wrong here? -- We can't report any information about an error case -- We can't report error traces, something that's used to improve the quality of error messages throughout Tezos -- We can't handle some errors and continue executing - -** A slight improvement -Let's now enhance our error reporting by allowing errors to contain a description string. Now we can report messages along with our errors. Is this enough of an improvement? Not really. We don't have any flexibility about how the printing works. We still can't create error traces and we can't handle errors and resume executing the program. -#+BEGIN_SRC ocaml -module Error : sig - type 'a t - val return : 'a -> 'a t - val error : string -> 'a t - val (>>?) : 'a t -> ('a -> 'b t) -> 'b t (* bind *) - val print_value : ('a -> string) -> 'a t -> unit -end = struct - type 'a t = Ok of 'a | Error of string - let return x = Ok x - let error s = Error s - let (>>?) value func = - match value with - | Ok x -> func x - | Error s -> Error s - let print_value func = function - | Ok x -> Printf.printf "Success: %s\n" (func x) - | Error s -> Printf.printf "Error: %s\n" s -end -#+END_SRC - -** Traces -Now that we have the basic structure down, we can add a mechanism to let us include traces. As a note, the error type I had above is exactly the =result= type from the OCaml standard library. The traces are just lists of error messages. If you have a call you think might fail, and you want to provide a series of errors, you can wrap that result in the =trace= function. If that call fails, an additional error is added. -#+BEGIN_SRC ocaml -module Error : sig - type 'a t - val return : 'a -> 'a t - val error : string -> 'a t - val (>>?) : 'a t -> ('a -> 'b t) -> 'b t (* bind *) - val print_value : ('a -> string) -> 'a t -> unit - val trace : string -> 'a t -> 'a t -end = struct - type 'a t = ('a, string list) result - let return x = Ok x - let error s = Error [ s ] - let (>>?) value func = - match value with - | Ok x -> func x - | Error errs -> Error errs - let print_value func = function - | Ok x -> Printf.printf "Success: %s\n" (func x) - | Error [ s ] -> Printf.printf "Error: %s\n" s - | Error errors -> Printf.printf "Errors:\t%s\n" (String.concat "\n\t" errors) - let trace error = function - | Ok x -> Ok x - | Error errors -> Error (error :: errors) -end -#+END_SRC - -** A more descriptive message -Even though traces are nice, we really want to be able to store more interesting data in the messages. We're going to use an extensible variant type to do this. Extensible variants allow us to add a new case to a variant type at the cost of exhaustivity checking. We're going to need two new mechanisms to make this work well. The first is an error registration scheme. In the actual error monad, this involves the data encoding module, which is how all data is encoded/decoded in Tezos. This module is another decently complicated part of the codebase that should probably the subject of a future post. Since you can declare arbitrary new errors, we'll have a way of adding a printer for each error. - -When we add a new error handler, we'll use the =register_handler= function. This function will take a function that takes an error and returns a =string option=. These functions will look something like this: - -#+BEGIN_SRC ocaml -type error += Explosion_failure of string * int;; - -register_error - (function - | Explosion_failure (s, i) -> - Some (Printf.sprintf "Everything exploded: %s at %d" s i) - | _ -> None) -#+END_SRC - -I'm also renaming the =error= function to =fail=. This is the convention used by the actual Error_monad module. I'm also exposing the ='a t= type so that you can dispatch on it if you need to. This is used several times in the Tezos codebase. - -#+BEGIN_SRC ocaml -module Error : sig - type error = .. - type 'a t = ('a, error list) result - val return : 'a -> 'a t - val fail : error -> 'a t - val (>>?) : ('a -> 'b t) -> 'a t -> 'b t (* bind *) - val print_value : ('a -> string) -> 'a t -> unit - val trace : error -> 'a t -> 'a t -end = struct - type error = .. - type 'a t = ('a, error list) result - let fail error = Error [ error ] - let return x = Ok x - let (>>?) func = function - | Ok x -> func x - | Error errs -> Error errs - let registered = ref [] - let register_error handler = - registered := (handler :: !registered) - let default_handler error = - "Unregistered error: " ^ Obj.(extension_name @@ extension_constructor error) - let to_string error = - let rec find_handler = function - | [] -> default_handler error - | handler :: handlers -> - begin match handler error with - | None -> find_handler handlers - | Some s -> s - end - in find_handler !registered - let print_value func = function - | Ok x -> Printf.printf "Success: %s\n" (func x) - | Error [ s ] -> Printf.printf "Error: %s\n" (to_string s) - | Error errors -> Printf.printf "Errors:\t%s\n" (String.concat "\n\t" (List.map to_string errors)) - let trace error = function - | Ok x -> Ok x - | Error errors -> Error (error :: errors) -end -#+END_SRC - -** Putting =Lwt.t= in the mix -Tezos uses the [[http://ocsigen.org/lwt/][Lwt library]] for threading. The Lwt monad is mixed in with the error monad module. This requires us to add some extra combinators and reexport some functions from Lwt. - -I'm also renaming the type =t= to =tzresult=, as used in the Tezos codebase. - -#+BEGIN_SRC ocaml -module Error : sig - type error = .. - type 'a tzresult = ('a, error list) result - val ok : 'a -> 'a tzresult - val return : 'a -> 'a tzresult Lwt.t - val error : error -> 'a tzresult - val fail : error -> 'a tzresult Lwt.t - val (>>?) : 'a tzresult -> ('a -> 'b tzresult) -> 'b tzresult (* bind *) - val (>>=?) : 'a tzresult Lwt.t -> ('a -> 'b tzresult Lwt.t) -> 'b tzresult Lwt.t - val (>>=) : 'a Lwt.t -> ('a -> 'b Lwt.t) -> 'b Lwt.t - val print_value : ('a -> string) -> 'a tzresult Lwt.t -> unit Lwt.t - val trace : error -> 'a tzresult Lwt.t -> 'a tzresult Lwt.t -end = struct - type error = .. - type 'a tzresult = ('a, error list) result - let fail error = Lwt.return (Error [ error ]) - let error error = (Error [ error ]) - let ok x = Ok x - let return x = Lwt.return (ok x) - let (>>?) value func = - match value with - | Ok x -> func x - | Error errs -> Error errs - let (>>=) = Lwt.bind - let (>>=?) value func = - value >>= function - | Ok x -> func x - | Error errs -> Lwt.return (Error errs) - let registered = ref [] - let register_error handler = - registered := (handler :: !registered) - let default_handler error = - "Unregistered error: " ^ Obj.(extension_name @@ extension_constructor error) - let to_string error = - let rec find_handler = function - | [] -> default_handler error - | handler :: handlers -> - begin match handler error with - | None -> find_handler handlers - | Some s -> s - end - in find_handler !registered - let print_value func value = - value >>= fun value -> - begin match value with - | Ok x -> Printf.printf "Success: %s\n" (func x) - | Error [ s ] -> Printf.printf "Error: %s\n" (to_string s) - | Error errors -> Printf.printf "Errors:\t%s\n" (String.concat "\n\t" (List.map to_string errors)) - end; Lwt.return () - let trace error value = - value >>= function - | Ok x -> return x - | Error errors -> Lwt.return (Error (error :: errors)) -end -#+END_SRC -** The actual Tezos error monad -The actual Tezos error monad adds a few things. Firstly, there are three categories of errors: -- =`Temporary= - An error resulting from an operation that might be valid in the future, for example, a contract's balance being too low to execute the intended operation. This can be fixed by adding more to the contract's balance. -- =`Branch= - An error that occurs in one branch of the chain, but may not occur in a different one. For example, receiving an operation for an old or future protocol version. -- =`Permanent= - An error that is not recoverable because the operation is never going to be valid. For example, an invalid ꜩ notation. - -The registration scheme also uses data encodings. Here's an example from the [[file:~/tezos/src/node/shell/validator.ml][validator]]: - -#+BEGIN_SRC ocaml -register_error_kind - `Permanent - ~id:"validator.wrong_level" - ~title:"Wrong level" - ~description:"The block level is not the expected one" - ~pp:(fun ppf (e, g) -> - Format.fprintf ppf - "The declared level %ld is not %ld" g e) - Data_encoding.(obj2 - (req "expected" int32) - (req "provided" int32)) - (function Wrong_level (e, g) -> Some (e, g) | _ -> None) - (fun (e, g) -> Wrong_level (e, g)) -#+END_SRC - -An error takes a category, id, title, description, and encoding. You must specify a function to take an error to an optional value of the encoding type and a function to take a value of the encoded type and create an error value. A pretty printer can optionally be specified, but may also be omitted. - - -The actual error monad and it's tracing features can be seen in this function which parses contracts: - -#+BEGIN_SRC ocaml -let parse_script - : ?type_logger: (int * (Script.expr list * Script.expr list) -> unit) -> - context -> Script.storage -> Script.code -> ex_script tzresult Lwt.t - = fun ?type_logger ctxt - { storage; storage_type = init_storage_type } - { code; arg_type; ret_type; storage_type } -> - trace - (Ill_formed_type (Some "parameter", arg_type)) - (Lwt.return (parse_ty arg_type)) >>=? fun (Ex_ty arg_type) -> - trace - (Ill_formed_type (Some "return", ret_type)) - (Lwt.return (parse_ty ret_type)) >>=? fun (Ex_ty ret_type) -> - trace - (Ill_formed_type (Some "initial storage", init_storage_type)) - (Lwt.return (parse_ty init_storage_type)) >>=? fun (Ex_ty init_storage_type) -> - trace - (Ill_formed_type (Some "storage", storage_type)) - (Lwt.return (parse_ty storage_type)) >>=? fun (Ex_ty storage_type) -> - let arg_type_full = Pair_t (arg_type, storage_type) in - let ret_type_full = Pair_t (ret_type, storage_type) in - Lwt.return (ty_eq init_storage_type storage_type) >>=? fun (Eq _) -> - trace - (Ill_typed_data (None, storage, storage_type)) - (parse_data ?type_logger ctxt storage_type storage) >>=? fun storage -> - trace - (Ill_typed_contract (code, arg_type, ret_type, storage_type, [])) - (parse_returning (Toplevel { storage_type }) ctxt ?type_logger arg_type_full ret_type_full code) - >>=? fun code -> - return (Ex_script { code; arg_type; ret_type; storage; storage_type }) -#+END_SRC - -Each specific type error from the typechecking process is wrapped in a more general error that explains which part of the program was malformed. This improves the error reporting. You can also see the bind operator used between functions to continue only if an error does not occur. This function also operates in the =Lwt= monad, which is largely hidden via the error monad. diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..44ccf1a62 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,53 @@ +.. Tezos documentation master file, created by + sphinx-quickstart on Sat Nov 11 11:08:48 2017. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Tezos's documentation! +================================= + +.. toctree:: + :maxdepth: 2 + :caption: Introduction: + + introduction/readme + introduction/contributing + +.. toctree:: + :maxdepth: 2 + :caption: The Alphanet: + + introduction/alphanet + introduction/alphanet_changes + introduction/zeronet + +.. toctree:: + :maxdepth: 2 + :caption: White doc: + + whitedoc/the_big_picture + whitedoc/michelson + +.. toctree:: + :maxdepth: 2 + :caption: Tutorials: + + tutorials/data_encoding + tutorials/error_monad + tutorials/michelson_anti_patterns + tutorials/entering_alpha + tutorials/protocol_environment + +.. toctree:: + :maxdepth: 2 + :caption: OCaml API: + + README + api/api-inline + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/introduction/alphanet.rst b/docs/introduction/alphanet.rst new file mode 100644 index 000000000..37a6d0156 --- /dev/null +++ b/docs/introduction/alphanet.rst @@ -0,0 +1,309 @@ +Participating in the Alphanet +============================= + +Welcome to the Tezos alphanet, which is a pre-release network for the +Tezos blockchain. Currently, the chain is reset every few weeks. + +For news and support about the alphanet, please join IRC (``#tezos`` on +freenode). Please, report bugs related to the alphanet on the IRC +channel before filling Github issues. + +For more information about the project in general, see: + +https://www.tezos.com/ + +How to join the alphanet ? +-------------------------- + +We provide two ways of joining the alphanet : + +- use ``docker`` and prebuilt binaries (recommended way, tested on + windows/mac/linux) +- manual compilation and installation (linux and mac only) + +The ``alphanet.sh`` script +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The recommended way for running an up-to-date Tezos node connected to +the alphanet is to use ``scripts/alphanet.sh``. Its only requirement is +a working installation of Docker: + +https://www.docker.com/community-edition + +First, you need to download the script: + +:: + + wget https://raw.githubusercontent.com/tezos/tezos/alphanet/scripts/alphanet.sh + chmod +x alphanet.sh + +You are now one step away from a working node: + +:: + + ./alphanet.sh start + +This will launch a docker container running the various daemons that +form a working tezos node. The first launch might take a few minutes to +synchronize the chain. + +On first launch the script will also create a cryptographic identity +(nicknamed ``my_identity``) and provide you with free tezzies on a fresh +account (nicknamed ``my_account``). You might check your balance with: + +:: + + ./alphanet.sh client get balance for my_account + +On some circumstances the account creation might fail. If so, see +section “Known issues” below on how to force the account creation. + +See ``./alphanet.sh --help`` for more informations about the script. In +particular see ``./alphanet.sh client --help`` and +``scripts/README.master`` for more information about the client. + +Every call to ``alphanet.sh`` will check for updates of the node and +will fail if your node is not up-to-date. For updating the node, simply +run: + +:: + + ./alphanet.sh restart + +If you prefer to temporarily disable automatic updates, you just have to +set an environment variable: + +:: + + export TEZOS_ALPHANET_DO_NOT_PULL=yes + +Compilation from sources +~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``alphanet`` branch in the tezos git repository will always contain +the up-to-date sources of the tezos-node required for running the +alphanet. See ``docs/README.master`` on how to compile it. + +Once built, you might launch the a node by running: + +:: + + ./tezos-node identity generate 24. + ./tezos-node run --rpc-addr localhost + +By default this instance will store its data in ``$HOME/.tezos-node`` +and will listen to incoming peers on port 9732. It will also listen to +RPC requests on port 8732 (only from ``localhost``). You might find more +options by running ``./tezos-node config --help``. + +If you want to stake (see below for more details), you will also have to +run: + +:: + + ./tezos-client launch daemon + +That’s all. For the rest of the document, to execute the example +commands, you will have to replace ``./alphanet.sh client`` by +``./tezos-client``. + +How to observe the network ? +---------------------------- + +The alphanet script provides a basic command ``./alphanet.sh head`` that +allows you to see if your own node is synchronized. + +The Tezos client also offers a lot of commands to introspect the state +of the node, and also to list and call the RPCs of the nodes. + +Enthusiastic Tezos adopter fredcy has also developed a nice block +explorer for the alphanet. See https://github.com/fredcy/tezos-client. + +In an upcoming version, we will also provide an opt-in tool for node +runners that will allow us to provide a global monitoring panel of the +alphanet. + +How to obtain free Tez from the faucet contract ? +------------------------------------------------- + +The alphanet contains an ad-hoc faucet contract, that will generate new +tezzies for you to test. Obviously, this contract will not be available +outside of the test network. + +First, if you don’t have any cryptographic identity yet, you need to +generate one (replace ``my_identity`` with any name that suits you +best): + +:: + + ./alphanet.sh client gen keys "my_identity" + +Then, you have to generate a new “free” account (replace ``my_account`` +with any name that suits you best and ``my_identity`` by the name used +in the previous command): + +:: + + ./alphanet.sh client originate free account "my_account" for "my_identity" + +That’s all. You might check your balance: + +:: + + ./alphanet.sh client get balance for "my_account" + +If you want MORE tezzies, you need to generate as many free accounts as +you need (you should receive ꜩ100.000 per account) and then transfer the +tezzies into a single account. For instance: + +:: + + ./alphanet.sh client originate free account "my_alt_account" for "my_identity" + ./alphanet.sh client transfer 100,000.00 from "my_alt_account" to "my_account" -fee 0.00 + ./alphanet.sh client forget contract "my_alt_account" + +Note that the test network is kind enough to accept transactions without +fees… + +How to play with smart-contracts ? +---------------------------------- + +An advanced documentation of the smart contract language is in + +``/docs/language.md`` + +Some test contracts are in + +``/tests/contracts/`` + +For details and examples, see: + +http://www.michelson-lang.com/ + +How to stake on the alphanet ? +------------------------------ + +By default, the faucet of the alphanet (the one behind +``./alphanet.sh originate free account "my_account" for "my_identity"``) +creates contracts which are managed by ``my_identity`` but whose staking +rights are delegated to the baker of the block including the +origination. That way we are sure that staking rights are attributed to +an active baker. + +But, nonetheless, you might claim your staking rights! + +The following command returns the current delegate of a contract: + +:: + + ./alphanet.sh client get delegate for "my_account" + +If it is one the following, it is indeed one of our “bootstrap” +contracts! + +- ``tz1YLtLqD1fWHthSVHPD116oYvsd4PTAHUoc`` +- ``tz1irovm9SKduvL3npv8kDM54PSWY5VJXoyz`` +- ``tz1UsgSSdRwwhYrqq7iVp2jMbYvNsGbWTozp`` +- ``tz1TwYbKYYJxw7AyubY4A9BUm2BMCPq7moaC`` +- ``tz1QWft73Zhj5VSA1sCuEi9HhDDJqywE6BtC`` + +You might change the delegate of a contract with a single command: + +:: + + ./alphanet.sh client set delegate for "my_account" to "my_identity" + +You now have staking rights! + +Well, almost. + +You should wait. + +A little bit. + +At most two cycles. Which, on the alphanet is 128 blocks (something +around 2 hours). On the mainnet, this will be between 2 weeks and a +month. + +But, to enforce your right a last step is required. When baking or +endorsing a block, a bond is taken out of the default account associated +to the public key of the delegate. Hence, in order to stake, you must be +provisioning for bond deposit. + +:: + + ./alphanet.sh client transfer 50,000.00 from "my_account" to "my_identity" + +On the alphanet, a bond is ꜩ1000. Hence, with the previous command you +provisioned 50 bonds. If you want more, see section “How to obtain free +Tez from the faucet contract ?”. + +Now, you are settled. The ``alphanet`` docker image runs a baker daemon +and a endorser daemon, by default for all your keys. + +To know if you staked, just run: + +:: + + ./alphanet.sh baker log + ./alphanet.sh endorser log + +You should see lines such as: + +:: + + Injected block BLxzbB7PBW1axq for bootstrap5 after BLSrg4dXzL2aqq (level 1381, slot 0, fitness 00::0000000000005441, operations 21) + +Or: + +:: + + Injected endorsement for block 'BLSrg4dXzL2aqq' (level 1381, slot 3, contract bootstrap5) 'oo524wKiEWBoPD' + +On the alphanet, rewards for staking are credited after 24 hours. The +reward for baking or endorsing a block is ꜩ150. The safety bond is +returned together with the reward. + +To know when you will be allowed to stake in the current cycle, you +might try the following RPCs, where you replaced ``tz1iFY8ads...`` by +the appropriate value: + +:: + + $ ./alphanet.sh client list known identities + my_identity: tz1iFY8aDskx9QGbgBy68SNAGgkc7AE2iG9H (public key known) (secret key known) + $ ./alphanet.sh client rpc call /blocks/head/proto/helpers/rights/baking/delegate/tz1iFY8aDskx9QGbgBy68SNAGgkc7AE2iG9H with '{}' + { "ok": + [ { "level": 1400.000000, "priority": 2.000000, + "timestamp": "2017-05-19T03:21:52Z" }, + ... ] } + +Known issues +------------ + +Missing account ``my_account`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The chain synchronization has not been optimized yet and the +``alphanet.sh`` script might mis-detect the end of the synchronization +step. If so, it will try to create your free account in an outdated +context and your new account will never be included in the chain. + +To fix this, just wait for your node to be synchronized: for that run +the following command, in the middle of a (raw) json object, it should +display the date of the last block (which should not be too far in the +past): + +:: + + ./alphanet.sh head + +Please note that the printed date is GMT, don’t forget the time shift. + +Then, you need to remove from the client state the non-existent contract +and regenerate a new one: + +:: + + ./alphanet.sh client forget contract "my_account" + ./alphanet.sh client originate free account "my_account" for "my_identity" diff --git a/docs/introduction/alphanet_changes.md b/docs/introduction/alphanet_changes.md new file mode 120000 index 000000000..2bcab7ec4 --- /dev/null +++ b/docs/introduction/alphanet_changes.md @@ -0,0 +1 @@ +../../CHANGES.alphanet \ No newline at end of file diff --git a/docs/introduction/alphanet_changes.rst b/docs/introduction/alphanet_changes.rst new file mode 120000 index 000000000..2bcab7ec4 --- /dev/null +++ b/docs/introduction/alphanet_changes.rst @@ -0,0 +1 @@ +../../CHANGES.alphanet \ No newline at end of file diff --git a/docs/introduction/contributing.rst b/docs/introduction/contributing.rst new file mode 100644 index 000000000..aacd187bc --- /dev/null +++ b/docs/introduction/contributing.rst @@ -0,0 +1,82 @@ +How to contribute +================= + +Introduction +------------ + +The purpose of this document is to help contributors get started with +the Tezos OCaml codebase. + +First steps +----------- + +First, make sure that you are proficient enough in OCaml. The community +Website http://www.ocaml.org below gives a few pointer for that. In +particular, we use a lot of functors, and a few GADTs in the codebase, +so you may want to make sure that you master these advanced concepts. + +Then, if you don’t know well about the Lwt library, that’s what you want +to learn. This library is used extensively throughout the code base, as +that’s the one we use to handle concurrency, and Tezos is a very +concurrent system. You can use the online documentation. The `PDF +manual `__ is also quite +well written, but unfortunately not up to date, in particular the syntax +extension has changed. It is still a valid resource for the basic +concepts. The chapter on concurrency of `Real World +OCaml `__ has also been ported to Lwt. + +After that, it is a good idea to read the tutorials for +:ref:`error_monad` and +:ref:`data_encoding `, two homegrown +libraries that we use pervasively. + +Where to start +-------------- + +While you familiarize yourself with the basics as suggested above, you +can have a look at the :ref:`software architecture +` of Tezos. It will +give you the main components and their interactions, and links to the +documentations for the various parts. + +Our git workflow +---------------- + +First, the repository is http://gitlab.com/tezos/tezos, the github one +is just a clone that exists for historical reasons. So if you want to +contribute, simply create an account there. + +Then, there are many ways to use Git, here is ours. + +We use almost only merge requests to push into master. Meaning, nobody +should push directly into master. Once a merge request is ready, it is +reviewed and approved, we merge it using the ``--fast-forward`` option +of Git, in order to maintain a linear history without merge patches. + +For that to work, it means that merge requests must be direct suffixes +of the master branch. So whenever ``origin/master`` changes, you have to +rebase your branch on it, so that you patches always sit on top of +master. When that happens, you may have to edit your patches during the +rebase, and then use ``push -f`` in your branch to rewrite the history. + +We also enforce a few hygiene rules, so make sure your MR respects them: + +- Prefer small atomic commits over a large one that do many things. +- Don’t mix refactoring and new features. +- Never mix reindentation, whitespace deletion, or other style changes + with actual code changes. +- Try as much as possible to make every patch compile, not only the + last. +- If you add new functions into a documented interface, don’t forget to + extend the documentation for your addition. +- For parts whose specification is in the repository (e.g. Michelson), + make sure to keep it in sync with the implementation. +- Try and mimic the style of commit messages, and for non trivial + commits, add an extended commit message. + +As per the hygiene of MRs themselves: + +- Give appropriate titles to the MRs, and when non trivial add a + detailed motivated explanation. +- Give meaningful and consistent names to branches. +- Don’t forget to put a ``WIP:`` flag when it is a work in progress. diff --git a/docs/introduction/zeronet.rst b/docs/introduction/zeronet.rst new file mode 100644 index 000000000..05a74b1b0 --- /dev/null +++ b/docs/introduction/zeronet.rst @@ -0,0 +1,10 @@ +The zeronet network +=================== + +The Tezos zeronet is a development network that might be broken multiple +times a day. + +If you wish to experiment with Tezos we recommend you to test the +``alphanet``, which is a more stable test network. + +In particular, we offer no support about the zeronet. diff --git a/docs/logo.svg b/docs/logo.svg new file mode 100644 index 000000000..99da1d27a --- /dev/null +++ b/docs/logo.svg @@ -0,0 +1,129 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + Developer + Resources + Tezos + + diff --git a/docs/michelson.md b/docs/michelson.md deleted file mode 100644 index dc320ffce..000000000 --- a/docs/michelson.md +++ /dev/null @@ -1,2109 +0,0 @@ -Michelson: the language of Smart Contracts in Tezos -=================================================== - -The language is stack based, with high level data types and primitives -and strict static type checking. Its design cherry picks traits from -several language families. Vigilant readers will notice direct -references to Forth, Scheme, ML and Cat. - -A Michelson program is a series of instructions that are run in -sequence: each instruction receives as input the stack resulting of -the previous instruction, and rewrites it for the next one. The stack -contains both immediate values and heap allocated structures. All -values are immutable and garbage collected. - -A Michelson program receives as input a single element stack -containing an input value and the contents of a storage space. It must -return a single element stack containing an output value and the new -contents of the storage space. Alternatively, a Michelson program can -fail, explicitly using a specific opcode, or because something went -wrong that could not be caught by the type system (e.g. division by -zero, gas exhaustion). - -The types of the input, output and storage are fixed and monomorphic, -and the program is typechecked before being introduced into the -system. No smart contract execution can fail because an instruction -has been executed on a stack of unexpected length or contents. - -This specification gives the complete instruction set, type system and -semantics of the language. It is meant as a precise reference manual, -not an easy introduction. Even though, some examples are provided at -the end of the document and can be read first or at the same time as -the specification. - -Table of contents ------------------ - - * I - Semantics - * II - Type system - * III - Core data types - * IV - Core instructions - * V - Operations - * VI - Domain specific data types - * VII - Domain specific operations - * VIII - Macros - * IX - Concrete syntax - * X - JSON syntax - * XI - Examples - * XII - Full grammar - * XIII - Reference implementation - -I - Semantics -------------- - -This specification gives a detailed formal semantics of the Michelson -language. It explains in a symbolic way the computation performed by -the Michelson interpreter on a given program and initial stack to -produce the corresponding resulting stack. The Michelson interpreter -is a pure function: it only builds a result stack from the elements of -an initial one, without affecting its environment. This semantics is -then naturally given in what is called a big step form: a symbolic -definition of a recursive reference interpreter. This definition takes -the form of a list of rules that cover all the possible inputs of the -interpreter (program and stack), and describe the computation of the -corresponding resulting stacks. - -### Rules form and selection - -The rules have the main following form. - - > (syntax pattern) / (initial stack pattern) => (result stack pattern) - iff (conditions) - where (recursions) - -The left hand side of the `=>` sign is used for selecting the -rule. Given a program and an initial stack, one (and only one) rule -can be selected using the following process. First, the toplevel -structure of the program must match the syntax pattern. This is quite -simple since there is only a few non trivial patterns to deal with -instruction sequences, and the rest is made of trivial pattern that -match one specific instruction. Then, the initial stack must match the -initial stack pattern. Finally, some rules add extra conditions over -the values in the stack that follow the `iff` keyword. Sometimes, -several rules may apply in a given context. In this case, the one that -appears first in this specification is to be selected. If no rule -applies, the result is equivalent to the one for the explicit `FAIL` -instruction. This case does not happen on well-typed programs, as -explained in the next section. - -The right hand side describes the result of the interpreter if the -rule applies. It consists in a stack pattern, whose part are either -constants, or elements of the context (program and initial stack) that -have been named on the left hand side of the `=>` sign. - -### Recursive rules (big step form) - -Sometimes, the result of interpreting a program is derived from the -result of interpreting another one (as in conditionals or function -calls). In these cases, the rule contains a clause of the following -form. - - where (intermediate program) / (intermediate stack) => (partial result) - -This means that this rules applies in case interpreting the -intermediate state on the left gives the pattern on the right. - -The left hand sign of the `=>` sign is constructed from elements of -the initial state or other partial results, and the right hand side -identify parts that can be used to build the result stack of the rule. - -If the partial result pattern does not actually match the result of -the interpretation, then the result of the whole rule is equivalent to -the one for the explicit `FAIL` instruction. Again, this case does not -happen on well-typed programs, as explained in the next section. - -### Format of patterns - -Code patterns are of one of the following syntactical forms. - - * `INSTR` (an uppercase identifier) is a simple instruction - (e.g. `DROP`); - * `INSTR (arg) ...` is a compound instruction, - whose arguments can be code, data or type patterns - (e.g. `PUSH nat 3`) ; - * `{ (instr) ; ... }` is a possibly empty sequence of instructions, - (e.g. `IF { SWAP ; DROP } { DROP }`), - nested sequences can drop the braces ; - * `name` is a pattern that matches any program and names a part of - the matched program that can be used to build the result ; - * `_` is a pattern that matches any instruction. - -Stack patterns are of one of the following syntactical forms. - - * `[FAIL]` is the special failed state ; - * `[]` is the empty stack ; - * `(top) : (rest)` is a stack whose top element is matched by the - data pattern `(top)` on the left, and whose remaining elements are - matched by the stack pattern `(rest)` on the right - (e.g. `x : y : rest`) ; - * `name` is a pattern that matches any stack and names it in order - to use it to build the result ; - * `_` is a pattern that matches any stack. - -Data patterns are of one of the following syntactical forms. - - * integer/natural number literals, - (e.g. `3`) ; - * string literals, - (e.g. `"contents"`) ; - * `Tag` (capitalized) is a symbolic constant, - (e.g. `Unit`, `True`, `False`) ; - * `(Tag (arg) ...)` tagged constructed data, - (e.g. `(Pair 3 4)`) ; - * a code pattern for first class code values ; - * `name` to name a value in order to use it to build the result ; - * `_` to match any value. - -The domain of instruction names, symbolic constants and data -constructors is fixed by this specification. Michelson does not let -the programmer introduce its own types. - -Be aware that the syntax used in the specification may differ a bit -from the concrete syntax, which is presented in Section IX. In -particular, some instructions are annotated with types that are not -present in the concrete language because they are synthesized by the -typechecker. - -### Shortcuts - -Sometimes, it is easier to think (and shorter to write) in terms of -program rewriting than in terms of big step semantics. When it is the -case, and when both are equivalents, we write rules of the form: - - p / S => S'' - where p' / S' => S'' - -using the following shortcut: - - p / S => p' / S' - -The concrete language also has some syntax sugar to group some common -sequences of operations as one. This is described in this -specification using a simple regular expression style recursive -instruction rewriting. - -II - Introduction to the type system and notations --------------------------------------------------- - -This specification describes a type system for Michelson. To make -things clear, in particular to readers that are not accustomed to -reading formal programming language specifications, it does not give a -typechecking or inference algorithm. It only gives an intentional -definition of what we consider to be well-typed programs. For each -syntactical form, it describes the stacks that are considered -well-typed inputs, and the resulting outputs. - -The type system is sound, meaning that if a program can be given a -type, then if run on a well-typed input stack, the interpreter will -never apply an interpretation rule on a stack of unexpected length or -contents. Also, it will never reach a state where it cannot select an -appropriate rule to continue the execution. Well-typed programs do not -block, and do not go wrong. - -### Type notations - -The specification introduces notations for the types of values, terms -and stacks. Apart from a subset of value types that appear in the -form of type annotations in some places throughout the language, it is -important to understand that this type language only exists in the -specification. - -A stack type can be written: - - * `[]` for the empty stack ; - * `(top) : (rest)` for the stack whose first value has type `(top)` - and queue has stack type `(rest)`. - -Instructions, programs and primitives of the language are also typed, -their types are written: - - (type of stack before) -> (type of stack after) - -The types of values in the stack are written: - - * `identifier` for a primitive data-type - (e.g. `bool`), - * `identifier (arg)` for a parametric data-type with one parameter type `(arg)` - (e.g. `list nat`), - * `identifier (arg) ...` for a parametric data-type with several parameters - (e.g. `map string int`), - * `[ (type of stack before) -> (type of stack after) ]` for a code quotation, - (e.g. `[ int : int : [] -> int : [] ]`), - * `lambda (arg) (ret)` is a shortcut for `[ (arg) : [] -> (ret) : [] ]`. - -### Meta type variables - -The typing rules introduce meta type variables. To be clear, this has -nothing to do with polymorphism, which Michelson does not have. These -variables only live at the specification level, and are used to -express the consistency between the parts of the program. For -instance, the typing rule for the `IF` construct introduces meta -variables to express that both branches must have the same type. - -Here are the notations for meta type variables: - - * `'a` for a type variable, - * `'A` for a stack type variable, - * `_` for an anonymous type or stack type variable. - -### Typing rules - -The system is syntax directed, which means here that it defines a -single typing rule for each syntax construct. A typing rule restricts -the type of input stacks that are authorized for this syntax -construct, links the output type to the input type, and links both of -them to the subexpressions when needed, using meta type variables. - -Typing rules are of the form: - - (syntax pattern) - :: (type of stack before) -> (type of stack after) [rule-name] - iff (premises) - -Where premises are typing requirements over subprograms or values in -the stack, both of the form `(x) :: (type)`, meaning that value `(x)` -must have type `(type)`. - -A program is shown well-typed if one can find an instance of a rule -that applies to the toplevel program expression, with all meta type -variables replaced by non variable type expressions, and of which all -type requirements in the premises can be proven well-typed in the -same manner. For the reader unfamiliar with formal type systems, this -is called building a typing derivation. - -Here is an example typing derivation on a small program that computes -`(x+5)*10` for a given input `x`, obtained by instantiating the typing -rules for instructions `PUSH`, `ADD` and for the sequence, as found in -the next sections. When instantiating, we replace the `iff` with `by`. - - { PUSH nat 5 ; ADD ; PUSH nat 10 ; SWAP ; MUL } - :: [ nat : [] -> nat : [] ] - by { PUSH nat 5 ; ADD } - :: [ nat : [] -> nat : [] ] - by PUSH nat 5 - :: [ nat : [] -> nat : nat : [] ] - by 5 :: nat - and ADD - :: [ nat : nat : [] -> nat : [] ] - and { PUSH nat 10 ; SWAP ; MUL } - :: [ nat : [] -> nat : [] ] - by PUSH nat 10 - :: [ nat : [] -> nat : nat : [] ] - by 10 :: nat - and { SWAP ; MUL } - :: [ nat : nat : [] -> nat : [] ] - by SWAP - :: [ nat : nat : [] -> nat : nat : [] ] - and MUL - :: [ nat : nat : [] -> nat : [] ] - -Producing such a typing derivation can be done in a number of manners, -such as unification or abstract interpretation. In the implementation -of Michelson, this is done by performing a recursive symbolic -evaluation of the program on an abstract stack representing the input -type provided by the programmer, and checking that the resulting -symbolic stack is consistent with the expected result, also provided -by the programmer. - -### Annotations - -Most Instructions in the language can optionally take an annotation. -Annotations allow you to better track data, on the stack and within -pairs and unions. - -If added on the components of a type, the annotation will be -propagated by the typechecker througout access instructions. - -Annotating an instruction that produces a value on the stack will -rewrite the annotation an the toplevel of its type. - -Trying to annotate an instruction that does not produce a value will -result in a typechecking error. - -At join points in the program (`IF`, `IF_LEFT`, `IF_CONS`, `IF_NONE`, -`LOOP`), annotations must be compatible. Annotations are compatible -if both elements are annotated with the same annotation or if at least -one of the values/types is unannotated. - -Stack visualization tools like the Michelson's Emacs mode print -annotations associated with each type in the program, as propagated by -the typechecker. This is useful as a debugging aid. - -### Side note - -As with most type systems, it is incomplete. There are programs that -cannot be given a type in this type system, yet that would not go -wrong if executed. This is a necessary compromise to make the type -system usable. Also, it is important to remember that the -implementation of Michelson does not accept as many programs as the -type system describes as well-typed. This is because the -implementation uses a simple single pass typechecking algorithm, and -does not handle any form of polymorphism. - - -III - Core data types and notations ------------------------------------ - - * `string`, `nat`, `int`: - The core primitive constant types. - - * `bool`: - The type for booleans whose values are `True` and `False` - - * `unit`: - The type whose only value is `Unit`, to use as a placeholder when - some result or parameter is non necessary. For instance, when the - only goal of a contract is to update its storage. - - * `list (t)`: - A single, immutable, homogeneous linked list, whose elements are - of type `(t)`, and that we note `{}` for the empty list or - `{ first ; ... }`. In the semantics, we use chevrons to denote - a subsequence of elements. For instance `{ head ; }`. - - * `pair (l) (r)`: - A pair of values `a` and `b` of types `(l)` and `(r)`, that we - write `(Pair a b)`. - - * `option (t)`: - Optional value of type `(t)` that we note `None` or `(Some v)`. - - * `or (l) (r)`: - A union of two types: a value holding either a value `a` of type - `(l)` or a value `b` of type `(r)`, that we write `(Left a)` or - `(Right b)`. - - * `set (t)`: - Immutable sets of values of type `(t)` that we note as lists - `{ item ; ... }`, of course with their elements unique, and sorted. - - * `map (k) (t)`: - Immutable maps from keys of type `(k)` of values of type `(t)` - that we note `{ Elt key value ; ... }`, with keys sorted. - - -IV - Core instructions ----------------------- - -### Control structures - - * `FAIL`: - Explicitly abort the current program. - - :: _ -> _ - - This special instruction is callable in any context, since it - does not use its input stack (first rule below), and makes the - output useless since all subsequent instruction will simply - ignore their usual semantics to propagate the failure up to the - main result (second rule below). Its type is thus completely - generic. - - > FAIL / _ => [FAIL] - > _ / [FAIL] => [FAIL] - - * `{ I ; C }`: - Sequence. - - :: 'A -> 'C - iff I :: [ 'A -> 'B ] - C :: [ 'B -> 'C ] - - > I ; C / SA => SC - where I / SA => SB - and C / SB => SC - - * `IF bt bf`: - Conditional branching. - - :: bool : 'A -> 'B - iff bt :: [ 'A -> 'B ] - bf :: [ 'A -> 'B ] - - > IF bt bf / True : S => bt / S - > IF bt bf / False : S => bf / S - - * `LOOP body`: - A generic loop. - - :: bool : 'A -> 'A - iff body :: [ 'A -> bool : 'A ] - - > LOOP body / True : S => body ; LOOP body / S - > LOOP body / False : S => S - - * `LOOP_LEFT body`: - A loop with an accumulator - - :: (or 'a 'b) : 'A -> 'A - iff body :: [ 'a : 'A -> (or 'a 'b) : 'A ] - - > LOOP body / (Left a) : S => body ; LOOP body / (or 'a 'b) : S - > LOOP body / (Right b) : S => b : S - - * `DIP code`: - Runs code protecting the top of the stack. - - :: 'b : 'A -> 'b : 'C - iff code :: [ 'A -> 'C ] - - > DIP code / x : S => x : S' - where code / S => S' - - * `EXEC`: - Execute a function from the stack. - - :: 'a : lambda 'a 'b : 'C -> 'b : 'C - - > EXEC / a : f : S => r : S - where f / a : [] => r : [] - -### Stack operations - - * `DROP`: - Drop the top element of the stack. - - :: _ : 'A -> 'A - - > DROP / _ : S => S - - * `DUP`: - Duplicate the top of the stack. - - :: 'a : 'A -> 'a : 'a : 'A - - > DUP / x : S => x : x : S - - * `SWAP`: - Exchange the top two elements of the stack. - - :: 'a : 'b : 'A -> 'b : 'a : 'A - - > SWAP / x : y : S => y : x : S - - * `PUSH 'a x`: - Push a constant value of a given type onto the stack. - - - :: 'A -> 'a : 'A - iff x :: 'a - - > PUSH 'a x / S => x : S - - * `UNIT`: - Push a unit value onto the stack. - - :: 'A -> unit : 'A - - > UNIT / S => Unit : S - - * `LAMBDA 'a 'b code`: - Push a lambda with given parameter and return types onto the stack. - - :: 'A -> (lambda 'a 'b) : 'A - -### Generic comparison - -Comparison only works on a class of types that we call comparable. -A `COMPARE` operation is defined in an ad hoc way for each comparable -type, but the result of compare is always an `int`, which can in -turn be checked in a generic manner using the following -combinators. The result of `COMPARE` is `0` if the top two elements -of the stack are equal, negative if the first element in the stack -is less than the second, and positive otherwise. - - * `EQ`: - Checks that the top of the stack EQuals zero. - - :: int : 'S -> bool : 'S - - > EQ ; C / 0 : S => C / True : S - > EQ ; C / _ : S => C / False : S - - - * `NEQ`: - Checks that the top of the stack does Not EQual zero. - - :: int : 'S -> bool : 'S - - > NEQ ; C / 0 : S => C / False : S - > NEQ ; C / _ : S => C / True : S - - * `LT`: - Checks that the top of the stack is Less Than zero. - - :: int : 'S -> bool : 'S - - > LT ; C / v : S => C / True : S iff v < 0 - > LT ; C / _ : S => C / False : S - - * `GT`: - Checks that the top of the stack is Greater Than zero. - - :: int : 'S -> bool : 'S - - > GT ; C / v : S => C / True : S iff v > 0 - > GT ; C / _ : S => C / False : S - - * `LE`: - Checks that the top of the stack is Less Than of Equal to zero. - - :: int : 'S -> bool : 'S - - > LE ; C / v : S => C / True : S iff v <= 0 - > LE ; C / _ : S => C / False : S - - * `GE`: - Checks that the top of the stack is Greater Than of Equal to zero. - - :: int : 'S -> bool : 'S - - > GE ; C / v : S => C / True : S iff v >= 0 - > GE ; C / _ : S => C / False : S - -V - Operations --------------- - -### Operations on booleans - - * `OR` - - :: bool : bool : 'S -> bool : 'S - - > OR ; C / x : y : S => C / (x | y) : S - - * `AND` - - :: bool : bool : 'S -> bool : 'S - - > AND ; C / x : y : S => C / (x & y) : S - - * `XOR` - - :: bool : bool : 'S -> bool : 'S - - > XOR ; C / x : y : S => C / (x ^ y) : S - - * `NOT` - - :: bool : 'S -> bool : 'S - - > NOT ; C / x : S => C / ~x : S - -### Operations on integers and natural numbers - -Integers and naturals are arbitrary-precision, -meaning the only size limit is fuel. - - * `NEG` - - :: int : 'S -> int : 'S - :: nat : 'S -> int : 'S - - > NEG ; C / x : S => C / -x : S - - * `ABS` - - :: int : 'S -> nat : 'S - - > ABS ; C / x : S => C / abs (x) : S - - * `ADD` - - :: int : int : 'S -> int : 'S - :: int : nat : 'S -> int : 'S - :: nat : int : 'S -> int : 'S - :: nat : nat : 'S -> nat : 'S - - > ADD ; C / x : y : S => C / (x + y) : S - - * `SUB` - - :: int : int : 'S -> int : 'S - :: int : nat : 'S -> int : 'S - :: nat : int : 'S -> int : 'S - :: nat : nat : 'S -> int : 'S - - > SUB ; C / x : y : S => C / (x - y) : S - - * `MUL` - - :: int : int : 'S -> int : 'S - :: int : nat : 'S -> int : 'S - :: nat : int : 'S -> int : 'S - :: nat : nat : 'S -> nat : 'S - - > MUL ; C / x : y : S => C / (x * y) : S - - * `EDIV` - Perform Euclidian division - - :: int : int : 'S -> option (pair int nat) : 'S - :: int : nat : 'S -> option (pair int nat) : 'S - :: nat : int : 'S -> option (pair int nat) : 'S - :: nat : nat : 'S -> option (pair nat nat) : 'S - - > EDIV ; C / x : 0 : S => C / None - > EDIV ; C / x : y : S => C / Some (Pair (x / y) (x % y)) : S - -Bitwise logical operators are also available on unsigned integers. - - * `OR` - - :: nat : nat : 'S -> nat : 'S - - > OR ; C / x : y : S => C / (x | y) : S - - * `AND` - - :: nat : nat : 'S -> nat : 'S - - > AND ; C / x : y : S => C / (x & y) : S - - * `XOR` - - :: nat : nat : 'S -> nat : 'S - - > XOR ; C / x : y : S => C / (x ^ y) : S - - * `NOT` - The return type of `NOT` is an `int` and not a `nat`. - This is because the sign is also negated. - The resulting integer is computed using two's complement. - For instance, the boolean negation of `0` is `-1`. - - :: nat : 'S -> int : 'S - :: int : 'S -> int : 'S - - > NOT ; C / x : S => C / ~x : S - - * `LSL` - - :: nat : nat : 'S -> nat : 'S - - > LSL ; C / x : s : S => C / (x << s) : S - iff s <= 256 - > LSL ; C / x : s : S => [FAIL] - - * `LSR` - - :: nat : nat : 'S -> nat : 'S - - > LSR ; C / x : s : S => C / (x >>> s) : S - - * `COMPARE`: - Integer/natural comparison - - :: int : int : 'S -> int : 'S - :: nat : nat : 'S -> int : 'S - - > COMPARE ; C / x : y : S => C / -1 : S iff x < y - > COMPARE ; C / x : y : S => C / 0 : S iff x = y - > COMPARE ; C / x : y : S => C / 1 : S iff x > y - -### Operations on strings - -Strings are mostly used for naming things without having to rely on -external ID databases. So what can be done is basically use string -constants as is, concatenate them and use them as keys. - - * `CONCAT`: - String concatenation. - - :: string : string : 'S -> string : 'S - - * `COMPARE`: - Lexicographic comparison. - - :: string : string : 'S -> int : 'S - -### Operations on pairs - - * `PAIR`: - Build a pair from the stack's top two elements. - - :: 'a : 'b : 'S -> pair 'a 'b : 'S - - > PAIR ; C / a : b : S => C / (Pair a b) : S - - * `CAR`: - Access the left part of a pair. - - :: pair 'a _ : 'S -> 'a : 'S - - > Car ; C / (Pair a _) : S => C / a : S - - * `CDR`: - Access the right part of a pair. - - :: pair _ 'b : 'S -> 'b : 'S - - > Car ; C / (Pair _ b) : S => C / b : S - -### Operations on sets - - * `EMPTY_SET 'elt`: - Build a new, empty set for elements of a given type. - - :: 'S -> set 'elt : 'S - - The `'elt` type must be comparable (the `COMPARE` primitive must - be defined over it). - - * `MEM`: - Check for the presence of an element in a set. - - :: 'elt : set 'elt : 'S -> bool : 'S - - * `UPDATE`: - Inserts or removes an element in a set, replacing a previous value. - - :: 'elt : bool : set 'elt : 'S -> set 'elt : 'S - - * `MAP`: - Apply a function on a map and return the map of results under - the same bindings. - - The `'b` type must be comparable (the `COMPARE` primitive must be - defined over it). - - :: lambda 'elt 'b : set 'elt : 'S -> set 'b : 'S - - * `MAP body`: - Apply the body expression to each element of the set. - The body sequence has access to the stack. - - :: (set 'elt) : 'A -> (set 'b) : 'A - iff body :: [ 'elt : 'A -> 'b : 'A ] - - * `REDUCE`: - Apply a function on a set passing the result of each - application to the next one and return the last. - - :: lambda (pair 'elt * 'b) 'b : set 'elt : 'b : 'S -> 'b : 'S - - * `ITER body`: - Apply the body expression to each element of a set. - The body sequence has access to the stack. - - :: (set 'elt) : 'A -> 'A - iff body :: [ 'elt : 'A -> 'A ] - - * `SIZE`: - Get the cardinality of the set. - - :: set 'elt : 'S -> nat : 'S - -### Operations on maps - - * `EMPTY_MAP 'key 'val`: - Build a new, empty map. - - The `'key` type must be comparable (the `COMPARE` primitive must be - defined over it). - - :: 'S -> map 'key 'val : 'S - - * `GET`: - Access an element in a map, returns an optional value to be - checked with `IF_SOME`. - - :: 'key : map 'key 'val : 'S -> option 'val : 'S - - * `MEM`: - Check for the presence of an element in a map. - - :: 'key : map 'key 'val : 'S -> bool : 'S - - * `UPDATE`: - Assign or remove an element in a map. - - :: 'key : option 'val : map 'key 'val : 'S -> map 'key 'val : 'S - - * `MAP`: - Apply a function on a map and return the map of results under - the same bindings. - - :: lambda (pair 'key 'val) 'b : map 'key 'val : 'S -> map 'key 'b : 'S - - * `MAP body`: - Apply the body expression to each element of a map. - The body sequence has access to the stack. - - :: (map 'key 'val) : 'A -> (map 'key 'b) : 'A - iff body :: [ (pair 'key 'val) : 'A -> 'b : 'A ] - - * `REDUCE`: - Apply a function on a map passing the result of each - application to the next one and return the last. - - :: lambda (pair (pair 'key 'val) 'b) 'b : map 'key 'val : 'b : 'S -> 'b : 'S - - * `ITER body`: - Apply the body expression to each element of a map. - The body sequence has access to the stack. - - :: (map 'elt 'val) : 'A -> 'A - iff body :: [ (pair 'elt 'val) : 'A -> 'A ] - - * `SIZE`: - Get the cardinality of the map. - - :: map 'key 'val : 'S -> nat : 'S - -### Operations on optional values - - * `SOME`: - Pack a present optional value. - - :: 'a : 'S -> 'a? : 'S - - > SOME ; C / v :: S => C / (Some v) :: S - - * `NONE 'a`: - The absent optional value. - - :: 'S -> 'a? : 'S - - > NONE ; C / v :: S => C / None :: S - - * `IF_NONE bt bf`: - Inspect an optional value. - - :: 'a? : 'S -> 'b : 'S - iff bt :: [ 'S -> 'b : 'S] - bf :: [ 'a : 'S -> 'b : 'S] - - > IF_NONE ; C / (None) : S => bt ; C / S - > IF_NONE ; C / (Some a) : S => bf ; C / a : S - -### Operations on unions - - * `LEFT 'b`: - Pack a value in a union (left case). - - :: 'a : 'S -> or 'a 'b : 'S - - > LEFT ; C / v :: S => C / (Left v) :: S - - * `RIGHT 'a`: - Pack a value in a union (right case). - - :: 'b : 'S -> or 'a 'b : 'S - - > RIGHT ; C / v :: S => C / (Right v) :: S - - * `IF_LEFT bt bf`: - Inspect a value of a variant type. - - :: or 'a 'b : 'S -> 'c : 'S - iff bt :: [ 'a : 'S -> 'c : 'S] - bf :: [ 'b : 'S -> 'c : 'S] - - > IF_LEFT ; C / (Left a) : S => bt ; C / a : S - > IF_LEFT ; C / (Right b) : S => bf ; C / b : S - - * `IF_RIGHT bt bf`: - Inspect a value of a variant type. - - :: or 'a 'b : 'S -> 'c : 'S - iff bt :: [ 'b : 'S -> 'c : 'S] - bf :: [ 'a : 'S -> 'c : 'S] - - > IF_LEFT ; C / (Right b) : S => bt ; C / b : S - > IF_RIGHT ; C / (Left a) : S => bf ; C / a : S - - -### Operations on lists - - * `CONS`: - Prepend an element to a list. - - :: 'a : list 'a : 'S -> list 'a : 'S - - > CONS ; C / a : { } : S => C / { a ; } : S - - * `NIL 'a`: - The empty list. - - :: 'S -> list 'a : 'S - - > NIL ; C / S => C / {} : S - - * `IF_CONS bt bf`: - Inspect an optional value. - - :: list 'a : 'S -> 'b : 'S - iff bt :: [ 'a : list 'a : 'S -> 'b : 'S] - bf :: [ 'S -> 'b : 'S] - - > IF_CONS ; C / { a ; } : S => bt ; C / a : { } : S - > IF_CONS ; C / {} : S => bf ; C / S - - * `MAP`: - Apply a function on a list from left to right and - return the list of results in the same order. - - :: lambda 'a 'b : list 'a : 'S -> list 'b : 'S - - * `MAP body`: - Apply the body expression to each element of the list. - The body sequence has access to the stack. - - :: (list 'elt) : 'A -> (list 'b) : 'A - iff body :: [ 'elt : 'A -> 'b : 'A ] - - * `REDUCE`: - Apply a function on a list from left to right - passing the result of each application to the next one - and return the last. - - :: lambda (pair 'a 'b) 'b : list 'a : 'b : 'S -> 'b : 'S - - * `SIZE`: - Get the number of elements in the list. - - :: list 'elt : 'S -> nat : 'S - - * `ITER body`: - Apply the body expression to each element of a list. - The body sequence has access to the stack. - - :: (list 'elt) : 'A -> 'A - iff body :: [ 'elt : 'A -> 'A ] - -VI - Domain specific data types -------------------------------- - - * `timestamp`: - Dates in the real world. - - * `tez`: - A specific type for manipulating tokens. - - * `contract 'param 'result`: - A contract, with the type of its code. - - * `key`: - A public cryptography key. - - * `key_hash`: - The hash of a public cryptography key. - - * `signature`: - A cryptographic signature. - - -VII - Domain specific operations --------------------------------- - -### Operations on timestamps - -Timestamp immediates can be obtained by the `NOW` operation, or -retrieved from script parameters or globals. - - * `ADD` - Increment / decrement a timestamp of the given number of seconds. - - :: timestamp : int : 'S -> timestamp : 'S - :: int : timestamp : 'S -> timestamp : 'S - - > ADD ; C / seconds : nat (t) : S => C / (seconds + t) : S - > ADD ; C / nat (t) : seconds : S => C / (t + seconds) : S - - * `SUB` - Subtract a number of seconds from a timestamp. - - :: timestamp : int : 'S -> timestamp : 'S - - > SUB ; C / seconds : nat (t) : S => C / (seconds - t) : S - - * `SUB` - Subtract two timestamps. - - :: timestamp : timestamp : 'S -> int : 'S - - > SUB ; C / seconds(t1) : seconds(t2) : S => C / (t1 - t2) : S - - * `COMPARE`: - Timestamp comparison. - - :: timestamp : timestamp : 'S -> int : 'S - -### Operations on Tez - -Tez are internally represented by a 64 bit signed integer. -There are restrictions to prevent creating a negative amount of tez. -Operations are limited to prevent overflow and mixing them with -other numerical types by mistake. They are also mandatorily checked -for under/overflows. - - * `ADD`: - - :: tez : tez : 'S -> tez : 'S - - > ADD ; C / x : y : S => [FAIL] on overflow - > ADD ; C / x : y : S => C / (x + y) : S - - * `SUB`: - - :: tez : tez : 'S -> tez : 'S - - > SUB ; C / x : y : S => [FAIL] iff x < y - > SUB ; C / x : y : S => C / (x - y) : S - - * `MUL` - - :: tez : nat : 'S -> tez : 'S - :: nat : tez : 'S -> tez : 'S - - > MUL ; C / x : y : S => [FAIL] on overflow - > MUL ; C / x : y : S => C / (x * y) : S - - * `EDIV` - - :: tez : nat : 'S -> option (pair tez tez) : 'S - :: tez : tez : 'S -> option (pair nat tez) : 'S - - > EDIV ; C / x : 0 : S => C / None - > EDIV ; C / x : y : S => C / Some (Pair (x / y) (x % y)) : S - - * `COMPARE`: - - :: tez : tez : 'S -> int : 'S - -### Operations on contracts - - * `MANAGER`: - Access the manager of a contract. - - :: contract 'p 'r : 'S -> key_hash : 'S - - * `CREATE_CONTRACT`: - Forge a new contract. - - - :: key_hash : key_hash? : bool : bool : tez : lambda (pair 'p 'g) (pair 'r 'g) : 'g : 'S - -> contract 'p 'r : 'S - - As with non code-emitted originations the - contract code takes as argument the transferred amount plus an - ad-hoc argument and returns an ad-hoc value. The code also takes - the global data and returns it to be stored and retrieved on the - next transaction. These data are initialized by another - parameter. The calling convention for the code is as follows: - `(Pair arg globals)) -> (Pair ret globals)`, as - extrapolatable from the instruction type. The first parameters are - the manager, optional delegate, then spendable and delegatable - flags and finally the initial amount taken from the currently - executed contract. The contract is returned as a first class - value to be called immediately or stored. - - * `CREATE_CONTRACT { storage 'g ; parameter 'p ; return 'r ; code ... }`: - Forge a new contract from a literal. - - - :: key_hash : key_hash? : bool : bool : tez : 'g : 'S - -> contract 'p 'r : 'S - - Originate a contract based on a literal. This is currently the only way to include - transfers inside of an originated contract. The first parameters are - the manager, optional delegate, then spendable and delegatable - flags and finally the initial amount taken from the currently - executed contract. The contract is returned as a first class - value to be called immediately or stored. - - * `CREATE_ACCOUNT`: - Forge an account (a contract without code). - - :: key_hash : key_hash? : bool : tez : 'S -> contract unit unit : 'S - - Take as argument the manager, optional delegate, the delegatable - flag and finally the initial amount taken from the currently - executed contract. - - * `TRANSFER_TOKENS`: - Forge and evaluate a transaction. - - :: 'p : tez : contract 'p 'r : 'g : [] -> 'r : 'g : [] - - The parameter and return value must be consistent with the ones - expected by the contract, unit for an account. To preserve the - global consistency of the system, the current contract's storage - must be updated before passing the control to another script. For - this, the script must put the partially updated storage on the - stack ('g is the type of the contract's storage). If a recursive - call to the current contract happened, the updated storage is put - on the stack next to the return value. Nothing else can remain - on the stack during a nested call. If some local values have to - be kept for after the nested call, they have to be stored - explicitly in a transient part of the storage. A trivial example - of that is to reserve a boolean in the storage, initialized to - false, reset to false at the end of each contract execution, and - set to true during a nested call. This thus gives an easy way - for a contract to prevent recursive call (the contract just fails - if the boolean is true). - - * `BALANCE`: - Push the current amount of tez of the current contract. - - :: 'S -> tez :: 'S - - * `SOURCE 'p 'r`: - Push the source contract of the current transaction. - - :: 'S -> contract 'p 'r :: 'S - - * `SELF`: - Push the current contract. - - :: 'S -> contract 'p 'r :: 'S - where contract 'p 'r is the type of the current contract - - * `AMOUNT`: - Push the amount of the current transaction. - - :: 'S -> tez :: 'S - - * `DEFAULT_ACCOUNT`: - Return a default contract with the given public/private key pair. - Any funds deposited in this contract can immediately be spent by the - holder of the private key. This contract cannot execute Michelson code - and will always exist on the blockchain. - - :: key_hash : 'S -> contract unit unit :: 'S - -### Special operations - - * `STEPS_TO_QUOTA`: - Push the remaining steps before the contract execution must terminate. - - :: 'S -> nat :: 'S - - * `NOW`: - Push the timestamp of the block whose validation triggered this - execution (does not change during the execution of the contract). - - :: 'S -> timestamp :: 'S - -### Cryptographic primitives - - * `HASH_KEY`: - Compute the b58check of a public key. - - :: key : 'S -> key_hash : 'S - - * `H`: - Compute a cryptographic hash of the value contents using the - Blake2B cryptographic hash function. - - :: 'a : 'S -> string : 'S - - * `CHECK_SIGNATURE`: - Check that a sequence of bytes has been signed with a given key. - - :: key : pair signature string : 'S -> bool : 'S - - * `COMPARE`: - - :: key_hash : key_hash : 'S -> int : 'S - -VIII - Macros -------------- - -In addition to the operations above, -several extensions have been added to the language's concreate syntax. -If you are interacting with the node via RPC, bypassing the client, -which expands away these macros, you will need to desurgar them yourself. - -These macros are designed to be unambiguous and reversable, -meaning that errors are reported in terms of resugared syntax. -Below you'll see these macros defined in terms of other syntactic forms. -That is how these macros are seen by the node. - -### Compare -Syntactic sugar exists for merging `COMPARE` and comparison -combinators, and also for branching. - - * `CMP{EQ|NEQ|LT|GT|LE|GE}` - - > CMP(\op) ; C / S => COMPARE ; (\op) ; C / S - - * `IF{EQ|NEQ|LT|GT|LE|GE} bt bf` - - > IF(\op) ; C / S => (\op) ; IF bt bf ; C / S - - * `IFCMP{EQ|NEQ|LT|GT|LE|GE} bt bf` - - > IFCMP(\op) ; C / S => COMPARE ; (\op) ; IF bt bf ; C / S - -### Assertion Macros -All assertion operations are syntactic sugar for conditionals -with a `FAIL` instruction in the appropriate branch. -When possible, use them to increase clarity about illegal states. - - * `ASSERT`: - - > IF {} {FAIL} - - * `ASSERT_{EQ|NEQ|LT|LE|GT|GE}`: - - > ASSERT_(\op) => IF(\op) {} {FAIL} - - * `ASSERT_CMP{EQ|NEQ|LT|LE|GT|GE}`: - - > ASSERT_CMP(\op) => IFCMP(\op) {} {FAIL} - - * `ASSERT_NONE`: - Equivalent to ``. - - > ASSERT_NONE => IF_NONE {} {FAIL} - - * `ASSERT_SOME`: - Equivalent to `IF_NONE {FAIL} {}`. - - > ASSERT_NONE => IF_NONE {FAIL} {} - - * `ASSERT_LEFT`: - - > ASSERT_LEFT => IF_LEFT {} {FAIL} - - * `ASSERT_RIGHT`: - - > ASSERT_RIGHT => IF_LEFT {FAIL} {} - - -### Syntactic Conveniences -These are macros are simply more convenient syntax for various common operations. - - - * `DII+P code`: - A syntactic sugar for working deeper in the stack. - - > DII(\rest=I*)P code / S => DIP (DI(\rest)P code) / S - - - * `DUU+P`: - A syntactic sugar for duplicating the `n`th element of the stack. - - > DUU(\rest=U*)P / S => DIP (DU(\rest)P) ; SWAP / S - - * `P(A*AI)+R`: - A syntactic sugar for building nested pairs in bulk. - - > P(\fst=A*)AI(\rest=(A*AI)+)R ; C / S => P(\fst)AIR ; P(\rest)R ; C / S - > PA(\rest=A*)AIR ; C / S => DIP (P(\rest)AIR) ; C / S - - * `C[AD]+R`: - A syntactic sugar for accessing fields in nested pairs. - - > CA(\rest=[AD]+)R ; C / S => CAR ; C(\rest)R ; C / S - > CD(\rest=[AD]+)R ; C / S => CDR ; C(\rest)R ; C / S - - - * `IF_SOME bt bf`: - Inspect an optional value. - - :: 'a? : 'S -> 'b : 'S - iff bt :: [ 'a : 'S -> 'b : 'S] - bf :: [ 'S -> 'b : 'S] - - > IF_SOME ; C / (Some a) : S => bt ; C / a : S - > IF_SOME ; C / (None) : S => bf ; C / S - - * `SET_CAR`: - Set the first value of a pair. - - > SET_CAR => CDR ; SWAP ; PAIR - - * `SET_CDR`: - Set the first value of a pair. - - > SET_CDR => CAR ; PAIR - - * `SET_C[AD]+R`: - A syntactic sugar for setting fields in nested pairs. - - > SET_CA(\rest=[AD]+)R ; C / S => - { DUP ; DIP { CAR ; SET_C(\rest)R } ; CDR ; SWAP ; PAIR } ; C / S - > SET_CD(\rest=[AD]+)R ; C / S => - { DUP ; DIP { CDR ; SET_C(\rest)R } ; CAR ; PAIR } ; C / S - - * `MAP_CAR` code: - Transform the first value of a pair. - - > SET_CAR => DUP ; CDR ; SWAP ; code ; CAR ; PAIR - - * `MAP_CDR` code: - Transform the first value of a pair. - - > SET_CDR => DUP ; CDR ; code ; SWAP ; CAR ; PAIR - - * `MAP_C[AD]+R` code: - A syntactic sugar for transforming fields in nested pairs. - - > MAP_CA(\rest=[AD]+)R ; C / S => - { DUP ; DIP { CAR ; MAP_C(\rest)R code } ; CDR ; SWAP ; PAIR } ; C / S - > MAP_CD(\rest=[AD]+)R ; C / S => - { DUP ; DIP { CDR ; MAP_C(\rest)R code } ; CAR ; PAIR } ; C / S - -IX - Concrete syntax ----------------------- - -The concrete language is very close to the formal notation of the -specification. Its structure is extremely simple: an expression in the -language can only be one of the four following constructs. - - 1. An integer. - 1. A character string. - 2. The application of a primitive to a sequence of expressions. - 3. A sequence of expressions. - -This simple four cases notation is called Micheline. - -### Constants - -There are two kinds of constants: - - 1. Integers or naturals in decimal (no prefix), - hexadecimal (0x prefix), octal (0o prefix) or binary (0b prefix). - 2. Strings with usual escapes `\n`, `\t`, `\b`, `\r`, `\\`, `\"`. - The encoding of a Michelson source file must be UTF-8, - and non-ASCII characters can only appear in comments. - No line break can appear in a string. - Any non-printable characters must be escaped using - two hexadecimal characters, as in '\xHH' or the predefine - escape sequences above.. - -### Primitive applications - -A primitive application is a name followed by arguments - - prim arg1 arg2 - -When a primitive application is the argument to another primitive -application, it must be wrapped with parentheses. - - prim (prim1 arg11 arg12) (prim2 arg21 arg22) - -### Sequences - -Successive expression can be grouped as a single sequence expression -using curly braces as delimiters and semicolon as separators. - - { expr1 ; expr2 ; expr3 ; expr4 } - -A sequence can be passed as argument to a primitive. - - prim arg1 arg2 { arg3_expr1 ; arg3_expr2 } - -Primitive applications right inside a sequence cannot be wrapped. - - { (prim arg1 arg2) } # is not ok - -### Indentation - -To remove ambiguities for human readers, the parser enforces some -indentation rules. - - - For sequences: - - All expressions in a sequence must be aligned on the same column. - - An exception is made when consecutive expressions fit on the same - line, as long as the first of them is correctly aligned. - - All expressions in a sequence must be indented to the right of the - opening curly brace by at least one column. - - The closing curly brace cannot be on the left of the opening one. - - For primitive applications: - - All arguments in an application must be aligned on the same column. - - An exception is made when consecutive arguments fit on the same - line, as long as the first of them is correctly aligned. - - All arguments in a sequence must be indented to the right of the - primitive name by at least one column. - -### Annotations - -Sequences and primitive applications can receive an annotation. - -An annotation is a lowercase identifier that starts with an `@` sign. -It comes after the opening curly brace for sequence, and after the -primitive name for primitive applications. - - { @annot - expr ; - expr ; - ... } - - (prim @annot arg arg ...) - -### Differences with the formal notation - -The concrete syntax follows the same lexical conventions as the -specification: instructions are represented by uppercase identifiers, -type constructors by lowercase identifiers, and constant constructors -are Capitalized. - -All domain specific constants are Micheline strings with specific formats: - - - `tez` amounts are written using the same notation as JSON schemas - and the command line client: thousands are optionally separated by - comas, and centiles, if present, must be prefixed by a period. - - in regexp form: `([0-9]{1,3}(,[0-9]{3})+)|[0-9]+(\.[0.9]{2})?` - - `"1234567"` means 123456700 tez centiles - - `"1,234,567"` means 123456700 tez centiles - - `"1234567.89"` means 123456789 tez centiles - - `"1,234,567.00"` means 123456789 tez centiles - - `"1234,567"` is invalid - - `"1,234,567."` is invalid - - `"1,234,567.0"` is invalid - - `timestamp`s are written using `RFC 339` notation. - - `contract`s are the raw strings returned by JSON RPCs or the command - line interface and cannot be forged by hand so their format is of - no interest here. - - `key`s are `Blake2B` hashes of `ed25519` public keys encoded in - `base58` format with the following custom alphabet: - `"eXMNE9qvHPQDdcFx5J86rT7VRm2atAypGhgLfbS3CKjnksB4"`. - - `signature`s are `ed25519` signatures as a series of hex-encoded bytes. - -To prevent errors, control flow primitives that take instructions as -parameters require sequences in the concrete syntax. - - IF { instr1_true ; instr2_true ; ... } - { instr1_false ; instr2_false ; ... } - -### Main program structure - -The toplevel of a smart contract file must be an undelimited sequence -of four primitive applications (in no particular order) that provide -its `parameter`, `return` and `storage` types, as well as its `code`. - -See the next section for a concrete example. - -### Comments - -A hash sign (`#`) anywhere outside of a string literal will make the -rest of the line (and itself) completely ignored, as in the following -example. - - { PUSH nat 1 ; # pushes 1 - PUSH nat 2 ; # pushes 2 - ADD } # computes 2 + 1 - -Comments that span on multiple lines or that stop before the end of -the line can also be written, using C-like delimiters (`/* ... */`). - -X - JSON syntax ---------------- - -Micheline expressions are encoded in JSON like this: - - - An integer `N` is an object with a single field `"int"` - whose valus is the decimal representation as a string. - - `{ "int": "N" }` - - - A string `"contents"` is an object with a single field `"string"` - whose valus is the decimal representation as a string. - - `{ "string": "contents" }` - - - A sequence is a JSON array. - - `[ expr, ... ]` - - - A primitive application is an object with two fields - `"prim"` for the primitive name and `"args"` for the - arguments (that must contain an array). - A third optionnal field `"annot"` may contains an annotation, - including the `@` sign. - - { "prim": "pair", - "args": [ { "prim": "nat", args: [] }, - { "prim": "nat", args: [] } ], - "annot": "@numbers" }` - -As in the concrete syntax, all domain specific constants are encoded -as strings. - -XI - Examples -------------- - -Contracts in the system are stored as a piece of code and a global -data storage. The type of the global data of the storage is fixed for -each contract at origination time. This is ensured statically by -checking on origination that the code preserves the type of the global -data. For this, the code of the contract is checked to be of the -following type lambda (pair 'arg 'global) -> (pair 'ret -'global) where 'global is the type of the original global store given -on origination. The contract also takes a parameter and -returns a value, hence the complete calling convention above. - -### Empty contract - -Any contract with the same `parameter` and `return` types -may be written with an empty sequence in its `code` section. -The simplest contract is the contract for which the -`parameter`, `storage`, and `return` are all of type `unit`. -This contract is as follows: - - code {}; - storage unit; - parameter unit; - return unit; - -### Reservoir contract - -We want to create a contract that stores tez until a timestamp `T` or a -maximum amount `N` is reached. Whenever `N` is reached before `T`, all tokens -are reversed to an account `B` (and the contract is automatically -deleted). Any call to the contract's code performed after `T` will -otherwise transfer the tokens to another account `A`. - -We want to build this contract in a reusable manner, so we do not -hard-code the parameters. Instead, we assume that the global data of -the contract are `(Pair (Pair T N) (Pair A B))`. - -Hence, the global data of the contract has the following type - - 'g = - pair - (pair timestamp tez) - (pair (contract unit unit) (contract unit unit)) - -Following the contract calling convention, the code is a lambda of type - - lambda - (pair unit 'g) - (pair unit 'g) - -written as - - lambda - (pair - unit - (pair - (pair timestamp tez) - (pair (contract unit unit) (contract unit unit)))) - (pair - unit - (pair - (pair timestamp tez) - (pair (contract unit unit) (contract unit unit)))) - -The complete source `reservoir.tz` is: - - parameter timestamp ; - storage - (pair - (pair timestamp tez) # T N - (pair (contract unit unit) (contract unit unit))) ; # A B - return unit ; - code - { DUP ; CDAAR ; # T - NOW ; - COMPARE ; LE ; - IF { DUP ; CDADR ; # N - BALANCE ; - COMPARE ; LE ; - IF { CDR ; UNIT ; PAIR } - { DUP ; CDDDR ; # B - BALANCE ; UNIT ; - DIIIP { CDR } ; - TRANSFER_TOKENS ; - PAIR } } - { DUP ; CDDAR ; # A - BALANCE ; - UNIT ; - DIIIP { CDR } ; - TRANSFER_TOKENS ; - PAIR } } - -### Reservoir contract (variant with broker and status) - -We basically want the same contract as the previous one, but instead -of destroying it, we want to keep it alive, storing a flag `S` so that -we can tell afterwards if the tokens have been transferred to `A` or `B`. -We also want a broker `X` to get some fee `P` in any case. - -We thus add variables `P` and `S` and `X` to the global data of the -contract, now `(Pair (S, Pair (T, Pair (Pair P N) (Pair X (Pair A B)))))`. -`P` is the fee for broker `A`, `S` is the state, as a string `"open"`, -`"timeout"` or `"success"`. - -At the beginning of the transaction: - - S is accessible via a CDAR - T via a CDDAR - P via a CDDDAAR - N via a CDDDADR - X via a CDDDDAR - A via a CDDDDDAR - B via a CDDDDDDR - -For the contract to stay alive, we test that all least `(Tez "1.00")` is -still available after each transaction. This value is given as an -example and must be updated according to the actual Tezos minimal -value for contract balance. - -The complete source `scrutable_reservoir.tz` is: - - parameter timestamp ; - storage - (pair - string # S - (pair - timestamp # T - (pair - (pair tez tez) ; # P N - (pair - (contract unit unit) # X - (pair (contract unit unit) (contract unit unit)))))) ; # A B - return unit ; - code - { DUP ; CDAR # S - PUSH string "open" ; - COMPARE ; NEQ ; - IF { FAIL } # on "success", "timeout" or a bad init value - { DUP ; CDDAR ; # T - NOW ; - COMPARE ; LT ; - IF { # Before timeout - # We compute ((1 + P) + N) tez for keeping the contract alive - PUSH tez "1.00" ; - DIP { DUP ; CDDDAAR } ; ADD ; # P - DIP { DUP ; CDDDADR } ; ADD ; # N - # We compare to the cumulated amount - BALANCE ; - COMPARE; LT ; - IF { # Not enough cash, we just accept the transaction - # and leave the global untouched - CDR } - { # Enough cash, successful ending - # We update the global - CDDR ; PUSH string "success" ; PAIR ; - # We transfer the fee to the broker - DUP ; CDDAAR ; # P - DIP { DUP ; CDDDAR } # X - UNIT ; TRANSFER_TOKENS ; DROP ; - # We transfer the rest to A - DUP ; CDDADR ; # N - DIP { DUP ; CDDDDAR } # A - UNIT ; TRANSFER_TOKENS ; DROP } } - { # After timeout, we refund - # We update the global - CDDR ; PUSH string "timeout" ; PAIR ; - # We try to transfer the fee to the broker - PUSH tez "1.00" ; BALANCE ; SUB ; # available - DIP { DUP ; CDDAAR } ; # P - COMPARE ; LT ; # available < P - IF { PUSH tez "1.00" ; BALANCE ; SUB ; # available - DIP { DUP ; CDDDAR } # X - UNIT ; TRANSFER_TOKENS ; DROP } - { DUP ; CDDAAR ; # P - DIP { DUP ; CDDDAR } # X - UNIT ; TRANSFER_TOKENS ; DROP } - # We transfer the rest to B - PUSH tez "1.00" ; BALANCE ; SUB ; # available - DIP { DUP ; CDDDDDR } # B - UNIT ; TRANSFER_TOKENS ; DROP } } - # return Unit - UNIT ; PAIR } - -### Forward contract - -We want to write a forward contract on dried peas. The contract takes -as global data the tons of peas `Q`, the expected delivery date `T`, the -contract agreement date `Z`, a strike `K`, a collateral `C` per ton of dried -peas, and the accounts of the buyer `B`, the seller `S` and the warehouse `W`. - -These parameters as grouped in the global storage as follows: - - Pair - (Pair (Pair Q (Pair T Z))) - (Pair - (Pair K C) - (Pair (Pair B S) W)) - -of type - - pair - (pair nat (pair timestamp timestamp)) - (pair - (pair tez tez) - (pair (pair account account) account)) - -The 24 hours after timestamp `Z` are for the buyer and seller to store -their collateral `(Q * C)`. For this, the contract takes a string as -parameter, matching `"buyer"` or `"seller"` indicating the party for which -the tokens are transferred. At the end of this day, each of them can -send a transaction to send its tokens back. For this, we need to store -who already paid and how much, as a `(pair tez tez)` where the left -component is the buyer and the right one the seller. - -After the first day, nothing cam happen until `T`. - -During the 24 hours after `T`, the buyer must pay `(Q * K)` to the -contract, minus the amount already sent. - -After this day, if the buyer didn't pay enough then any transaction -will send all the tokens to the seller. - -Otherwise, the seller must deliver at least `Q` tons of dried peas to -the warehouse, in the next 24 hours. When the amount is equal to or -exceeds `Q`, all the tokens are transferred to the seller and the contract -is destroyed. For storing the quantity of peas already delivered, we -add a counter of type `nat` in the global storage. For knowing this -quantity, we accept messages from W with a partial amount of delivered -peas as argument. - -After this day, any transaction will send all the tokens to the buyer -(not enough peas have been delivered in time). - -Hence, the global storage is a pair, with the counters on the left, -and the constant parameters on the right, initially as follows. - - Pair - (Pair 0 (Pair 0_00 0_00)) - (Pair - (Pair (Pair Q (Pair T Z))) - (Pair - (Pair K C) - (Pair (Pair B S) W))) - -of type - - pair - (pair nat (pair tez tez)) - (pair - (pair nat (pair timestamp timestamp)) - (pair - (pair tez tez) - (pair (pair account account) account))) - -The parameter of the transaction will be either a transfer from the -buyer or the seller or a delivery notification from the warehouse of -type `(or string nat)`. - -At the beginning of the transaction: - - Q is accessible via a CDDAAR - T via a CDDADAR - Z via a CDDADDR - K via a CDDDAAR - C via a CDDDADR - B via a CDDDDAAR - S via a CDDDDADR - W via a CDDDDDR - the delivery counter via a CDAAR - the amount versed by the seller via a CDADDR - the argument via a CAR - -The contract returns a unit value, and we assume that it is created -with the minimum amount, set to `(Tez "1.00")`. - -The complete source `forward.tz` is: - - parameter (or string nat) ; - return unit ; - storage - (pair - (pair nat (pair tez tez)) # counter from_buyer from_seller - (pair - (pair nat (pair timestamp timestamp)) # Q T Z - (pair - (pair tez tez) # K C - (pair - (pair (contract unit unit) (contract unit unit)) # B S - (contract unit unit))))) ; # W - code - { DUP ; CDDADDR ; # Z - PUSH nat 86400 ; SWAP ; ADD ; # one day in second - NOW ; COMPARE ; LT ; - IF { # Before Z + 24 - DUP ; CAR ; # we must receive (Left "buyer") or (Left "seller") - IF_LEFT - { DUP ; PUSH string "buyer" ; COMPARE ; EQ ; - IF { DROP ; - DUP ; CDADAR ; # amount already versed by the buyer - DIP { AMOUNT } ; ADD ; # transaction - # then we rebuild the globals - DIP { DUP ; CDADDR } ; PAIR ; # seller amount - PUSH nat 0 ; PAIR ; # delivery counter at 0 - DIP { CDDR } ; PAIR ; # parameters - # and return Unit - UNIT ; PAIR } - { PUSH string "seller" ; COMPARE ; EQ ; - IF { DUP ; CDADDR ; # amount already versed by the seller - DIP { AMOUNT } ; ADD ; # transaction - # then we rebuild the globals - DIP { DUP ; CDADAR } ; SWAP ; PAIR ; # buyer amount - PUSH nat 0 ; PAIR ; # delivery counter at 0 - DIP { CDDR } ; PAIR ; # parameters - # and return Unit - UNIT ; PAIR } - { FAIL } } } # (Left _) - { FAIL } } # (Right _) - { # After Z + 24 - # test if the required amount is reached - DUP ; CDDAAR ; # Q - DIP { DUP ; CDDDADR } ; MUL ; # C - PUSH nat 2 ; MUL ; - PUSH tez "1.00" ; ADD ; - BALANCE ; COMPARE ; LT ; # balance < 2 * (Q * C) + 1 - IF { # refund the parties - CDR ; DUP ; CADAR ; # amount versed by the buyer - DIP { DUP ; CDDDAAR } # B - UNIT ; TRANSFER_TOKENS ; DROP - DUP ; CADDR ; # amount versed by the seller - DIP { DUP ; CDDDADR } # S - UNIT ; TRANSFER_TOKENS ; DROP - BALANCE ; # bonus to the warehouse to destroy the account - DIP { DUP ; CDDDDR } # W - UNIT ; TRANSFER_TOKENS ; DROP - # return unit, don't change the global - # since the contract will be destroyed - UNIT ; PAIR } - { # otherwise continue - DUP ; CDDADAR # T - NOW ; COMPARE ; LT - IF { FAIL } # Between Z + 24 and T - { # after T - DUP ; CDDADAR # T - PUSH nat 86400 ; ADD # one day in second - NOW ; COMPARE ; LT - IF { # Between T and T + 24 - # we only accept transactions from the buyer - DUP ; CAR ; # we must receive (Left "buyer") - IF_LEFT - { PUSH string "buyer" ; COMPARE ; EQ ; - IF { DUP ; CDADAR ; # amount already versed by the buyer - DIP { AMOUNT } ; ADD ; # transaction - # The amount must not exceed Q * K - DUP ; - DIIP { DUP ; CDDAAR ; # Q - DIP { DUP ; CDDDAAR } ; MUL ; } ; # K - DIP { COMPARE ; GT ; # new amount > Q * K - IF { FAIL } { } } ; # abort or continue - # then we rebuild the globals - DIP { DUP ; CDADDR } ; PAIR ; # seller amount - PUSH nat 0 ; PAIR ; # delivery counter at 0 - DIP { CDDR } ; PAIR ; # parameters - # and return Unit - UNIT ; PAIR } - { FAIL } } # (Left _) - { FAIL } } # (Right _) - { # After T + 24 - # test if the required payment is reached - DUP ; CDDAAR ; # Q - DIP { DUP ; CDDDAAR } ; MUL ; # K - DIP { DUP ; CDADAR } ; # amount already versed by the buyer - COMPARE ; NEQ ; - IF { # not reached, pay the seller and destroy the contract - BALANCE ; - DIP { DUP ; CDDDDADR } # S - DIIP { CDR } ; - UNIT ; TRANSFER_TOKENS ; DROP ; - # and return Unit - UNIT ; PAIR } - { # otherwise continue - DUP ; CDDADAR # T - PUSH nat 86400 ; ADD ; - PUSH nat 86400 ; ADD ; # two days in second - NOW ; COMPARE ; LT - IF { # Between T + 24 and T + 48 - # We accept only delivery notifications, from W - DUP ; CDDDDDR ; MANAGER ; # W - SOURCE unit unit ; MANAGER ; - COMPARE ; NEQ ; - IF { FAIL } {} # fail if not the warehouse - DUP ; CAR ; # we must receive (Right amount) - IF_LEFT - { FAIL } # (Left _) - { # We increment the counter - DIP { DUP ; CDAAR } ; ADD ; - # And rebuild the globals in advance - DIP { DUP ; CDADR } ; PAIR ; - DIP { CDDR } ; PAIR ; - UNIT ; PAIR ; - # We test if enough have been delivered - DUP ; CDAAR ; - DIP { DUP ; CDDAAR } ; - COMPARE ; LT ; # counter < Q - IF { CDR } # wait for more - { # Transfer all the money to the seller - BALANCE ; # and destroy the contract - DIP { DUP ; CDDDDADR } # S - DIIP { CDR } ; - UNIT ; TRANSFER_TOKENS ; DROP } } ; - UNIT ; PAIR } - { # after T + 48, transfer everything to the buyer - BALANCE ; # and destroy the contract - DIP { DUP ; CDDDDAAR } # B - DIIP { CDR } ; - UNIT ; TRANSFER_TOKENS ; DROP ; - # and return unit - UNIT ; PAIR } } } } } } } - -XII - Full grammar ----------------- - - ::= - | - | - | - | - | - | - | - | - | - | Unit - | True - | False - | Pair - | Left - | Right - | Some - | None - | { ; ... } - | { Elt ; ... } - | instruction - ::= - | { ... } - | DROP - | DUP - | SWAP - | PUSH - | SOME - | NONE - | UNIT - | IF_NONE { ... } { ... } - | PAIR - | CAR - | CDR - | LEFT - | RIGHT - | IF_LEFT { ... } { ... } - | NIL - | CONS - | IF_CONS { ... } { ... } - | EMPTY_SET - | EMPTY_MAP - | MAP - | MAP { ... } - | REDUCE - | ITER { ... } - | MEM - | GET - | UPDATE - | IF { ... } { ... } - | LOOP { ... } - | LOOP_LEFT { ... } - | LAMBDA { ... } - | EXEC - | DIP { ... } - | FAIL - | CONCAT - | ADD - | SUB - | MUL - | DIV - | ABS - | NEG - | MOD - | LSL - | LSR - | OR - | AND - | XOR - | NOT - | COMPARE - | EQ - | NEQ - | LT - | GT - | LE - | GE - | INT - | MANAGER - | SELF - | TRANSFER_TOKENS - | CREATE_ACCOUNT - | CREATE_CONTRACT - | DEFAULT_ACCOUNT - | NOW - | AMOUNT - | BALANCE - | CHECK_SIGNATURE - | H - | HASH_KEY - | STEPS_TO_QUOTA - | SOURCE - ::= - | - | key - | unit - | signature - | option - | list - | set - | contract - | pair - | or - | lambda - | map - ::= - | int - | nat - | string - | tez - | bool - | key_hash - | timestamp - -XIII - Reference implementation ------------------------------ - -The language is implemented in OCaml as follows: - - * The lower internal representation is written as a GADT whose type - parameters encode exactly the typing rules given in this - specification. In other words, if a program written in this - representation is accepted by OCaml's typechecker, it is - mandatorily type-safe. This of course also valid for programs not - handwritten but generated by OCaml code, so we are sure that any - manipulated code is type-safe. - - In the end, what remains to be checked is the encoding of the - typing rules as OCaml types, which boils down to half a line of - code for each instruction. Everything else is left to the - venerable and well trusted OCaml. - - * The interpreter is basically the direct transcription of the - rewriting rules presented above. It takes an instruction, a stack - and transforms it. OCaml's typechecker ensures that the - transformation respects the pre and post stack types declared by - the GADT case for each instruction. - - The only things that remain to we reviewed are value dependent - choices, such as that we did not swap true and false when - interpreting the If instruction. - - * The input, untyped internal representation is an OCaml ADT with - the only 5 grammar constructions: `String`, `Int`, `Seq` and - `Prim`. It is the target language for the parser, since not all - parsable programs are well typed, and thus could simply not be - constructed using the GADT. - - * The typechecker is a simple function that recognizes the abstract - grammar described in section X by pattern matching, producing the - well-typed, corresponding GADT expressions. It is mostly a - checker, not a full inferer, and thus takes some annotations - (basically the input and output of the program, of lambdas and of - uninitialized maps and sets). It works by performing a - symbolic evaluation of the program, transforming a symbolic - stack. It only needs one pass over the whole program. - - Here again, OCaml does most of the checking, the structure of the - function is very simple, what we have to check is that we - transform a `Prim ("If", ...)` into an `If`, a `Prim ("Dup", ...)` - into a `Dup`, etc. diff --git a/docs/michelson_anti_patterns.org b/docs/michelson_anti_patterns.org deleted file mode 100644 index 8f21c06e7..000000000 --- a/docs/michelson_anti_patterns.org +++ /dev/null @@ -1,88 +0,0 @@ -* Michelson Anti-Patterns -Even though Michelson is designed to make it easy to write secure contracts and difficult to write vulnerable ones, it is still possible to write buggy contracts that leak data and funds. This is a list of mistakes that you can make when writing or interacting with contracts on the Tezos blockchain and alternative ways to write code that avoid these problems. - -_Note_: We are currently reworking the concurrency model of Michelson (how and when subtransactions ar made), so that some of these patterns will be prevented by the language itself. - -** Refunding to a list of contracts -One common pattern in contracts is to refund a group of people's funds at once. -This is problematic if you accepted arbitrary contracts as a malicious user can -do cause various issues for you. - -*** Possible issues: - -- One contract swallows all the gas through a series of callbacks -- One contract writes transactions until the block is full -- Reentrancy bugs. Michelson intentionally makes these difficult to write, but it is still possible if you try. -- A contract calls the `FAIL` instruction, stopping all computation. - - -*** Alternatives/Solutions: - -- Create a default account from people's keys. Default accounts cannot execute code, avoiding the bugs above. Have people submit keys rather than contracts. -- Have people pull their funds individually. Each user can break their own withdrawal only. *This does not protect against reentrancy bugs.* - -** Avoid batch operations when users can increase the size of the batch -Contracts that rely on linear or super-linear operations are vulnerable to -malicious users supplying values until the contract cannot finish without -running into fuel limits. This can deadlock your contract. - -*** Possible issues: -- Malicious users can force your contract into a pathological worst case, stopping it from finishing with available gas. Note that in the absence of hard gas limits, this can still be disabling as node operators may not want to run contracts that take more than a certain amount of gas. -- You may hit the slow case of an amortized algorithm or data structure at an inopportune time, using up all of your contract's available gas. - -*** Alternatives/Solutions: -- Avoid data structures and algorithms that rely on amortized operations, especially when users may add data. -- Restrict the amount of data your contract can store to a level that will not overwhelm the available gas. -- Write your contract so that it may pause and resume batch operations. This would complicate these sequences and require constant checking of available gas, but it prevents various attacks. - -*Do not assume an attack will be prohibitively expensive* -Cryptocurrencies have extreme price fluctuations frequently and an extremely motivated attacker may decide that an enormous expense is justified. Remember, an attack that disables a contract is not just targeted at the authors, but also the users of that contract. - -** Signatures alone do not prevent replay attacks -If your contract uses signatures to authenticate messages, beware of replay attacks. If a user ever signs a piece of data, you /must/ make sure that that piece of data is never again a valid message to the contract. If you do not do this, anyone else can call your contract with the same input and piggyback on the earlier approval. - -*** Possible issues: -- A previously approved action can be replayed. - -*** Alternatives/Solutions -- Use an internal counter to make the data you ask users to sign unique. This counter should be per key so that users can find out what they need to approve. This should be paired with a signed hash of your contract to prevent cross-contract replays. -- Use the =SOURCE= instruction to verify that the expected sender is the source of the message. - -*** Do not assume users will use a unique key for every smart contract -Users should always use a different key for every contract with which they interact. If this is not the case, a message the user signed for another contract can be sent to your contract. An internal counter alone does not protect against this attack. It /must/ be paired with a hash of your contract. You must verify the source of the message. - -** Storing/transferring private data -Once data is published to anyone, including broadcasting a transaction, that data is public. Never transmit secret information via any part of the blockchain ecosystem. As soon as you have broadcast a transaction including that piece of information, anyone can see it. Furthermore, malicious nodes in the system can manipulate unsigned transactions by delaying, modifying, or reordering them. - -*** Possible Issues -- If data is not signed, it can be modified -- Transactions can be delayed -- Secret information will become public - -*** Alternatives/Solutions -- Do not store private information on the blockchain or broadcast it in transactions. -- Sign all transactions that contain information that, if manipulated, could be abused. -- Use counters to enforce transaction orders. -This will at least create a logical clock on messages sent to your contract. - -** Not setting all state before a transfer -Reentrancy is a potential issue on the blockchain. When a contract makes a transfer to another contract, that contract can execute its own code, -and can make arbitrary further transfers, including back to the original contract. If state has not been updated before the transfer is made, a contract can call back in and execute actions based on old state. - -*** Possible Issues -- Multiple withdrawals/actions -- Generating illegal state if state is updated twice later - -*** Alternatives/Solutions -- Forbid reentrancy by means of a flag in your storage, unless you have a good reason to allow users to reenter your contract, this is likely the best option. -- Only make transfers to trusted contracts or default accounts. Default accounts cannot execute code, so it is always safe to transfer to them. Before trusting a contract, make sure that its behavior cannot be modified and that you have an extremely high degree of confidence in it. - -** Do not store funds for others in spendable contracts -Tezos allows contracts to be marked as spendable. Managers of spendable contracts can make transfers using the funds stored inside the contract. This can subvert guarantees about the contract's behavior that are visibile in the code. - -*** Possible Issues -- The funds of a contract can be removed. -- A contract may not be able to meet its obligations - -*** Alternatives/Solutions -- Do not store funds in spendable contracts that you do not control. diff --git a/docs/tutorials/data_encoding.rst b/docs/tutorials/data_encoding.rst new file mode 100644 index 000000000..4a097c7d2 --- /dev/null +++ b/docs/tutorials/data_encoding.rst @@ -0,0 +1,206 @@ +.. _data_encoding: + +The ``data_encoding`` library +============================= + +Throughout the Tezos protocol, data is serialized so that it can be used +via RPC, written to disk, or placed in a block. This +serialization/de-serialization is handled via the :package:`tezos-data-encoding` +library by providing a set primitive encodings and a variety of combinators. + +Examples/Tutorial +----------------- + +Encoding an integer +~~~~~~~~~~~~~~~~~~~ + +Integers are defined as other concrete data types with a generic +encoding type ``type 'a encoding``. This means that it is an encoding +to/from type ``int``. There are a variety of ways to encode an integer, +depending on what binary serialization you want to achieve: + +- ``Data_encoding.int8`` +- ``Data_encoding.uint8`` +- ``Data_encoding.int16`` +- ``Data_encoding.uint16`` +- ``Data_encoding.int31`` +- ``Data_encoding.int32`` +- ``Data_encoding.int64`` + +For example, an encoding that represents a 31 bit integer has type +``Data_encoding.int31 = int Data_encoding.encoding``. + +.. code:: ocaml + + let int31_encoding = Data_encoding.int31 + +Encoding an object +~~~~~~~~~~~~~~~~~~ + +Encoding a single integer is fairly uninteresting. The Dataencoding +library provides a number of combinators that can be used to build more +complicated objects. Consider the type that represents an interval from +the first number to the second: + +.. code:: ocaml + + type interval = int64 * int64 + +We can define an encoding for this type as: + +.. code:: ocaml + + let interval_encoding = + Data_encoding.(obj2 (req "min" int64) (req "max" int64)) + +In the example above we construct a new value ``interval_encoding`` by +combining two int64 integers using the ``obj2`` constructor. + +The library provides different constructors, i.e. for objects that have +no data (``Data_encoding.empty``), constructors for object up to 10 +fields, constructors for tuples, list, etc. + +These are serialized to binary by converting each internal object to +binary and placing them in the order of the original object and to JSON +as a JSON object with field names. + +Lists, arrays, and options +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +List, Arrays and options types can by built on top of ground data types. + +.. code:: ocaml + + type interval_list = interval list + + type interval_array = interval array + + type interval_option = interval option + +And the encoders for these types as + +.. code:: ocaml + + let interval_list_encoding = Data_encoding.list interval_encoding + let interval_array_encoding = Data_encoding.array interval_encoding + let interval_option_encoding = Data_encoding.option interval_encoding + +Union types +~~~~~~~~~~~ + +The Tezos codebase makes heavy use of variant types. Consider the +following variant type: + +.. code:: ocaml + + type variant = B of bool + | S of string + +Encoding for this types can be expressed as: + +.. code:: ocaml + + let variant_encoding = + Data_encoding.(union ~tag_size:`Uint8 + [ case + bool + (function B b -> Some b | _ -> None) + (fun b -> B b) ; + case + string + (function S s -> Some s | _ -> None) + (fun s -> S s) ]) + +This variant encoding is a bit more complicated. Let’s look at the parts +of the type: + +- We include an optimization hint to the binary encoding to inform it + of the number of elements we expect in the tag. In most cases, we can + use :literal:`\`Uint8`, which allows you to have up to 256 possible + cases (default). +- We provide a function to wrap the datatype. The encoding works by + repeatedly trying to decode the datatype using these functions until + one returns ``Some payload``. This payload is then encoded using the + dataencoding specified. +- We specify a function from the encoded type to the actual datatype. + +Since the library does not provide an exhaustive check on these +constructors, the user must be careful when constructing unin types to +avoid unfortunate runtime failures. + +How the Dataencoding module works +--------------------------------- + +This section is 100% optional. You do not need to understand this +section to use the library. + +The library uses GADTs to provide type-safe +serialization/de-serialization. From there, a runtime representation of +JSON objects is parsed into the type-safe version. + +First we define an untyped JSON AST: + +.. code:: ocaml + + type json = + [ `O of (string * json) list + | `Bool of bool + | `Float of float + | `A of json list + | `Null + | `String of string ] + +This is then parsed into a typed AST ( we eliminate several cases for +clarity): + +.. code:: ocaml + + type 'a desc = + | Null : unit desc + | Empty : unit desc + | Bool : bool desc + | Int64 : Int64.t desc + | Float : float desc + | Bytes : Kind.length -> MBytes.t desc + | String : Kind.length -> string desc + | String_enum : Kind.length * (string * 'a) list -> 'a desc + | Array : 'a t -> 'a array desc + | List : 'a t -> 'a list desc + | Obj : 'a field -> 'a desc + | Objs : Kind.t * 'a t * 'b t -> ('a * 'b) desc + | Tup : 'a t -> 'a desc + | Union : Kind.t * tag_size * 'a case list -> 'a desc + | Mu : Kind.enum * string * ('a t -> 'a t) -> 'a desc + | Conv : + { proj : ('a -> 'b) ; + inj : ('b -> 'a) ; + encoding : 'b t ; + schema : Json_schema.schema option } -> 'a desc + | Describe : + { title : string option ; + description : string option ; + encoding : 'a t } -> 'a desc + | Def : { name : string ; + encoding : 'a t } -> 'a desc + +- The first set of constructors define all ground types. +- The constructors for ``Bytes``, ``String`` and ``String_enum`` + includes a length fields in order to provide safe binary + serialization. +- The constructors for ``Array`` and ``List`` are used by the + combinators we saw earlier. +- The ``Obj`` and ``Objs`` constructors create JSON objects. These are + wrapped in the ``Conv`` constructor to remove nesting that results + when these constructors are used naively. +- The ``Mu`` constructor is used to create self-referential + definitions. +- The ``Conv`` constructor allows you to clean up a nested definition + or compute another type from an existing one. +- The ``Describe`` and ``Def`` constructors are used to add + documentation + +The library also provides various wrappers and convenience functions to +make constructing these objects easier. Reading the documentation in the +`mli file +<../api/odoc/tezos-data-encoding/Tezos_data_encoding/Data_encoding/index.html>`__ +should orient you on how to use these functions and their purposes. diff --git a/docs/tutorials/entering_alpha.rst b/docs/tutorials/entering_alpha.rst new file mode 100644 index 000000000..0deffaf29 --- /dev/null +++ b/docs/tutorials/entering_alpha.rst @@ -0,0 +1,196 @@ +How to start reading protocol Alpha +=================================== + +Protocol Alpha, whose Alpha has nothing to do with the one in Alphanet, +is the name of the initial economic protocol. Alpha is a placeholder +name, while we decide on the naming convention for protocol versions. + +Before reading that document, you may want to: + +- read the whitepaper, +- read :ref:`how the economic protocol is + sandboxed `. + +As all protocols, Alpha is made of a series of OCaml interface and +implementation files, accompanied by a ``TEZOS_PROTOCOL`` file. + +The ``TEZOS_PROTOCOL`` structure +-------------------------------- + +If you look at this file in the repository, you will see that it is +composed of the hash of the sources, and the list of its modules, in +linking order. + +Protocol Alpha is structured as a tower of abstraction layers, a coding +discipline that we designed to have OCaml check as many invariants as +possible at typing time. You will also see empty lines in +``TEZOS_PROTOCOL`` that denotate these layers of abstraction. + +These layers follow the linking order: the first modules are the tower’s +foundation that talk to the raw key-value store, and going forward in +the module list means climbing up the abstraction tower. + +The big abstraction barrier: ``Tezos_context`` +---------------------------------------------- + +the proof-of-stake algorithm, as described in the white paper, relies on +an abstract state of the ledger, that is read and transformed during +validation of a block. + +Due to the polymorphic nature of Tezos, the ledger’s state (that we call +**context** in the code), cannot be specific to protocol Alpha’s need. +The proof-of-stake is thus implemented over a generic key-value store +whose keys and associated binary data must implement the abstract +structure. + +The ``Tezos_context`` module enforces the separation of concerns +between, on one hand, mapping the abstract state of the ledger to the +concrete structure of the key-value store, and, on the other hand, +implementing the proof-of-stake algorithm over this state. + +In more practical terms, ``Tezos_context`` defines a type ``t`` that +represents a state of the ledger. This state is an abstracted out +version of the key-value store that can only be manipulated through the +use of the few selected manipulations reexported by ``Tezos_context``, +that always preserve the well-typed aspect and internal consistency +invariants of the state. + +When validating a block, the low-level state that result from the +predecessor block is read from the disk, then abstracted out to a +``Tezos_context.t``, which is then only updated by high level operations +that preserve consistency, and finally, the low level state is extracted +to be committed on disk. + +This way, we have two well separated parts in the code. The code below +``Tezos_context`` implements the ledger’s state storage, while the code +on top of it is the proof-of-stake algorithm. Thanks to this barrier, +the latter can remain nice, readable OCaml that only manipulates plain +OCaml values. + +Below the ``Tezos_context`` +--------------------------- + +For this part, in a first discovery of the source code, you can start by +relying mostly on this coarse grained description, with a little bit of +cherry-picking when you’re curious about how a specific invariant is +enforced. + +The ``*_repr`` modules +~~~~~~~~~~~~~~~~~~~~~~ + +These modules abstract the values of the raw key-value context by using +:ref:`Data_encoding`. + +These modules define the data types used by the protocol that need to be +serialized (amounts, contract handles, script expressions, etc.). For +each type, it also defines its serialization format using +:ref:`Data_encoding`. + +Above this layer, the code should never see the byte sequences in the +database, the ones of transmitted blocks and operations, or the raw JSON +of data transmitted via RPCs. It only manipulates OCaml values. + +The ``Storage`` module and storage functors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Even with the concrete formats of values in the context abstracted out, +type (or consistency) errors can still occur if the code accesses a +value with a wrong key, or a key bound to another value. The next +abstraction barrier is a remedy to that. + +The storage module is the single place in the protocol where key +litterals are defined. Hence, it is the only module necessary to audit, +to know that the keys are not colliding. + +It also abstracts the keys, so that each kind of key get its own +accessors. For instance, module ``Storage.Contract.Balance`` contains +accessors specific to contracts’ balances. + +Moreover, the keys bear the type of the values they point to. For +instance, only values of type ``Tez_repr.t`` can by stored at keys +``Storage.Contract.Balance``. And in case a key is not a global key, but +a parametric one, this key is parametered by an OCaml value, and not the +raw key part. + +So in the end, the only way to be used when accessing a contract balance +is ``Storage.Contract.Balance.get``, which takes a ``Contract_repr.t`` +and gives a ``Tez_repr.t``. + +All these well-typed operations are generated by a set of functors, that +come just before ``Storage`` in ``TEZOS_CONTEXT``. + +The ``*_storage`` modules +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The two previous steps ensure that the ledger’s state is always accessed +and updated in a well-typed way. + +However, it does not enforce that, for instance, when a contract is +deleted, all of the keys that store its state in the context are indeed +deleted. + +This last series of modules named ``*_storage`` is there to enforce just +that kind of invariants: ensuring the insternal consistency of the +context structure. + +These transaction do not go as far as checking that, for instance, when +the destination of a transaction is credited, the source is also +debitted, as in some cases, it might not be the case. + +Above the ``Tezos_context`` +--------------------------- + +The three next sections describe the main entrypoints to the protocol: +validation of blocks by the shell (that we often also call application), +smart contracts, and RPC services. + +The ``Main`` module is the entrypoint that’s used by the shell. It +respects the module type that all protocol must follow. For that, its +code is mostly plumbing, + +Starting from ``Apply`` +~~~~~~~~~~~~~~~~~~~~~~~ + +This is were you want to start on your first read. Even if some plumbing +code is woven in, such as error cases declaration and registration, most +of the proof-of-stake code has been written in a verbose style, to be +understood with minimum OCaml knowledge. + +You want to start from the shell entry points (validation of the block +header, validation of an operation, finalization of a block validation), +and follow the control flow until you hit the ``Tezos_context`` +abstraction barrier. This will lead you to reading modules ``Baking`` +and ``Amendment``. + +Smart contracts +~~~~~~~~~~~~~~~ + +From ``Apply``, you will also end up in modules ``Script_ir_translator`` +and ``Script_interpreter``. The former is the typechecker of Michelson +that is called when creating a new smart contract, and the latter is the +interpreter that is called when transfering tokens to a new smart +contract. + +Protocol RPC API +~~~~~~~~~~~~~~~~ + +Finally, the RPCs specific to Alpha are also defined above the +``Tezos_context`` barrier. The definition is split into two parts. + +The first part, ``Services``, defines the RPC API: URL schemes with the +types of parameters, and input and output JSON schemas. This interface +serves three purposes. As it is thourouhgly tyoed, it makes sure that +the handlers have the right input and output types. It is also used by +the client to perform RPC calls, to make sure that the URL schemes and +JSON formats and consistent between the two parties. These two features +are extremely useful when refactoring, as the OCaml typechecker will +help us track the effects of an RPC API change on the whole codebase. +The third purpose is of course, to make automatic documentation +generation possible (as in ``tezos client rpc list/format``). + +It can be useful if you are a third party developer who wants to read +the OCaml definition of the service hierarchy directly, instead of the +automatically generated JSON hierarchy. + +The second part, ``Services_registration``, is responsible for plugging +the OCaml handler functions that implement the RPC API. diff --git a/docs/tutorials/error_monad.rst b/docs/tutorials/error_monad.rst new file mode 100644 index 000000000..af1723111 --- /dev/null +++ b/docs/tutorials/error_monad.rst @@ -0,0 +1,366 @@ +.. _error_monad: + +The Error Monad +=============== + +This has been adapted from a blog post on *michelson-lang.com*. + +If you’re not familiar with monads, go take a few minutes and read a +tutorial. I personally got a lot out of this +`paper `__ +by Philip Wadler, but there are a ton of others available online. Find +one that works for you. The error monad isn’t terribly scary as Monads +go, so once you feel like you understand the gist, come on back and see +if you can understand what’s going on. + +I’m going to omit some convenience operations that a lot of monads +provide in the examples below. If you want to add them, they’re not +difficult. + +Why you want the error monad +---------------------------- + +In Tezos, we don’t want to have the node be crashable by an improper +input. To avoid this possibility, it was decided that the system should +not use exceptions for error handling. Instead, it uses an error monad. +This design forces errors to be handled or carried through before an +output can be used. Exceptions are still occasionally used, but this is +mostly in the client and only for internal errors. + +We also mix in the Lwt library, which we use for concurrency. This is +combined with the error monad and is once again used pervasively +throughout the codebase. The Lwt monad is a lot like promises in other +languages. + +Without further ado, let’s write an error monad. + +A simple version of the error monad +----------------------------------- + +Here’s a very simple error monad. + +.. code:: ocaml + + module Error : sig + type 'a t + (* Create a value of type t *) + val return : 'a -> 'a t + (* For when a computation fails *) + val error : 'a t + (* Apply an operation to a value in the error monad *) + val (>>?) : 'a t -> ('a -> 'b t) -> 'b t (* bind *) + end = struct + type 'a t = Ok of 'a | Error + let return x = Ok x + let error = Error + let (>>?) value func = + match value with + | Ok x -> func x + | Error -> Error + end + +So, is this what Tezos uses? We actually already have a lot of the +structure that we’ll use later. The basic idea is that you return a +value that’s correct and return an error if the operation failed. +Outside of the error module, you can’t actually introspect an error +value. You can only dispatch on the correctness/incorrectness of the +value using bind. + +What’s wrong here? + +- We can’t report any information about an error case +- We can’t report error traces, something that’s used to improve the + quality of error messages throughout Tezos +- We can’t handle some errors and continue executing + +A slight improvement +-------------------- + +Let’s now enhance our error reporting by allowing errors to contain a +description string. Now we can report messages along with our errors. Is +this enough of an improvement? Not really. We don’t have any flexibility +about how the printing works. We still can’t create error traces and we +can’t handle errors and resume executing the program. + +.. code:: ocaml + + module Error : sig + type 'a t + val return : 'a -> 'a t + val error : string -> 'a t + val (>>?) : 'a t -> ('a -> 'b t) -> 'b t (* bind *) + val print_value : ('a -> string) -> 'a t -> unit + end = struct + type 'a t = Ok of 'a | Error of string + let return x = Ok x + let error s = Error s + let (>>?) value func = + match value with + | Ok x -> func x + | Error s -> Error s + let print_value func = function + | Ok x -> Printf.printf "Success: %s\n" (func x) + | Error s -> Printf.printf "Error: %s\n" s + end + +Traces +------ + +Now that we have the basic structure down, we can add a mechanism to let +us include traces. As a note, the error type I had above is exactly the +``result`` type from the OCaml standard library. The traces are just +lists of error messages. If you have a call you think might fail, and +you want to provide a series of errors, you can wrap that result in the +``trace`` function. If that call fails, an additional error is added. + +.. code:: ocaml + + module Error : sig + type 'a t + val return : 'a -> 'a t + val error : string -> 'a t + val (>>?) : 'a t -> ('a -> 'b t) -> 'b t (* bind *) + val print_value : ('a -> string) -> 'a t -> unit + val trace : string -> 'a t -> 'a t + end = struct + type 'a t = ('a, string list) result + let return x = Ok x + let error s = Error [ s ] + let (>>?) value func = + match value with + | Ok x -> func x + | Error errs -> Error errs + let print_value func = function + | Ok x -> Printf.printf "Success: %s\n" (func x) + | Error [ s ] -> Printf.printf "Error: %s\n" s + | Error errors -> Printf.printf "Errors:\t%s\n" (String.concat "\n\t" errors) + let trace error = function + | Ok x -> Ok x + | Error errors -> Error (error :: errors) + end + +A more descriptive message +-------------------------- + +Even though traces are nice, we really want to be able to store more +interesting data in the messages. We’re going to use an extensible +variant type to do this. Extensible variants allow us to add a new case +to a variant type at the cost of exhaustivity checking. We’re going to +need two new mechanisms to make this work well. The first is an error +registration scheme. In the actual error monad, this involves the data +encoding module, which is how all data is encoded/decoded in Tezos. This +module is another decently complicated part of the codebase that should +probably the subject of a future post. Since you can declare arbitrary +new errors, we’ll have a way of adding a printer for each error. + +When we add a new error handler, we’ll use the ``register_handler`` +function. This function will take a function that takes an error and +returns a ``string option``. These functions will look something like +this: + +.. code:: ocaml + + type error += Explosion_failure of string * int;; + + register_error + (function + | Explosion_failure (s, i) -> + Some (Printf.sprintf "Everything exploded: %s at %d" s i) + | _ -> None) + +I’m also renaming the ``error`` function to ``fail``. This is the +convention used by the actual Errormonad module. I’m also exposing the +``'a t`` type so that you can dispatch on it if you need to. This is +used several times in the Tezos codebase. + +.. code:: ocaml + + module Error : sig + type error = .. + type 'a t = ('a, error list) result + val return : 'a -> 'a t + val fail : error -> 'a t + val (>>?) : ('a -> 'b t) -> 'a t -> 'b t (* bind *) + val print_value : ('a -> string) -> 'a t -> unit + val trace : error -> 'a t -> 'a t + end = struct + type error = .. + type 'a t = ('a, error list) result + let fail error = Error [ error ] + let return x = Ok x + let (>>?) func = function + | Ok x -> func x + | Error errs -> Error errs + let registered = ref [] + let register_error handler = + registered := (handler :: !registered) + let default_handler error = + "Unregistered error: " ^ Obj.(extension_name @@ extension_constructor error) + let to_string error = + let rec find_handler = function + | [] -> default_handler error + | handler :: handlers -> + begin match handler error with + | None -> find_handler handlers + | Some s -> s + end + in find_handler !registered + let print_value func = function + | Ok x -> Printf.printf "Success: %s\n" (func x) + | Error [ s ] -> Printf.printf "Error: %s\n" (to_string s) + | Error errors -> Printf.printf "Errors:\t%s\n" (String.concat "\n\t" (List.map to_string errors)) + let trace error = function + | Ok x -> Ok x + | Error errors -> Error (error :: errors) + end + +Putting ``Lwt.t`` in the mix +---------------------------- + +Tezos uses the `Lwt library `__ for threading. +The Lwt monad is mixed in with the error monad module. This requires us +to add some extra combinators and reexport some functions from Lwt. + +I’m also renaming the type ``t`` to ``tzresult``, as used in the Tezos +codebase. + +.. code:: ocaml + + module Error : sig + type error = .. + type 'a tzresult = ('a, error list) result + val ok : 'a -> 'a tzresult + val return : 'a -> 'a tzresult Lwt.t + val error : error -> 'a tzresult + val fail : error -> 'a tzresult Lwt.t + val (>>?) : 'a tzresult -> ('a -> 'b tzresult) -> 'b tzresult (* bind *) + val (>>=?) : 'a tzresult Lwt.t -> ('a -> 'b tzresult Lwt.t) -> 'b tzresult Lwt.t + val (>>=) : 'a Lwt.t -> ('a -> 'b Lwt.t) -> 'b Lwt.t + val print_value : ('a -> string) -> 'a tzresult Lwt.t -> unit Lwt.t + val trace : error -> 'a tzresult Lwt.t -> 'a tzresult Lwt.t + end = struct + type error = .. + type 'a tzresult = ('a, error list) result + let fail error = Lwt.return (Error [ error ]) + let error error = (Error [ error ]) + let ok x = Ok x + let return x = Lwt.return (ok x) + let (>>?) value func = + match value with + | Ok x -> func x + | Error errs -> Error errs + let (>>=) = Lwt.bind + let (>>=?) value func = + value >>= function + | Ok x -> func x + | Error errs -> Lwt.return (Error errs) + let registered = ref [] + let register_error handler = + registered := (handler :: !registered) + let default_handler error = + "Unregistered error: " ^ Obj.(extension_name @@ extension_constructor error) + let to_string error = + let rec find_handler = function + | [] -> default_handler error + | handler :: handlers -> + begin match handler error with + | None -> find_handler handlers + | Some s -> s + end + in find_handler !registered + let print_value func value = + value >>= fun value -> + begin match value with + | Ok x -> Printf.printf "Success: %s\n" (func x) + | Error [ s ] -> Printf.printf "Error: %s\n" (to_string s) + | Error errors -> Printf.printf "Errors:\t%s\n" (String.concat "\n\t" (List.map to_string errors)) + end; Lwt.return () + let trace error value = + value >>= function + | Ok x -> return x + | Error errors -> Lwt.return (Error (error :: errors)) + end + +The actual Tezos error monad +---------------------------- + +The actual Tezos error monad adds a few things. Firstly, there are three +categories of errors: + +- :literal:`\`Temporary` - An error resulting from an operation that + might be valid in the future, for example, a contract’s balance being + too low to execute the intended operation. This can be fixed by + adding more to the contract’s balance. +- :literal:`\`Branch` - An error that occurs in one branch of the + chain, but may not occur in a different one. For example, receiving + an operation for an old or future protocol version. +- :literal:`\`Permanent` - An error that is not recoverable because the + operation is never going to be valid. For example, an invalid ꜩ + notation. + +The registration scheme also uses data encodings. Here’s an example from +the `validator <../api/odoc/tezos-node-shell/Tezos_node_shell/Validator/index.html>`__: + +.. code:: ocaml + + register_error_kind + `Permanent + ~id:"validator.wrong_level" + ~title:"Wrong level" + ~description:"The block level is not the expected one" + ~pp:(fun ppf (e, g) -> + Format.fprintf ppf + "The declared level %ld is not %ld" g e) + Data_encoding.(obj2 + (req "expected" int32) + (req "provided" int32)) + (function Wrong_level (e, g) -> Some (e, g) | _ -> None) + (fun (e, g) -> Wrong_level (e, g)) + +An error takes a category, id, title, description, and encoding. You +must specify a function to take an error to an optional value of the +encoding type and a function to take a value of the encoded type and +create an error value. A pretty printer can optionally be specified, but +may also be omitted. + +The actual error monad and it’s tracing features can be seen in this +function which parses contracts: + +.. code:: ocaml + + let parse_script + : ?type_logger: (int * (Script.expr list * Script.expr list) -> unit) -> + context -> Script.storage -> Script.code -> ex_script tzresult Lwt.t + = fun ?type_logger ctxt + { storage; storage_type = init_storage_type } + { code; arg_type; ret_type; storage_type } -> + trace + (Ill_formed_type (Some "parameter", arg_type)) + (Lwt.return (parse_ty arg_type)) >>=? fun (Ex_ty arg_type) -> + trace + (Ill_formed_type (Some "return", ret_type)) + (Lwt.return (parse_ty ret_type)) >>=? fun (Ex_ty ret_type) -> + trace + (Ill_formed_type (Some "initial storage", init_storage_type)) + (Lwt.return (parse_ty init_storage_type)) >>=? fun (Ex_ty init_storage_type) -> + trace + (Ill_formed_type (Some "storage", storage_type)) + (Lwt.return (parse_ty storage_type)) >>=? fun (Ex_ty storage_type) -> + let arg_type_full = Pair_t (arg_type, storage_type) in + let ret_type_full = Pair_t (ret_type, storage_type) in + Lwt.return (ty_eq init_storage_type storage_type) >>=? fun (Eq _) -> + trace + (Ill_typed_data (None, storage, storage_type)) + (parse_data ?type_logger ctxt storage_type storage) >>=? fun storage -> + trace + (Ill_typed_contract (code, arg_type, ret_type, storage_type, [])) + (parse_returning (Toplevel { storage_type }) ctxt ?type_logger arg_type_full ret_type_full code) + >>=? fun code -> + return (Ex_script { code; arg_type; ret_type; storage; storage_type }) + +Each specific type error from the typechecking process is wrapped in a +more general error that explains which part of the program was +malformed. This improves the error reporting. You can also see the bind +operator used between functions to continue only if an error does not +occur. This function also operates in the ``Lwt`` monad, which is +largely hidden via the error monad. diff --git a/docs/tutorials/michelson_anti_patterns.rst b/docs/tutorials/michelson_anti_patterns.rst new file mode 100644 index 000000000..6ba1c5649 --- /dev/null +++ b/docs/tutorials/michelson_anti_patterns.rst @@ -0,0 +1,203 @@ +Michelson Anti-Patterns +======================= + +Even though Michelson is designed to make it easy to write secure +contracts and difficult to write vulnerable ones, it is still possible +to write buggy contracts that leak data and funds. This is a list of +mistakes that you can make when writing or interacting with contracts on +the Tezos blockchain and alternative ways to write code that avoid these +problems. + +Note: We are currently reworking the concurrency model of Michelson (how +and when sub-transactions are made), so that some of these patterns will +be prevented by the language itself. + +Refunding to a list of contracts +-------------------------------- + +One common pattern in contracts is to refund a group of people’s funds +at once. This is problematic if you accepted arbitrary contracts as a +malicious user can do cause various issues for you. + +Possible issues: +~~~~~~~~~~~~~~~~ + +- One contract swallows all the gas through a series of callbacks +- One contract writes transactions until the block is full +- Reentrancy bugs. Michelson intentionally makes these difficult to + write, but it is still possible if you try. +- A contract calls the \`FAIL\` instruction, stopping all computation. + +Alternatives/Solutions: +~~~~~~~~~~~~~~~~~~~~~~~ + +- Create a default account from people’s keys. Default accounts cannot + execute code, avoiding the bugs above. Have people submit keys rather + than contracts. +- Have people pull their funds individually. Each user can break their + own withdrawal only. **This does not protect against reentrancy + bugs.** + +Avoid batch operations when users can increase the size of the batch +-------------------------------------------------------------------- + +Contracts that rely on linear or super-linear operations are vulnerable +to malicious users supplying values until the contract cannot finish +without running into fuel limits. This can deadlock your contract. + +.. _possible-issues-1: + +Possible issues: +~~~~~~~~~~~~~~~~ + +- Malicious users can force your contract into a pathological worst + case, stopping it from finishing with available gas. Note that in the + absence of hard gas limits, this can still be disabling as node + operators may not want to run contracts that take more than a certain + amount of gas. +- You may hit the slow case of an amortized algorithm or data structure + at an inopportune time, using up all of your contract’s available + gas. + +.. _alternativessolutions-1: + +Alternatives/Solutions: +~~~~~~~~~~~~~~~~~~~~~~~ + +- Avoid data structures and algorithms that rely on amortized + operations, especially when users may add data. +- Restrict the amount of data your contract can store to a level that + will not overwhelm the available gas. +- Write your contract so that it may pause and resume batch operations. + This would complicate these sequences and require constant checking + of available gas, but it prevents various attacks. + +\*Do not assume an attack will be prohibitively expensive\* +Cryptocurrencies have extreme price fluctuations frequently and an +extremely motivated attacker may decide that an enormous expense is +justified. Remember, an attack that disables a contract is not just +targeted at the authors, but also the users of that contract. + +Signatures alone do not prevent replay attacks +---------------------------------------------- + +If your contract uses signatures to authenticate messages, beware of +replay attacks. If a user ever signs a piece of data, you *must* make +sure that that piece of data is never again a valid message to the +contract. If you do not do this, anyone else can call your contract with +the same input and piggyback on the earlier approval. + +.. _possible-issues-2: + +Possible issues: +~~~~~~~~~~~~~~~~ + +- A previously approved action can be replayed. + +.. _alternativessolutions-2: + +Alternatives/Solutions +~~~~~~~~~~~~~~~~~~~~~~ + +- Use an internal counter to make the data you ask users to sign + unique. This counter should be per key so that users can find out + what they need to approve. This should be paired with a signed hash + of your contract to prevent cross-contract replays. +- Use the ``SOURCE`` instruction to verify that the expected sender is + the source of the message. + +Do not assume users will use a unique key for every smart contract +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Users should always use a different key for every contract with which +they interact. If this is not the case, a message the user signed for +another contract can be sent to your contract. An internal counter alone +does not protect against this attack. It *must* be paired with a hash of +your contract. You must verify the source of the message. + +Storing/transferring private data +--------------------------------- + +Once data is published to anyone, including broadcasting a transaction, +that data is public. Never transmit secret information via any part of +the blockchain ecosystem. As soon as you have broadcast a transaction +including that piece of information, anyone can see it. Furthermore, +malicious nodes in the system can manipulate unsigned transactions by +delaying, modifying, or reordering them. + +.. _possible-issues-3: + +Possible Issues +~~~~~~~~~~~~~~~ + +- If data is not signed, it can be modified +- Transactions can be delayed +- Secret information will become public + +.. _alternativessolutions-3: + +Alternatives/Solutions +~~~~~~~~~~~~~~~~~~~~~~ + +- Do not store private information on the blockchain or broadcast it in + transactions. +- Sign all transactions that contain information that, if manipulated, + could be abused. +- Use counters to enforce transaction orders. + +This will at least create a logical clock on messages sent to your +contract. + +Not setting all state before a transfer +--------------------------------------- + +Reentrancy is a potential issue on the blockchain. When a contract makes +a transfer to another contract, that contract can execute its own code, +and can make arbitrary further transfers, including back to the original +contract. If state has not been updated before the transfer is made, a +contract can call back in and execute actions based on old state. + +.. _possible-issues-4: + +Possible Issues +~~~~~~~~~~~~~~~ + +- Multiple withdrawals/actions +- Generating illegal state if state is updated twice later + +.. _alternativessolutions-4: + +Alternatives/Solutions +~~~~~~~~~~~~~~~~~~~~~~ + +- Forbid reentrancy by means of a flag in your storage, unless you have + a good reason to allow users to reenter your contract, this is likely + the best option. +- Only make transfers to trusted contracts or default accounts. Default + accounts cannot execute code, so it is always safe to transfer to + them. Before trusting a contract, make sure that its behavior cannot + be modified and that you have an extremely high degree of confidence + in it. + +Do not store funds for others in spendable contracts +---------------------------------------------------- + +Tezos allows contracts to be marked as spendable. Managers of spendable +contracts can make transfers using the funds stored inside the contract. +This can subvert guarantees about the contract’s behavior that are +visible in the code. + +.. _possible-issues-5: + +Possible Issues +~~~~~~~~~~~~~~~ + +- The funds of a contract can be removed. +- A contract may not be able to meet its obligations + +.. _alternativessolutions-5: + +Alternatives/Solutions +~~~~~~~~~~~~~~~~~~~~~~ + +- Do not store funds in spendable contracts that you do not control. diff --git a/docs/tutorials/protocol_environment.rst b/docs/tutorials/protocol_environment.rst new file mode 100644 index 000000000..31be8eeda --- /dev/null +++ b/docs/tutorials/protocol_environment.rst @@ -0,0 +1,48 @@ +.. _protocol_environment: + +Economic protocol sandboxing +============================ + +In Alpha, as in any sound future protocols, updates are approved by +voting. That way, the responsibility of switching to a new protocol code +is the responsibility of voters, and one could argue that it is up to +them to check that the code does not call, for instance, unsafe array +access functions. + +Yet, we decided to introduce a minimum level of machine checks, by +compiling with a specific compiler that checks that no known-unsafe +function is used. This static form of sandboxing is performed by the +OCaml typechecker: we simply compile protocols in a restricted set of +modules with restricted interfaces that hide any unsafe, non wanted +feature. + +Another goal of that specific environment is maintaining a stable OCaml +API for protocol development. Imagine that at some point, the OCaml +standard library changes (a function is added or removed, a type is +changed), then we will be able to upgrade to the new OCaml while still +remaining compatible with past protocols, by providing an adapter layer. + +Here is a quick description of each file in this environment: + +- Files ``array.mli``, ``buffer.mli``, ``bytes.mli``, ``format.mli``, + ``int32.mli``, ``int64.mli``, ``list.mli``, ``map.mli``, + ``pervasives.mli``, ``set.mli`` and ``string.mli`` are stripped down + interfaces to the OCaml standard library modules. The removed + elements are: effects on toplevel references or channels, unsafe + functions, functions that are known sources of bugs, and anything + deprecated. +- As we removed polymorphic comparison operators, ``compare.mli`` + implements monomorphic operators for standard OCaml and Tezos types. + An example use is ``Compare.Int.(3 = 4)`` instead of plain OCaml + ``(3 = 4)``. +- Files ``lwt*`` is the stripped down interface to Lwt, of which we + removed any non deterministic functions, since we only use Lwt for + asynchronous access to the storage. +- Files ``data_encoding.mli``, ``error_monad.mli``, ``mBytes.mli``, + ``hash.mli``, ``base58.mli``, ``blake2B.mli``, ``ed25519.mli``, + ``hex_encode.mli``, ``json.mli``, ``time.mli``, ``z.mli``, + ``micheline.mli`` and files ``RPC_*`` are stripped down versions of + the Tezos standard library. +- Files ``tezos_data.mli``, ``context.mli``, ``fitness.mli`` and + ``updater.mli`` are interfaces to the shell’s data definitions and + storage accessors that are accessible to the protocol. diff --git a/docs/whitedoc/michelson.rst b/docs/whitedoc/michelson.rst new file mode 100644 index 000000000..40f1bebd3 --- /dev/null +++ b/docs/whitedoc/michelson.rst @@ -0,0 +1,2344 @@ +Michelson: the language of Smart Contracts in Tezos +=================================================== + +The language is stack based, with high level data types and primitives +and strict static type checking. Its design cherry picks traits from +several language families. Vigilant readers will notice direct +references to Forth, Scheme, ML and Cat. + +A Michelson program is a series of instructions that are run in +sequence: each instruction receives as input the stack resulting of the +previous instruction, and rewrites it for the next one. The stack +contains both immediate values and heap allocated structures. All values +are immutable and garbage collected. + +A Michelson program receives as input a single element stack containing +an input value and the contents of a storage space. It must return a +single element stack containing an output value and the new contents of +the storage space. Alternatively, a Michelson program can fail, +explicitly using a specific opcode, or because something went wrong that +could not be caught by the type system (e.g. division by zero, gas +exhaustion). + +The types of the input, output and storage are fixed and monomorphic, +and the program is typechecked before being introduced into the system. +No smart contract execution can fail because an instruction has been +executed on a stack of unexpected length or contents. + +This specification gives the complete instruction set, type system and +semantics of the language. It is meant as a precise reference manual, +not an easy introduction. Even though, some examples are provided at the +end of the document and can be read first or at the same time as the +specification. + +Table of contents +----------------- + +- I - Semantics +- II - Type system +- III - Core data types +- IV - Core instructions +- V - Operations +- VI - Domain specific data types +- VII - Domain specific operations +- VIII - Macros +- IX - Concrete syntax +- X - JSON syntax +- XI - Examples +- XII - Full grammar +- XIII - Reference implementation + +I - Semantics +------------- + +This specification gives a detailed formal semantics of the Michelson +language. It explains in a symbolic way the computation performed by the +Michelson interpreter on a given program and initial stack to produce +the corresponding resulting stack. The Michelson interpreter is a pure +function: it only builds a result stack from the elements of an initial +one, without affecting its environment. This semantics is then naturally +given in what is called a big step form: a symbolic definition of a +recursive reference interpreter. This definition takes the form of a +list of rules that cover all the possible inputs of the interpreter +(program and stack), and describe the computation of the corresponding +resulting stacks. + +Rules form and selection +~~~~~~~~~~~~~~~~~~~~~~~~ + +The rules have the main following form. + +:: + + > (syntax pattern) / (initial stack pattern) => (result stack pattern) + iff (conditions) + where (recursions) + +The left hand side of the ``=>`` sign is used for selecting the rule. +Given a program and an initial stack, one (and only one) rule can be +selected using the following process. First, the toplevel structure of +the program must match the syntax pattern. This is quite simple since +there is only a few non trivial patterns to deal with instruction +sequences, and the rest is made of trivial pattern that match one +specific instruction. Then, the initial stack must match the initial +stack pattern. Finally, some rules add extra conditions over the values +in the stack that follow the ``iff`` keyword. Sometimes, several rules +may apply in a given context. In this case, the one that appears first +in this specification is to be selected. If no rule applies, the result +is equivalent to the one for the explicit ``FAIL`` instruction. This +case does not happen on well-typed programs, as explained in the next +section. + +The right hand side describes the result of the interpreter if the rule +applies. It consists in a stack pattern, whose part are either +constants, or elements of the context (program and initial stack) that +have been named on the left hand side of the ``=>`` sign. + +Recursive rules (big step form) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes, the result of interpreting a program is derived from the +result of interpreting another one (as in conditionals or function +calls). In these cases, the rule contains a clause of the following +form. + +:: + + where (intermediate program) / (intermediate stack) => (partial result) + +This means that this rules applies in case interpreting the intermediate +state on the left gives the pattern on the right. + +The left hand sign of the ``=>`` sign is constructed from elements of +the initial state or other partial results, and the right hand side +identify parts that can be used to build the result stack of the rule. + +If the partial result pattern does not actually match the result of the +interpretation, then the result of the whole rule is equivalent to the +one for the explicit ``FAIL`` instruction. Again, this case does not +happen on well-typed programs, as explained in the next section. + +Format of patterns +~~~~~~~~~~~~~~~~~~ + +Code patterns are of one of the following syntactical forms. + +- ``INSTR`` (an uppercase identifier) is a simple instruction (e.g. + ``DROP``); +- ``INSTR (arg) ...`` is a compound instruction, whose arguments can be + code, data or type patterns (e.g. ``PUSH nat 3``) ; +- ``{ (instr) ; ... }`` is a possibly empty sequence of instructions, + (e.g. ``IF { SWAP ; DROP } { DROP }``), nested sequences can drop the + braces ; +- ``name`` is a pattern that matches any program and names a part of + the matched program that can be used to build the result ; +- ``_`` is a pattern that matches any instruction. + +Stack patterns are of one of the following syntactical forms. + +- ``[FAIL]`` is the special failed state ; +- ``[]`` is the empty stack ; +- ``(top) : (rest)`` is a stack whose top element is matched by the + data pattern ``(top)`` on the left, and whose remaining elements are + matched by the stack pattern ``(rest)`` on the right (e.g. + ``x : y : rest``) ; +- ``name`` is a pattern that matches any stack and names it in order to + use it to build the result ; +- ``_`` is a pattern that matches any stack. + +Data patterns are of one of the following syntactical forms. + +- integer/natural number literals, (e.g. ``3``) ; +- string literals, (e.g. ``"contents"``) ; +- ``Tag`` (capitalized) is a symbolic constant, (e.g. ``Unit``, + ``True``, ``False``) ; +- ``(Tag (arg) ...)`` tagged constructed data, (e.g. ``(Pair 3 4)``) ; +- a code pattern for first class code values ; +- ``name`` to name a value in order to use it to build the result ; +- ``_`` to match any value. + +The domain of instruction names, symbolic constants and data +constructors is fixed by this specification. Michelson does not let the +programmer introduce its own types. + +Be aware that the syntax used in the specification may differ a bit from +the concrete syntax, which is presented in Section IX. In particular, +some instructions are annotated with types that are not present in the +concrete language because they are synthesized by the typechecker. + +Shortcuts +~~~~~~~~~ + +Sometimes, it is easier to think (and shorter to write) in terms of +program rewriting than in terms of big step semantics. When it is the +case, and when both are equivalents, we write rules of the form: + +:: + + p / S => S'' + where p' / S' => S'' + +using the following shortcut: + +:: + + p / S => p' / S' + +The concrete language also has some syntax sugar to group some common +sequences of operations as one. This is described in this specification +using a simple regular expression style recursive instruction rewriting. + +II - Introduction to the type system and notations +-------------------------------------------------- + +This specification describes a type system for Michelson. To make things +clear, in particular to readers that are not accustomed to reading +formal programming language specifications, it does not give a +typechecking or inference algorithm. It only gives an intentional +definition of what we consider to be well-typed programs. For each +syntactical form, it describes the stacks that are considered well-typed +inputs, and the resulting outputs. + +The type system is sound, meaning that if a program can be given a type, +then if run on a well-typed input stack, the interpreter will never +apply an interpretation rule on a stack of unexpected length or +contents. Also, it will never reach a state where it cannot select an +appropriate rule to continue the execution. Well-typed programs do not +block, and do not go wrong. + +Type notations +~~~~~~~~~~~~~~ + +The specification introduces notations for the types of values, terms +and stacks. Apart from a subset of value types that appear in the form +of type annotations in some places throughout the language, it is +important to understand that this type language only exists in the +specification. + +A stack type can be written: + +- ``[]`` for the empty stack ; +- ``(top) : (rest)`` for the stack whose first value has type ``(top)`` + and queue has stack type ``(rest)``. + +Instructions, programs and primitives of the language are also typed, +their types are written: + +:: + + (type of stack before) -> (type of stack after) + +The types of values in the stack are written: + +- ``identifier`` for a primitive data-type (e.g. ``bool``), +- ``identifier (arg)`` for a parametric data-type with one parameter + type ``(arg)`` (e.g. ``list nat``), +- ``identifier (arg) ...`` for a parametric data-type with several + parameters (e.g. ``map string int``), +- ``[ (type of stack before) -> (type of stack after) ]`` for a code + quotation, (e.g. ``[ int : int : [] -> int : [] ]``), +- ``lambda (arg) (ret)`` is a shortcut for + ``[ (arg) : [] -> (ret) : [] ]``. + +Meta type variables +~~~~~~~~~~~~~~~~~~~ + +The typing rules introduce meta type variables. To be clear, this has +nothing to do with polymorphism, which Michelson does not have. These +variables only live at the specification level, and are used to express +the consistency between the parts of the program. For instance, the +typing rule for the ``IF`` construct introduces meta variables to +express that both branches must have the same type. + +Here are the notations for meta type variables: + +- ``'a`` for a type variable, +- ``'A`` for a stack type variable, +- ``_`` for an anonymous type or stack type variable. + +Typing rules +~~~~~~~~~~~~ + +The system is syntax directed, which means here that it defines a single +typing rule for each syntax construct. A typing rule restricts the type +of input stacks that are authorized for this syntax construct, links the +output type to the input type, and links both of them to the +subexpressions when needed, using meta type variables. + +Typing rules are of the form: + +:: + + (syntax pattern) + :: (type of stack before) -> (type of stack after) [rule-name] + iff (premises) + +Where premises are typing requirements over subprograms or values in the +stack, both of the form ``(x) :: (type)``, meaning that value ``(x)`` +must have type ``(type)``. + +A program is shown well-typed if one can find an instance of a rule that +applies to the toplevel program expression, with all meta type variables +replaced by non variable type expressions, and of which all type +requirements in the premises can be proven well-typed in the same +manner. For the reader unfamiliar with formal type systems, this is +called building a typing derivation. + +Here is an example typing derivation on a small program that computes +``(x+5)*10`` for a given input ``x``, obtained by instantiating the +typing rules for instructions ``PUSH``, ``ADD`` and for the sequence, as +found in the next sections. When instantiating, we replace the ``iff`` +with ``by``. + +:: + + { PUSH nat 5 ; ADD ; PUSH nat 10 ; SWAP ; MUL } + :: [ nat : [] -> nat : [] ] + by { PUSH nat 5 ; ADD } + :: [ nat : [] -> nat : [] ] + by PUSH nat 5 + :: [ nat : [] -> nat : nat : [] ] + by 5 :: nat + and ADD + :: [ nat : nat : [] -> nat : [] ] + and { PUSH nat 10 ; SWAP ; MUL } + :: [ nat : [] -> nat : [] ] + by PUSH nat 10 + :: [ nat : [] -> nat : nat : [] ] + by 10 :: nat + and { SWAP ; MUL } + :: [ nat : nat : [] -> nat : [] ] + by SWAP + :: [ nat : nat : [] -> nat : nat : [] ] + and MUL + :: [ nat : nat : [] -> nat : [] ] + +Producing such a typing derivation can be done in a number of manners, +such as unification or abstract interpretation. In the implementation of +Michelson, this is done by performing a recursive symbolic evaluation of +the program on an abstract stack representing the input type provided by +the programmer, and checking that the resulting symbolic stack is +consistent with the expected result, also provided by the programmer. + +Annotations +~~~~~~~~~~~ + +Most Instructions in the language can optionally take an annotation. +Annotations allow you to better track data, on the stack and within +pairs and unions. + +If added on the components of a type, the annotation will be propagated +by the typechecker througout access instructions. + +Annotating an instruction that produces a value on the stack will +rewrite the annotation an the toplevel of its type. + +Trying to annotate an instruction that does not produce a value will +result in a typechecking error. + +At join points in the program (``IF``, ``IF_LEFT``, ``IF_CONS``, +``IF_NONE``, ``LOOP``), annotations must be compatible. Annotations are +compatible if both elements are annotated with the same annotation or if +at least one of the values/types is unannotated. + +Stack visualization tools like the Michelson’s Emacs mode print +annotations associated with each type in the program, as propagated by +the typechecker. This is useful as a debugging aid. + +Side note +~~~~~~~~~ + +As with most type systems, it is incomplete. There are programs that +cannot be given a type in this type system, yet that would not go wrong +if executed. This is a necessary compromise to make the type system +usable. Also, it is important to remember that the implementation of +Michelson does not accept as many programs as the type system describes +as well-typed. This is because the implementation uses a simple single +pass typechecking algorithm, and does not handle any form of +polymorphism. + +III - Core data types and notations +----------------------------------- + +- ``string``, ``nat``, ``int``: The core primitive constant types. + +- ``bool``: The type for booleans whose values are ``True`` and + ``False`` + +- ``unit``: The type whose only value is ``Unit``, to use as a + placeholder when some result or parameter is non necessary. For + instance, when the only goal of a contract is to update its storage. + +- ``list (t)``: A single, immutable, homogeneous linked list, whose + elements are of type ``(t)``, and that we note ``{}`` for the empty + list or ``{ first ; ... }``. In the semantics, we use chevrons to + denote a subsequence of elements. For instance ``{ head ; }``. + +- ``pair (l) (r)``: A pair of values ``a`` and ``b`` of types ``(l)`` + and ``(r)``, that we write ``(Pair a b)``. + +- ``option (t)``: Optional value of type ``(t)`` that we note ``None`` + or ``(Some v)``. + +- ``or (l) (r)``: A union of two types: a value holding either a value + ``a`` of type ``(l)`` or a value ``b`` of type ``(r)``, that we write + ``(Left a)`` or ``(Right b)``. + +- ``set (t)``: Immutable sets of values of type ``(t)`` that we note as + lists ``{ item ; ... }``, of course with their elements unique, and + sorted. + +- ``map (k) (t)``: Immutable maps from keys of type ``(k)`` of values + of type ``(t)`` that we note ``{ Elt key value ; ... }``, with keys + sorted. + +IV - Core instructions +---------------------- + +Control structures +~~~~~~~~~~~~~~~~~~ + +- ``FAIL``: Explicitly abort the current program. + + :: \_ -> \_ + + This special instruction is callable in any context, since it does + not use its input stack (first rule below), and makes the output + useless since all subsequent instruction will simply ignore their + usual semantics to propagate the failure up to the main result + (second rule below). Its type is thus completely generic. + +:: + + > FAIL / _ => [FAIL] + > _ / [FAIL] => [FAIL] + +- ``{ I ; C }``: Sequence. + +:: + + :: 'A -> 'C + iff I :: [ 'A -> 'B ] + C :: [ 'B -> 'C ] + + > I ; C / SA => SC + where I / SA => SB + and C / SB => SC + +- ``IF bt bf``: Conditional branching. + +:: + + :: bool : 'A -> 'B + iff bt :: [ 'A -> 'B ] + bf :: [ 'A -> 'B ] + + > IF bt bf / True : S => bt / S + > IF bt bf / False : S => bf / S + +- ``LOOP body``: A generic loop. + +:: + + :: bool : 'A -> 'A + iff body :: [ 'A -> bool : 'A ] + + > LOOP body / True : S => body ; LOOP body / S + > LOOP body / False : S => S + +- ``LOOP_LEFT body``: A loop with an accumulator + +:: + + :: (or 'a 'b) : 'A -> 'A + iff body :: [ 'a : 'A -> (or 'a 'b) : 'A ] + + > LOOP body / (Left a) : S => body ; LOOP body / (or 'a 'b) : S + > LOOP body / (Right b) : S => b : S + +- ``DIP code``: Runs code protecting the top of the stack. + +:: + + :: 'b : 'A -> 'b : 'C + iff code :: [ 'A -> 'C ] + + > DIP code / x : S => x : S' + where code / S => S' + +- ``EXEC``: Execute a function from the stack. + +:: + + :: 'a : lambda 'a 'b : 'C -> 'b : 'C + + > EXEC / a : f : S => r : S + where f / a : [] => r : [] + +Stack operations +~~~~~~~~~~~~~~~~ + +- ``DROP``: Drop the top element of the stack. + +:: + + :: _ : 'A -> 'A + + > DROP / _ : S => S + +- ``DUP``: Duplicate the top of the stack. + +:: + + :: 'a : 'A -> 'a : 'a : 'A + + > DUP / x : S => x : x : S + +- ``SWAP``: Exchange the top two elements of the stack. + +:: + + :: 'a : 'b : 'A -> 'b : 'a : 'A + + > SWAP / x : y : S => y : x : S + +- ``PUSH 'a x``: Push a constant value of a given type onto the stack. + +:: + + :: 'A -> 'a : 'A + iff x :: 'a + + > PUSH 'a x / S => x : S + +- ``UNIT``: Push a unit value onto the stack. + +:: + + :: 'A -> unit : 'A + + > UNIT / S => Unit : S + +- ``LAMBDA 'a 'b code``: Push a lambda with given parameter and return + types onto the stack. + +:: + + :: 'A -> (lambda 'a 'b) : 'A + +Generic comparison +~~~~~~~~~~~~~~~~~~ + +Comparison only works on a class of types that we call comparable. A +``COMPARE`` operation is defined in an ad hoc way for each comparable +type, but the result of compare is always an ``int``, which can in turn +be checked in a generic manner using the following combinators. The +result of ``COMPARE`` is ``0`` if the top two elements of the stack are +equal, negative if the first element in the stack is less than the +second, and positive otherwise. + +- ``EQ``: Checks that the top of the stack EQuals zero. + +:: + + :: int : 'S -> bool : 'S + + > EQ ; C / 0 : S => C / True : S + > EQ ; C / _ : S => C / False : S + +- ``NEQ``: Checks that the top of the stack does Not EQual zero. + +:: + + :: int : 'S -> bool : 'S + + > NEQ ; C / 0 : S => C / False : S + > NEQ ; C / _ : S => C / True : S + +- ``LT``: Checks that the top of the stack is Less Than zero. + +:: + + :: int : 'S -> bool : 'S + + > LT ; C / v : S => C / True : S iff v < 0 + > LT ; C / _ : S => C / False : S + +- ``GT``: Checks that the top of the stack is Greater Than zero. + +:: + + :: int : 'S -> bool : 'S + + > GT ; C / v : S => C / True : S iff v > 0 + > GT ; C / _ : S => C / False : S + +- ``LE``: Checks that the top of the stack is Less Than of Equal to + zero. + +:: + + :: int : 'S -> bool : 'S + + > LE ; C / v : S => C / True : S iff v <= 0 + > LE ; C / _ : S => C / False : S + +- ``GE``: Checks that the top of the stack is Greater Than of Equal to + zero. + +:: + + :: int : 'S -> bool : 'S + + > GE ; C / v : S => C / True : S iff v >= 0 + > GE ; C / _ : S => C / False : S + +V - Operations +-------------- + +Operations on booleans +~~~~~~~~~~~~~~~~~~~~~~ + +- ``OR`` + +:: + + :: bool : bool : 'S -> bool : 'S + + > OR ; C / x : y : S => C / (x | y) : S + +- ``AND`` + +:: + + :: bool : bool : 'S -> bool : 'S + + > AND ; C / x : y : S => C / (x & y) : S + +- ``XOR`` + +:: + + :: bool : bool : 'S -> bool : 'S + + > XOR ; C / x : y : S => C / (x ^ y) : S + +- ``NOT`` + +:: + + :: bool : 'S -> bool : 'S + + > NOT ; C / x : S => C / ~x : S + +Operations on integers and natural numbers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Integers and naturals are arbitrary-precision, meaning the only size +limit is fuel. + +- ``NEG`` + +:: + + :: int : 'S -> int : 'S + :: nat : 'S -> int : 'S + + > NEG ; C / x : S => C / -x : S + +- ``ABS`` + +:: + + :: int : 'S -> nat : 'S + + > ABS ; C / x : S => C / abs (x) : S + +- ``ADD`` + +:: + + :: int : int : 'S -> int : 'S + :: int : nat : 'S -> int : 'S + :: nat : int : 'S -> int : 'S + :: nat : nat : 'S -> nat : 'S + + > ADD ; C / x : y : S => C / (x + y) : S + +- ``SUB`` + +:: + + :: int : int : 'S -> int : 'S + :: int : nat : 'S -> int : 'S + :: nat : int : 'S -> int : 'S + :: nat : nat : 'S -> int : 'S + + > SUB ; C / x : y : S => C / (x - y) : S + +- ``MUL`` + +:: + + :: int : int : 'S -> int : 'S + :: int : nat : 'S -> int : 'S + :: nat : int : 'S -> int : 'S + :: nat : nat : 'S -> nat : 'S + + > MUL ; C / x : y : S => C / (x * y) : S + +- ``EDIV`` Perform Euclidian division + +:: + + :: int : int : 'S -> option (pair int nat) : 'S + :: int : nat : 'S -> option (pair int nat) : 'S + :: nat : int : 'S -> option (pair int nat) : 'S + :: nat : nat : 'S -> option (pair nat nat) : 'S + + > EDIV ; C / x : 0 : S => C / None + > EDIV ; C / x : y : S => C / Some (Pair (x / y) (x % y)) : S + +Bitwise logical operators are also available on unsigned integers. + +- ``OR`` + +:: + + :: nat : nat : 'S -> nat : 'S + + > OR ; C / x : y : S => C / (x | y) : S + +- ``AND`` + +:: + + :: nat : nat : 'S -> nat : 'S + + > AND ; C / x : y : S => C / (x & y) : S + +- ``XOR`` + +:: + + :: nat : nat : 'S -> nat : 'S + + > XOR ; C / x : y : S => C / (x ^ y) : S + +- ``NOT`` The return type of ``NOT`` is an ``int`` and not a ``nat``. + This is because the sign is also negated. The resulting integer is + computed using two’s complement. For instance, the boolean negation + of ``0`` is ``-1``. + +:: + + :: nat : 'S -> int : 'S + :: int : 'S -> int : 'S + + > NOT ; C / x : S => C / ~x : S + +- ``LSL`` + +:: + + :: nat : nat : 'S -> nat : 'S + + > LSL ; C / x : s : S => C / (x << s) : S + iff s <= 256 + > LSL ; C / x : s : S => [FAIL] + +- ``LSR`` + +:: + + :: nat : nat : 'S -> nat : 'S + + > LSR ; C / x : s : S => C / (x >>> s) : S + +- ``COMPARE``: Integer/natural comparison + +:: + + :: int : int : 'S -> int : 'S + :: nat : nat : 'S -> int : 'S + + > COMPARE ; C / x : y : S => C / -1 : S iff x < y + > COMPARE ; C / x : y : S => C / 0 : S iff x = y + > COMPARE ; C / x : y : S => C / 1 : S iff x > y + +Operations on strings +~~~~~~~~~~~~~~~~~~~~~ + +Strings are mostly used for naming things without having to rely on +external ID databases. So what can be done is basically use string +constants as is, concatenate them and use them as keys. + +- ``CONCAT``: String concatenation. + +:: + + :: string : string : 'S -> string : 'S + +- ``COMPARE``: Lexicographic comparison. + +:: + + :: string : string : 'S -> int : 'S + +Operations on pairs +~~~~~~~~~~~~~~~~~~~ + +- ``PAIR``: Build a pair from the stack’s top two elements. + +:: + + :: 'a : 'b : 'S -> pair 'a 'b : 'S + + > PAIR ; C / a : b : S => C / (Pair a b) : S + +- ``CAR``: Access the left part of a pair. + +:: + + :: pair 'a _ : 'S -> 'a : 'S + + > Car ; C / (Pair a _) : S => C / a : S + +- ``CDR``: Access the right part of a pair. + +:: + + :: pair _ 'b : 'S -> 'b : 'S + + > Car ; C / (Pair _ b) : S => C / b : S + +Operations on sets +~~~~~~~~~~~~~~~~~~ + +- ``EMPTY_SET 'elt``: Build a new, empty set for elements of a given + type. + +:: + + :: 'S -> set 'elt : 'S + + The `'elt` type must be comparable (the `COMPARE` primitive must + be defined over it). + +- ``MEM``: Check for the presence of an element in a set. + +:: + + :: 'elt : set 'elt : 'S -> bool : 'S + +- ``UPDATE``: Inserts or removes an element in a set, replacing a + previous value. + +:: + + :: 'elt : bool : set 'elt : 'S -> set 'elt : 'S + +- ``MAP``: Apply a function on a map and return the map of results + under the same bindings. + + The ``'b`` type must be comparable (the ``COMPARE`` primitive must be + defined over it). + +:: + + :: lambda 'elt 'b : set 'elt : 'S -> set 'b : 'S + +- ``MAP body``: Apply the body expression to each element of the set. + The body sequence has access to the stack. + +:: + + :: (set 'elt) : 'A -> (set 'b) : 'A + iff body :: [ 'elt : 'A -> 'b : 'A ] + +- ``REDUCE``: Apply a function on a set passing the result of each + application to the next one and return the last. + +:: + + :: lambda (pair 'elt * 'b) 'b : set 'elt : 'b : 'S -> 'b : 'S + +- ``ITER body``: Apply the body expression to each element of a set. + The body sequence has access to the stack. + +:: + + :: (set 'elt) : 'A -> 'A + iff body :: [ 'elt : 'A -> 'A ] + +- ``SIZE``: Get the cardinality of the set. + +:: + + :: set 'elt : 'S -> nat : 'S + +Operations on maps +~~~~~~~~~~~~~~~~~~ + +- ``EMPTY_MAP 'key 'val``: Build a new, empty map. + + The ``'key`` type must be comparable (the ``COMPARE`` primitive must + be defined over it). + +:: + + :: 'S -> map 'key 'val : 'S + +- ``GET``: Access an element in a map, returns an optional value to be + checked with ``IF_SOME``. + +:: + + :: 'key : map 'key 'val : 'S -> option 'val : 'S + +- ``MEM``: Check for the presence of an element in a map. + +:: + + :: 'key : map 'key 'val : 'S -> bool : 'S + +- ``UPDATE``: Assign or remove an element in a map. + +:: + + :: 'key : option 'val : map 'key 'val : 'S -> map 'key 'val : 'S + +- ``MAP``: Apply a function on a map and return the map of results + under the same bindings. + +:: + + :: lambda (pair 'key 'val) 'b : map 'key 'val : 'S -> map 'key 'b : 'S + +- ``MAP body``: Apply the body expression to each element of a map. The + body sequence has access to the stack. + +:: + + :: (map 'key 'val) : 'A -> (map 'key 'b) : 'A + iff body :: [ (pair 'key 'val) : 'A -> 'b : 'A ] + +- ``REDUCE``: Apply a function on a map passing the result of each + application to the next one and return the last. + +:: + + :: lambda (pair (pair 'key 'val) 'b) 'b : map 'key 'val : 'b : 'S -> 'b : 'S + +- ``ITER body``: Apply the body expression to each element of a map. + The body sequence has access to the stack. + +:: + + :: (map 'elt 'val) : 'A -> 'A + iff body :: [ (pair 'elt 'val) : 'A -> 'A ] + +- ``SIZE``: Get the cardinality of the map. + +:: + + :: map 'key 'val : 'S -> nat : 'S + +Operations on optional values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- ``SOME``: Pack a present optional value. + +:: + + :: 'a : 'S -> 'a? : 'S + + > SOME ; C / v :: S => C / (Some v) :: S + +- ``NONE 'a``: The absent optional value. + +:: + + :: 'S -> 'a? : 'S + + > NONE ; C / v :: S => C / None :: S + +- ``IF_NONE bt bf``: Inspect an optional value. + +:: + + :: 'a? : 'S -> 'b : 'S + iff bt :: [ 'S -> 'b : 'S] + bf :: [ 'a : 'S -> 'b : 'S] + + > IF_NONE ; C / (None) : S => bt ; C / S + > IF_NONE ; C / (Some a) : S => bf ; C / a : S + +Operations on unions +~~~~~~~~~~~~~~~~~~~~ + +- ``LEFT 'b``: Pack a value in a union (left case). + +:: + + :: 'a : 'S -> or 'a 'b : 'S + + > LEFT ; C / v :: S => C / (Left v) :: S + +- ``RIGHT 'a``: Pack a value in a union (right case). + +:: + + :: 'b : 'S -> or 'a 'b : 'S + + > RIGHT ; C / v :: S => C / (Right v) :: S + +- ``IF_LEFT bt bf``: Inspect a value of a variant type. + +:: + + :: or 'a 'b : 'S -> 'c : 'S + iff bt :: [ 'a : 'S -> 'c : 'S] + bf :: [ 'b : 'S -> 'c : 'S] + + > IF_LEFT ; C / (Left a) : S => bt ; C / a : S + > IF_LEFT ; C / (Right b) : S => bf ; C / b : S + +- ``IF_RIGHT bt bf``: Inspect a value of a variant type. + +:: + + :: or 'a 'b : 'S -> 'c : 'S + iff bt :: [ 'b : 'S -> 'c : 'S] + bf :: [ 'a : 'S -> 'c : 'S] + + > IF_LEFT ; C / (Right b) : S => bt ; C / b : S + > IF_RIGHT ; C / (Left a) : S => bf ; C / a : S + +Operations on lists +~~~~~~~~~~~~~~~~~~~ + +- ``CONS``: Prepend an element to a list. + +:: + + :: 'a : list 'a : 'S -> list 'a : 'S + + > CONS ; C / a : { } : S => C / { a ; } : S + +- ``NIL 'a``: The empty list. + +:: + + :: 'S -> list 'a : 'S + + > NIL ; C / S => C / {} : S + +- ``IF_CONS bt bf``: Inspect an optional value. + +:: + + :: list 'a : 'S -> 'b : 'S + iff bt :: [ 'a : list 'a : 'S -> 'b : 'S] + bf :: [ 'S -> 'b : 'S] + + > IF_CONS ; C / { a ; } : S => bt ; C / a : { } : S + > IF_CONS ; C / {} : S => bf ; C / S + +- ``MAP``: Apply a function on a list from left to right and return the + list of results in the same order. + +:: + + :: lambda 'a 'b : list 'a : 'S -> list 'b : 'S + +- ``MAP body``: Apply the body expression to each element of the list. + The body sequence has access to the stack. + +:: + + :: (list 'elt) : 'A -> (list 'b) : 'A + iff body :: [ 'elt : 'A -> 'b : 'A ] + +- ``REDUCE``: Apply a function on a list from left to right passing the + result of each application to the next one and return the last. + +:: + + :: lambda (pair 'a 'b) 'b : list 'a : 'b : 'S -> 'b : 'S + +- ``SIZE``: Get the number of elements in the list. + +:: + + :: list 'elt : 'S -> nat : 'S + +- ``ITER body``: Apply the body expression to each element of a list. + The body sequence has access to the stack. + +:: + + :: (list 'elt) : 'A -> 'A + iff body :: [ 'elt : 'A -> 'A ] + +VI - Domain specific data types +------------------------------- + +- ``timestamp``: Dates in the real world. + +- ``tez``: A specific type for manipulating tokens. + +- ``contract 'param 'result``: A contract, with the type of its code. + +- ``key``: A public cryptography key. + +- ``key_hash``: The hash of a public cryptography key. + +- ``signature``: A cryptographic signature. + +VII - Domain specific operations +-------------------------------- + +Operations on timestamps +~~~~~~~~~~~~~~~~~~~~~~~~ + +Current Timestamps can be obtained by the ``NOW`` operation, or +retrieved from script parameters or globals. + +- ``ADD`` Increment / decrement a timestamp of the given number of + seconds. + +:: + + :: timestamp : int : 'S -> timestamp : 'S + :: int : timestamp : 'S -> timestamp : 'S + + > ADD ; C / seconds : nat (t) : S => C / (seconds + t) : S + > ADD ; C / nat (t) : seconds : S => C / (t + seconds) : S + +- ``SUB`` Subtract a number of seconds from a timestamp. + +:: + + :: timestamp : int : 'S -> timestamp : 'S + + > SUB ; C / seconds : nat (t) : S => C / (seconds - t) : S + +- ``SUB`` Subtract two timestamps. + +:: + + :: timestamp : timestamp : 'S -> int : 'S + + > SUB ; C / seconds(t1) : seconds(t2) : S => C / (t1 - t2) : S + +- ``COMPARE``: Timestamp comparison. + +:: + + :: timestamp : timestamp : 'S -> int : 'S + +Operations on Tez +~~~~~~~~~~~~~~~~~ + +Tez are internally represented by a 64 bit signed integer. There are +restrictions to prevent creating a negative amount of tez. Operations +are limited to prevent overflow and mixing them with other numerical +types by mistake. They are also mandatory checked for under/overflows. + +- ``ADD``: + +:: + + :: tez : tez : 'S -> tez : 'S + + > ADD ; C / x : y : S => [FAIL] on overflow + > ADD ; C / x : y : S => C / (x + y) : S + +- ``SUB``: + +:: + + :: tez : tez : 'S -> tez : 'S + + > SUB ; C / x : y : S => [FAIL] iff x < y + > SUB ; C / x : y : S => C / (x - y) : S + +- ``MUL`` + +:: + + :: tez : nat : 'S -> tez : 'S + :: nat : tez : 'S -> tez : 'S + + > MUL ; C / x : y : S => [FAIL] on overflow + > MUL ; C / x : y : S => C / (x * y) : S + +- ``EDIV`` + +:: + + :: tez : nat : 'S -> option (pair tez tez) : 'S + :: tez : tez : 'S -> option (pair nat tez) : 'S + + > EDIV ; C / x : 0 : S => C / None + > EDIV ; C / x : y : S => C / Some (Pair (x / y) (x % y)) : S + +- ``COMPARE``: + + :: tez : tez : ’S -> int : ’S + +Operations on contracts +~~~~~~~~~~~~~~~~~~~~~~~ + +- ``MANAGER``: Access the manager of a contract. + +:: + + :: contract 'p 'r : 'S -> key_hash : 'S + +- ``CREATE_CONTRACT``: Forge a new contract. + +:: + + :: key_hash : key_hash? : bool : bool : tez : lambda (pair 'p 'g) (pair 'r 'g) : 'g : 'S + -> contract 'p 'r : 'S + +As with non code-emitted originations the contract code takes as +argument the transferred amount plus an ad-hoc argument and returns an +ad-hoc value. The code also takes the global data and returns it to be +stored and retrieved on the next transaction. These data are initialized +by another parameter. The calling convention for the code is as follows: +``(Pair arg globals)) -> (Pair ret globals)``, as extrapolatable from +the instruction type. The first parameters are the manager, optional +delegate, then spendable and delegatable flags and finally the initial +amount taken from the currently executed contract. The contract is +returned as a first class value to be called immediately or stored. + +- ``CREATE_CONTRACT { storage 'g ; parameter 'p ; return 'r ; code ... }``: + Forge a new contract from a literal. + +:: + + :: key_hash : key_hash? : bool : bool : tez : 'g : 'S + -> contract 'p 'r : 'S + +Originate a contract based on a literal. This is currently the only way +to include transfers inside of an originated contract. The first +parameters are the manager, optional delegate, then spendable and +delegatable flags and finally the initial amount taken from the +currently executed contract. The contract is returned as a first class +value to be called immediately or stored. + +- ``CREATE_ACCOUNT``: Forge an account (a contract without code). + +:: + + :: key_hash : key_hash? : bool : tez : 'S -> contract unit unit : 'S + +Take as argument the manager, optional delegate, the delegatable flag +and finally the initial amount taken from the currently executed +contract. + +- ``TRANSFER_TOKENS``: Forge and evaluate a transaction. + +:: + + :: 'p : tez : contract 'p 'r : 'g : [] -> 'r : 'g : [] + +The parameter and return value must be consistent with the ones expected +by the contract, unit for an account. To preserve the global consistency +of the system, the current contract’s storage must be updated before +passing the control to another script. For this, the script must put the +partially updated storage on the stack (’g is the type of the contract’s +storage). If a recursive call to the current contract happened, the +updated storage is put on the stack next to the return value. Nothing +else can remain on the stack during a nested call. If some local values +have to be kept for after the nested call, they have to be stored +explicitly in a transient part of the storage. A trivial example of that +is to reserve a boolean in the storage, initialized to false, reset to +false at the end of each contract execution, and set to true during a +nested call. This thus gives an easy way for a contract to prevent +recursive call (the contract just fails if the boolean is true). + +- ``BALANCE``: Push the current amount of tez of the current contract. + +:: + + :: 'S -> tez :: 'S + +- ``SOURCE 'p 'r``: Push the source contract of the current + transaction. + +:: + + :: 'S -> contract 'p 'r :: 'S + +- ``SELF``: Push the current contract. + +:: + + :: 'S -> contract 'p 'r :: 'S + where contract 'p 'r is the type of the current contract + +- ``AMOUNT``: Push the amount of the current transaction. + +:: + + :: 'S -> tez :: 'S + +- ``DEFAULT_ACCOUNT``: Return a default contract with the given + public/private key pair. Any funds deposited in this contract can + immediately be spent by the holder of the private key. This contract + cannot execute Michelson code and will always exist on the + blockchain. + +:: + + :: key_hash : 'S -> contract unit unit :: 'S + +Special operations +~~~~~~~~~~~~~~~~~~ + +- ``STEPS_TO_QUOTA``: Push the remaining steps before the contract + execution must terminate. + +:: + + :: 'S -> nat :: 'S + +- ``NOW``: Push the timestamp of the block whose validation triggered + this execution (does not change during the execution of the + contract). + +:: + + :: 'S -> timestamp :: 'S + +Cryptographic primitives +~~~~~~~~~~~~~~~~~~~~~~~~ + +- ``HASH_KEY``: Compute the b58check of a public key. + +:: + + :: key : 'S -> key_hash : 'S + +- ``H``: Compute a cryptographic hash of the value contents using the + Blake2B cryptographic hash function. + +:: + + :: 'a : 'S -> string : 'S + +- ``CHECK_SIGNATURE``: Check that a sequence of bytes has been signed + with a given key. + +:: + + :: key : pair signature string : 'S -> bool : 'S + +- ``COMPARE``: + +:: + + :: key_hash : key_hash : 'S -> int : 'S + +VIII - Macros +------------- + +In addition to the operations above, several extensions have been added +to the language’s concrete syntax. If you are interacting with the node +via RPC, bypassing the client, which expands away these macros, you will +need to de-surgar them yourself. + +These macros are designed to be unambiguous and reversible, meaning that +errors are reported in terms of de-sugared syntax. Below you’ll see +these macros defined in terms of other syntactic forms. That is how +these macros are seen by the node. + +Compare +~~~~~~~ + +Syntactic sugar exists for merging ``COMPARE`` and comparison +combinators, and also for branching. + +- ``CMP{EQ|NEQ|LT|GT|LE|GE}`` + +:: + + > CMP(\op) ; C / S => COMPARE ; (\op) ; C / S + +- ``IF{EQ|NEQ|LT|GT|LE|GE} bt bf`` + +:: + + > IF(\op) ; C / S => (\op) ; IF bt bf ; C / S + +- ``IFCMP{EQ|NEQ|LT|GT|LE|GE} bt bf`` + +:: + + > IFCMP(\op) ; C / S => COMPARE ; (\op) ; IF bt bf ; C / S + +Assertion Macros +~~~~~~~~~~~~~~~~ + +All assertion operations are syntactic sugar for conditionals with a +``FAIL`` instruction in the appropriate branch. When possible, use them +to increase clarity about illegal states. + +- ``ASSERT``: + +:: + + > IF {} {FAIL} + +- ``ASSERT_{EQ|NEQ|LT|LE|GT|GE}``: + +:: + + > ASSERT_(\op) => IF(\op) {} {FAIL} + +- ``ASSERT_CMP{EQ|NEQ|LT|LE|GT|GE}``: + +:: + + > ASSERT_CMP(\op) => IFCMP(\op) {} {FAIL} + +- ``ASSERT_NONE``: Equivalent to \``. + +:: + + > ASSERT_NONE => IF_NONE {} {FAIL} + +- ``ASSERT_SOME``: Equivalent to ``IF_NONE {FAIL} {}``. + +:: + + > ASSERT_NONE => IF_NONE {FAIL} {} + +- ``ASSERT_LEFT``: + +:: + + > ASSERT_LEFT => IF_LEFT {} {FAIL} + +- ``ASSERT_RIGHT``: + +:: + + > ASSERT_RIGHT => IF_LEFT {FAIL} {} + +Syntactic Conveniences +~~~~~~~~~~~~~~~~~~~~~~ + +These are macros are simply more convenient syntax for various common +operations. + +- ``DII+P code``: A syntactic sugar for working deeper in the stack. + +:: + + > DII(\rest=I*)P code / S => DIP (DI(\rest)P code) / S + +- ``DUU+P``: A syntactic sugar for duplicating the ``n``\ th element of + the stack. + +:: + + > DUU(\rest=U*)P / S => DIP (DU(\rest)P) ; SWAP / S + +- ``P(A*AI)+R``: A syntactic sugar for building nested pairs in bulk. + +:: + + > P(\fst=A*)AI(\rest=(A*AI)+)R ; C / S => P(\fst)AIR ; P(\rest)R ; C / S + > PA(\rest=A*)AIR ; C / S => DIP (P(\rest)AIR) ; C / S + +- ``C[AD]+R``: A syntactic sugar for accessing fields in nested pairs. + +:: + + > CA(\rest=[AD]+)R ; C / S => CAR ; C(\rest)R ; C / S + > CD(\rest=[AD]+)R ; C / S => CDR ; C(\rest)R ; C / S + +- ``IF_SOME bt bf``: Inspect an optional value. + +:: + + :: 'a? : 'S -> 'b : 'S + iff bt :: [ 'a : 'S -> 'b : 'S] + bf :: [ 'S -> 'b : 'S] + + > IF_SOME ; C / (Some a) : S => bt ; C / a : S + > IF_SOME ; C / (None) : S => bf ; C / S + +- ``SET_CAR``: Set the first value of a pair. + +:: + + > SET_CAR => CDR ; SWAP ; PAIR + +- ``SET_CDR``: Set the first value of a pair. + +:: + + > SET_CDR => CAR ; PAIR + +- ``SET_C[AD]+R``: A syntactic sugar for setting fields in nested + pairs. + +:: + + > SET_CA(\rest=[AD]+)R ; C / S => + { DUP ; DIP { CAR ; SET_C(\rest)R } ; CDR ; SWAP ; PAIR } ; C / S + > SET_CD(\rest=[AD]+)R ; C / S => + { DUP ; DIP { CDR ; SET_C(\rest)R } ; CAR ; PAIR } ; C / S + +- ``MAP_CAR`` code: Transform the first value of a pair. + +:: + + > SET_CAR => DUP ; CDR ; SWAP ; code ; CAR ; PAIR + +- ``MAP_CDR`` code: Transform the first value of a pair. + +:: + + > SET_CDR => DUP ; CDR ; code ; SWAP ; CAR ; PAIR + +- ``MAP_C[AD]+R`` code: A syntactic sugar for transforming fields in + nested pairs. + +:: + + > MAP_CA(\rest=[AD]+)R ; C / S => + { DUP ; DIP { CAR ; MAP_C(\rest)R code } ; CDR ; SWAP ; PAIR } ; C / S + > MAP_CD(\rest=[AD]+)R ; C / S => + { DUP ; DIP { CDR ; MAP_C(\rest)R code } ; CAR ; PAIR } ; C / S + +IX - Concrete syntax +-------------------- + +The concrete language is very close to the formal notation of the +specification. Its structure is extremely simple: an expression in the +language can only be one of the four following constructs. + +1. An integer. +2. A character string. +3. The application of a primitive to a sequence of expressions. +4. A sequence of expressions. + +This simple four cases notation is called Micheline. + +Constants +~~~~~~~~~ + +There are two kinds of constants: + +1. Integers or naturals in decimal (no prefix), hexadecimal (0x prefix), + octal (0o prefix) or binary (0b prefix). +2. Strings with usual escapes ``\n``, ``\t``, ``\b``, ``\r``, ``\\``, + ``\"``. The encoding of a Michelson source file must be UTF-8, and + non-ASCII characters can only appear in comments. No line break can + appear in a string. Any non-printable characters must be escaped + using two hexadecimal characters, as in ``\xHH`` or the + predefine escape sequences above.. + +Primitive applications +~~~~~~~~~~~~~~~~~~~~~~ + +A primitive application is a name followed by arguments + +:: + + prim arg1 arg2 + +When a primitive application is the argument to another primitive +application, it must be wrapped with parentheses. + +:: + + prim (prim1 arg11 arg12) (prim2 arg21 arg22) + +Sequences +~~~~~~~~~ + +Successive expression can be grouped as a single sequence expression +using curly braces as delimiters and semicolon as separators. + +:: + + { expr1 ; expr2 ; expr3 ; expr4 } + +A sequence can be passed as argument to a primitive. + +:: + + prim arg1 arg2 { arg3_expr1 ; arg3_expr2 } + +Primitive applications right inside a sequence cannot be wrapped. + +:: + + { (prim arg1 arg2) } # is not ok + +Indentation +~~~~~~~~~~~ + +To remove ambiguities for human readers, the parser enforces some +indentation rules. + +- For sequences: + + - All expressions in a sequence must be aligned on the same column. + - An exception is made when consecutive expressions fit on the same + line, as long as the first of them is correctly aligned. + - All expressions in a sequence must be indented to the right of the + opening curly brace by at least one column. + - The closing curly brace cannot be on the left of the opening one. + +- For primitive applications: + + - All arguments in an application must be aligned on the same + column. + - An exception is made when consecutive arguments fit on the same + line, as long as the first of them is correctly aligned. + - All arguments in a sequence must be indented to the right of the + primitive name by at least one column. + +.. _annotations-1: + +Annotations +~~~~~~~~~~~ + +Sequences and primitive applications can receive an annotation. + +An annotation is a lowercase identifier that starts with an ``@`` sign. +It comes after the opening curly brace for sequence, and after the +primitive name for primitive applications. + +:: + + { @annot + expr ; + expr ; + ... } + + (prim @annot arg arg ...) + +Differences with the formal notation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The concrete syntax follows the same lexical conventions as the +specification: instructions are represented by uppercase identifiers, +type constructors by lowercase identifiers, and constant constructors +are Capitalized. + +All domain specific constants are Micheline strings with specific +formats: + +- ``tez`` amounts are written using the same notation as JSON schemas + and the command line client: thousands are optionally separated by + commas, and so goes for mutez. + + - in regexp form: ``([0-9]{1,3}(,[0-9]{3})+)|[0-9]+(\.[0.9]{2})?`` + - ``"1234567"`` means 1234567 tez + - ``"1,234,567"`` means 1234567 tez + - ``"1234567.89"`` means 1234567890000 mutez + - ``"1,234,567.0"`` means 123456789 tez + - ``"10,123.456,789"`` means 10123456789 mutez + - ``"1234,567"`` is invalid + - ``"1,234,567.123456"`` is invalid + +- ``timestamp``\ s are written using ``RFC 339`` notation. +- ``contract``\ s are the raw strings returned by JSON RPCs or the + command line interface and cannot be forged by hand so their format + is of no interest here. +- ``key``\ s are ``Blake2B`` hashes of ``ed25519`` public keys encoded + in ``base58`` format with the following custom alphabet: + ``"eXMNE9qvHPQDdcFx5J86rT7VRm2atAypGhgLfbS3CKjnksB4"``. +- ``signature``\ s are ``ed25519`` signatures as a series of + hex-encoded bytes. + +To prevent errors, control flow primitives that take instructions as +parameters require sequences in the concrete syntax. + +:: + + IF { instr1_true ; instr2_true ; ... } + { instr1_false ; instr2_false ; ... } + +Main program structure +~~~~~~~~~~~~~~~~~~~~~~ + +The toplevel of a smart contract file must be an un-delimited sequence +of four primitive applications (in no particular order) that provide its +``parameter``, ``return`` and ``storage`` types, as well as its +``code``. + +See the next section for a concrete example. + +Comments +~~~~~~~~ + +A hash sign (``#``) anywhere outside of a string literal will make the +rest of the line (and itself) completely ignored, as in the following +example. + +:: + + { PUSH nat 1 ; # pushes 1 + PUSH nat 2 ; # pushes 2 + ADD } # computes 2 + 1 + +Comments that span on multiple lines or that stop before the end of the +line can also be written, using C-like delimiters (``/* ... */``). + +X - JSON syntax +--------------- + +Micheline expressions are encoded in JSON like this: + +- An integer ``N`` is an object with a single field ``"int"`` whose + valus is the decimal representation as a string. + + ``{ "int": "N" }`` + +- A string ``"contents"`` is an object with a single field ``"string"`` + whose valus is the decimal representation as a string. + + ``{ "string": "contents" }`` + +- A sequence is a JSON array. + + ``[ expr, ... ]`` + +- A primitive application is an object with two fields ``"prim"`` for + the primitive name and ``"args"`` for the arguments (that must + contain an array). A third optionnal field ``"annot"`` may contains + an annotation, including the ``@`` sign. + + { “prim”: “pair”, “args”: [ { “prim”: “nat”, args: [] }, { “prim”: + “nat”, args: [] } ], “annot”: “@numbers” }\` + +As in the concrete syntax, all domain specific constants are encoded as +strings. + +XI - Examples +------------- + +Contracts in the system are stored as a piece of code and a global data +storage. The type of the global data of the storage is fixed for each +contract at origination time. This is ensured statically by checking on +origination that the code preserves the type of the global data. For +this, the code of the contract is checked to be of the following type +lambda (pair ’arg ’global) -> (pair ’ret ’global) where ’global is the +type of the original global store given on origination. The contract +also takes a parameter and returns a value, hence the complete calling +convention above. + +Empty contract +~~~~~~~~~~~~~~ + +Any contract with the same ``parameter`` and ``return`` types may be +written with an empty sequence in its ``code`` section. The simplest +contract is the contract for which the ``parameter``, ``storage``, and +``return`` are all of type ``unit``. This contract is as follows: + +:: + + code {}; + storage unit; + parameter unit; + return unit; + +Reservoir contract +~~~~~~~~~~~~~~~~~~ + +We want to create a contract that stores tez until a timestamp ``T`` or +a maximum amount ``N`` is reached. Whenever ``N`` is reached before +``T``, all tokens are reversed to an account ``B`` (and the contract is +automatically deleted). Any call to the contract’s code performed after +``T`` will otherwise transfer the tokens to another account ``A``. + +We want to build this contract in a reusable manner, so we do not +hard-code the parameters. Instead, we assume that the global data of the +contract are ``(Pair (Pair T N) (Pair A B))``. + +Hence, the global data of the contract has the following type + +:: + + 'g = + pair + (pair timestamp tez) + (pair (contract unit unit) (contract unit unit)) + +Following the contract calling convention, the code is a lambda of type + +:: + + lambda + (pair unit 'g) + (pair unit 'g) + +written as + +:: + + lambda + (pair + unit + (pair + (pair timestamp tez) + (pair (contract unit unit) (contract unit unit)))) + (pair + unit + (pair + (pair timestamp tez) + (pair (contract unit unit) (contract unit unit)))) + +The complete source ``reservoir.tz`` is: + +:: + + parameter timestamp ; + storage + (pair + (pair timestamp tez) # T N + (pair (contract unit unit) (contract unit unit))) ; # A B + return unit ; + code + { DUP ; CDAAR ; # T + NOW ; + COMPARE ; LE ; + IF { DUP ; CDADR ; # N + BALANCE ; + COMPARE ; LE ; + IF { CDR ; UNIT ; PAIR } + { DUP ; CDDDR ; # B + BALANCE ; UNIT ; + DIIIP { CDR } ; + TRANSFER_TOKENS ; + PAIR } } + { DUP ; CDDAR ; # A + BALANCE ; + UNIT ; + DIIIP { CDR } ; + TRANSFER_TOKENS ; + PAIR } } + +Reservoir contract (variant with broker and status) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We basically want the same contract as the previous one, but instead of +destroying it, we want to keep it alive, storing a flag ``S`` so that we +can tell afterwards if the tokens have been transferred to ``A`` or +``B``. We also want a broker ``X`` to get some fee ``P`` in any case. + +We thus add variables ``P`` and ``S`` and ``X`` to the global data of +the contract, now +``(Pair (S, Pair (T, Pair (Pair P N) (Pair X (Pair A B)))))``. ``P`` is +the fee for broker ``A``, ``S`` is the state, as a string ``"open"``, +``"timeout"`` or ``"success"``. + +At the beginning of the transaction: + +:: + + S is accessible via a CDAR + T via a CDDAR + P via a CDDDAAR + N via a CDDDADR + X via a CDDDDAR + A via a CDDDDDAR + B via a CDDDDDDR + +For the contract to stay alive, we test that all least ``(Tez "1.00")`` +is still available after each transaction. This value is given as an +example and must be updated according to the actual Tezos minimal value +for contract balance. + +The complete source ``scrutable_reservoir.tz`` is: + +:: + + parameter timestamp ; + storage + (pair + string # S + (pair + timestamp # T + (pair + (pair tez tez) ; # P N + (pair + (contract unit unit) # X + (pair (contract unit unit) (contract unit unit)))))) ; # A B + return unit ; + code + { DUP ; CDAR # S + PUSH string "open" ; + COMPARE ; NEQ ; + IF { FAIL } # on "success", "timeout" or a bad init value + { DUP ; CDDAR ; # T + NOW ; + COMPARE ; LT ; + IF { # Before timeout + # We compute ((1 + P) + N) tez for keeping the contract alive + PUSH tez "1.00" ; + DIP { DUP ; CDDDAAR } ; ADD ; # P + DIP { DUP ; CDDDADR } ; ADD ; # N + # We compare to the cumulated amount + BALANCE ; + COMPARE; LT ; + IF { # Not enough cash, we just accept the transaction + # and leave the global untouched + CDR } + { # Enough cash, successful ending + # We update the global + CDDR ; PUSH string "success" ; PAIR ; + # We transfer the fee to the broker + DUP ; CDDAAR ; # P + DIP { DUP ; CDDDAR } # X + UNIT ; TRANSFER_TOKENS ; DROP ; + # We transfer the rest to A + DUP ; CDDADR ; # N + DIP { DUP ; CDDDDAR } # A + UNIT ; TRANSFER_TOKENS ; DROP } } + { # After timeout, we refund + # We update the global + CDDR ; PUSH string "timeout" ; PAIR ; + # We try to transfer the fee to the broker + PUSH tez "1.00" ; BALANCE ; SUB ; # available + DIP { DUP ; CDDAAR } ; # P + COMPARE ; LT ; # available < P + IF { PUSH tez "1.00" ; BALANCE ; SUB ; # available + DIP { DUP ; CDDDAR } # X + UNIT ; TRANSFER_TOKENS ; DROP } + { DUP ; CDDAAR ; # P + DIP { DUP ; CDDDAR } # X + UNIT ; TRANSFER_TOKENS ; DROP } + # We transfer the rest to B + PUSH tez "1.00" ; BALANCE ; SUB ; # available + DIP { DUP ; CDDDDDR } # B + UNIT ; TRANSFER_TOKENS ; DROP } } + # return Unit + UNIT ; PAIR } + +Forward contract +~~~~~~~~~~~~~~~~ + +We want to write a forward contract on dried peas. The contract takes as +global data the tons of peas ``Q``, the expected delivery date ``T``, +the contract agreement date ``Z``, a strike ``K``, a collateral ``C`` +per ton of dried peas, and the accounts of the buyer ``B``, the seller +``S`` and the warehouse ``W``. + +These parameters as grouped in the global storage as follows: + +:: + + Pair + (Pair (Pair Q (Pair T Z))) + (Pair + (Pair K C) + (Pair (Pair B S) W)) + +of type + +:: + + pair + (pair nat (pair timestamp timestamp)) + (pair + (pair tez tez) + (pair (pair account account) account)) + +The 24 hours after timestamp ``Z`` are for the buyer and seller to store +their collateral ``(Q * C)``. For this, the contract takes a string as +parameter, matching ``"buyer"`` or ``"seller"`` indicating the party for +which the tokens are transferred. At the end of this day, each of them +can send a transaction to send its tokens back. For this, we need to +store who already paid and how much, as a ``(pair tez tez)`` where the +left component is the buyer and the right one the seller. + +After the first day, nothing cam happen until ``T``. + +During the 24 hours after ``T``, the buyer must pay ``(Q * K)`` to the +contract, minus the amount already sent. + +After this day, if the buyer didn’t pay enough then any transaction will +send all the tokens to the seller. + +Otherwise, the seller must deliver at least ``Q`` tons of dried peas to +the warehouse, in the next 24 hours. When the amount is equal to or +exceeds ``Q``, all the tokens are transferred to the seller and the +contract is destroyed. For storing the quantity of peas already +delivered, we add a counter of type ``nat`` in the global storage. For +knowing this quantity, we accept messages from W with a partial amount +of delivered peas as argument. + +After this day, any transaction will send all the tokens to the buyer +(not enough peas have been delivered in time). + +Hence, the global storage is a pair, with the counters on the left, and +the constant parameters on the right, initially as follows. + +:: + + Pair + (Pair 0 (Pair 0_00 0_00)) + (Pair + (Pair (Pair Q (Pair T Z))) + (Pair + (Pair K C) + (Pair (Pair B S) W))) + +of type + +:: + + pair + (pair nat (pair tez tez)) + (pair + (pair nat (pair timestamp timestamp)) + (pair + (pair tez tez) + (pair (pair account account) account))) + +The parameter of the transaction will be either a transfer from the +buyer or the seller or a delivery notification from the warehouse of +type ``(or string nat)``. + +At the beginning of the transaction: + +:: + + Q is accessible via a CDDAAR + T via a CDDADAR + Z via a CDDADDR + K via a CDDDAAR + C via a CDDDADR + B via a CDDDDAAR + S via a CDDDDADR + W via a CDDDDDR + the delivery counter via a CDAAR + the amount versed by the seller via a CDADDR + the argument via a CAR + +The contract returns a unit value, and we assume that it is created with +the minimum amount, set to ``(Tez "1.00")``. + +The complete source ``forward.tz`` is: + +:: + + parameter (or string nat) ; + return unit ; + storage + (pair + (pair nat (pair tez tez)) # counter from_buyer from_seller + (pair + (pair nat (pair timestamp timestamp)) # Q T Z + (pair + (pair tez tez) # K C + (pair + (pair (contract unit unit) (contract unit unit)) # B S + (contract unit unit))))) ; # W + code + { DUP ; CDDADDR ; # Z + PUSH nat 86400 ; SWAP ; ADD ; # one day in second + NOW ; COMPARE ; LT ; + IF { # Before Z + 24 + DUP ; CAR ; # we must receive (Left "buyer") or (Left "seller") + IF_LEFT + { DUP ; PUSH string "buyer" ; COMPARE ; EQ ; + IF { DROP ; + DUP ; CDADAR ; # amount already versed by the buyer + DIP { AMOUNT } ; ADD ; # transaction + # then we rebuild the globals + DIP { DUP ; CDADDR } ; PAIR ; # seller amount + PUSH nat 0 ; PAIR ; # delivery counter at 0 + DIP { CDDR } ; PAIR ; # parameters + # and return Unit + UNIT ; PAIR } + { PUSH string "seller" ; COMPARE ; EQ ; + IF { DUP ; CDADDR ; # amount already versed by the seller + DIP { AMOUNT } ; ADD ; # transaction + # then we rebuild the globals + DIP { DUP ; CDADAR } ; SWAP ; PAIR ; # buyer amount + PUSH nat 0 ; PAIR ; # delivery counter at 0 + DIP { CDDR } ; PAIR ; # parameters + # and return Unit + UNIT ; PAIR } + { FAIL } } } # (Left _) + { FAIL } } # (Right _) + { # After Z + 24 + # test if the required amount is reached + DUP ; CDDAAR ; # Q + DIP { DUP ; CDDDADR } ; MUL ; # C + PUSH nat 2 ; MUL ; + PUSH tez "1.00" ; ADD ; + BALANCE ; COMPARE ; LT ; # balance < 2 * (Q * C) + 1 + IF { # refund the parties + CDR ; DUP ; CADAR ; # amount versed by the buyer + DIP { DUP ; CDDDAAR } # B + UNIT ; TRANSFER_TOKENS ; DROP + DUP ; CADDR ; # amount versed by the seller + DIP { DUP ; CDDDADR } # S + UNIT ; TRANSFER_TOKENS ; DROP + BALANCE ; # bonus to the warehouse to destroy the account + DIP { DUP ; CDDDDR } # W + UNIT ; TRANSFER_TOKENS ; DROP + # return unit, don't change the global + # since the contract will be destroyed + UNIT ; PAIR } + { # otherwise continue + DUP ; CDDADAR # T + NOW ; COMPARE ; LT + IF { FAIL } # Between Z + 24 and T + { # after T + DUP ; CDDADAR # T + PUSH nat 86400 ; ADD # one day in second + NOW ; COMPARE ; LT + IF { # Between T and T + 24 + # we only accept transactions from the buyer + DUP ; CAR ; # we must receive (Left "buyer") + IF_LEFT + { PUSH string "buyer" ; COMPARE ; EQ ; + IF { DUP ; CDADAR ; # amount already versed by the buyer + DIP { AMOUNT } ; ADD ; # transaction + # The amount must not exceed Q * K + DUP ; + DIIP { DUP ; CDDAAR ; # Q + DIP { DUP ; CDDDAAR } ; MUL ; } ; # K + DIP { COMPARE ; GT ; # new amount > Q * K + IF { FAIL } { } } ; # abort or continue + # then we rebuild the globals + DIP { DUP ; CDADDR } ; PAIR ; # seller amount + PUSH nat 0 ; PAIR ; # delivery counter at 0 + DIP { CDDR } ; PAIR ; # parameters + # and return Unit + UNIT ; PAIR } + { FAIL } } # (Left _) + { FAIL } } # (Right _) + { # After T + 24 + # test if the required payment is reached + DUP ; CDDAAR ; # Q + DIP { DUP ; CDDDAAR } ; MUL ; # K + DIP { DUP ; CDADAR } ; # amount already versed by the buyer + COMPARE ; NEQ ; + IF { # not reached, pay the seller and destroy the contract + BALANCE ; + DIP { DUP ; CDDDDADR } # S + DIIP { CDR } ; + UNIT ; TRANSFER_TOKENS ; DROP ; + # and return Unit + UNIT ; PAIR } + { # otherwise continue + DUP ; CDDADAR # T + PUSH nat 86400 ; ADD ; + PUSH nat 86400 ; ADD ; # two days in second + NOW ; COMPARE ; LT + IF { # Between T + 24 and T + 48 + # We accept only delivery notifications, from W + DUP ; CDDDDDR ; MANAGER ; # W + SOURCE unit unit ; MANAGER ; + COMPARE ; NEQ ; + IF { FAIL } {} # fail if not the warehouse + DUP ; CAR ; # we must receive (Right amount) + IF_LEFT + { FAIL } # (Left _) + { # We increment the counter + DIP { DUP ; CDAAR } ; ADD ; + # And rebuild the globals in advance + DIP { DUP ; CDADR } ; PAIR ; + DIP { CDDR } ; PAIR ; + UNIT ; PAIR ; + # We test if enough have been delivered + DUP ; CDAAR ; + DIP { DUP ; CDDAAR } ; + COMPARE ; LT ; # counter < Q + IF { CDR } # wait for more + { # Transfer all the money to the seller + BALANCE ; # and destroy the contract + DIP { DUP ; CDDDDADR } # S + DIIP { CDR } ; + UNIT ; TRANSFER_TOKENS ; DROP } } ; + UNIT ; PAIR } + { # after T + 48, transfer everything to the buyer + BALANCE ; # and destroy the contract + DIP { DUP ; CDDDDAAR } # B + DIIP { CDR } ; + UNIT ; TRANSFER_TOKENS ; DROP ; + # and return unit + UNIT ; PAIR } } } } } } } + +XII - Full grammar +------------------ + +:: + + ::= + | + | + | + | + | + | + | + | + | + | Unit + | True + | False + | Pair + | Left + | Right + | Some + | None + | { ; ... } + | { Elt ; ... } + | instruction + ::= + | { ... } + | DROP + | DUP + | SWAP + | PUSH + | SOME + | NONE + | UNIT + | IF_NONE { ... } { ... } + | PAIR + | CAR + | CDR + | LEFT + | RIGHT + | IF_LEFT { ... } { ... } + | NIL + | CONS + | IF_CONS { ... } { ... } + | EMPTY_SET + | EMPTY_MAP + | MAP + | MAP { ... } + | REDUCE + | ITER { ... } + | MEM + | GET + | UPDATE + | IF { ... } { ... } + | LOOP { ... } + | LOOP_LEFT { ... } + | LAMBDA { ... } + | EXEC + | DIP { ... } + | FAIL + | CONCAT + | ADD + | SUB + | MUL + | DIV + | ABS + | NEG + | MOD + | LSL + | LSR + | OR + | AND + | XOR + | NOT + | COMPARE + | EQ + | NEQ + | LT + | GT + | LE + | GE + | INT + | MANAGER + | SELF + | TRANSFER_TOKENS + | CREATE_ACCOUNT + | CREATE_CONTRACT + | DEFAULT_ACCOUNT + | NOW + | AMOUNT + | BALANCE + | CHECK_SIGNATURE + | H + | HASH_KEY + | STEPS_TO_QUOTA + | SOURCE + ::= + | + | key + | unit + | signature + | option + | list + | set + | contract + | pair + | or + | lambda + | map + ::= + | int + | nat + | string + | tez + | bool + | key_hash + | timestamp + +XIII - Reference implementation +------------------------------- + +The language is implemented in OCaml as follows: + +- The lower internal representation is written as a GADT whose type + parameters encode exactly the typing rules given in this + specification. In other words, if a program written in this + representation is accepted by OCaml’s typechecker, it is mandatorily + type-safe. This of course also valid for programs not handwritten but + generated by OCaml code, so we are sure that any manipulated code is + type-safe. + + In the end, what remains to be checked is the encoding of the typing + rules as OCaml types, which boils down to half a line of code for + each instruction. Everything else is left to the venerable and well + trusted OCaml. + +- The interpreter is basically the direct transcription of the + rewriting rules presented above. It takes an instruction, a stack and + transforms it. OCaml’s typechecker ensures that the transformation + respects the pre and post stack types declared by the GADT case for + each instruction. + + The only things that remain to we reviewed are value dependent + choices, such as that we did not swap true and false when + interpreting the If instruction. + +- The input, untyped internal representation is an OCaml ADT with the + only 5 grammar constructions: ``String``, ``Int``, ``Seq`` and + ``Prim``. It is the target language for the parser, since not all + parsable programs are well typed, and thus could simply not be + constructed using the GADT. + +- The typechecker is a simple function that recognizes the abstract + grammar described in section X by pattern matching, producing the + well-typed, corresponding GADT expressions. It is mostly a checker, + not a full inferer, and thus takes some annotations (basically the + input and output of the program, of lambdas and of uninitialized maps + and sets). It works by performing a symbolic evaluation of the + program, transforming a symbolic stack. It only needs one pass over + the whole program. + + Here again, OCaml does most of the checking, the structure of the + function is very simple, what we have to check is that we transform a + ``Prim ("If", ...)`` into an ``If``, a ``Prim ("Dup", ...)`` into a + ``Dup``, etc. diff --git a/docs/whitedoc/octopus.svg b/docs/whitedoc/octopus.svg new file mode 100644 index 000000000..39085da7e --- /dev/null +++ b/docs/whitedoc/octopus.svg @@ -0,0 +1,1558 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/docs/whitedoc/the_big_picture.rst b/docs/whitedoc/the_big_picture.rst new file mode 100644 index 000000000..a8ced6232 --- /dev/null +++ b/docs/whitedoc/the_big_picture.rst @@ -0,0 +1,59 @@ +.. _the_big_picture: + +Tezos Software Architecture +=========================== + +The diagram below shows a very coarse grained architecture of Tezos. +|Tezos architecture diagram| + +The characteristic that makes Tezos unique is its self-amending +property. The part that amends itself is called the *economic protocol* +(the green eye of the octopus), sometimes abbreviated by protocol or +even proto in the source code. The rest of a Tezos node is what we call +the *shell* (the blue octopus). + +The protocol is responsible for interpreting the transactions and other +administrative operations. It also has the responsibility to detect +erroneous blocks. + +An important thing to notice is that the protocol always sees only one +block chain. In other words, a linear sequence of blocks since the +genesis. It does not know that it lives in an open network where nodes +can propose alternative heads. + +Only the shell knows about the multiple heads. It is responsible for +choosing between the various chain proposals that come from the bakers +(the programs that cook new blocks) of the network. The shell has the +responsibility of selecting and downloading alternative chains, feed +them to the protocol, which in turn has the responsibility to check them +for errors, and give them an absolute score. The shell then simply +selects the valid head of highest absolute score. This part of the shell +is called the validator. + +The rest of the shell includes the peer-to-peer layer, the disk storage +of blocks, the operations to allow the node to transmit the chain data +to new nodes and the versioned state of the ledger. Inbetween the +validator, the peer-to-peer layer and the storage sits a component +called the distributed database, that abstracts the fetching and +replication of new chain data to the validator. + +Protocols are compiled using a tweaked OCaml compiler (green part on the +left of the picture) that does two things. First, it checks that the +protocol’s main module has the right type. A good analogy is to see +protocol as plug-ins, and in this case, it means that it respects the +common plugin interface. Then, it restricts the typing environment of +the protocol’s code so that it only calls authorized modules and +functions. Seeing protocols as plug-ins, it means that the code only +called primitives from the plug-in API. It is a form of statically +enforced sandboxing. + +Finally, the RPC layer (in yellow on the right in the picture) is an +important part of the node. It is how the client, third party +applications and daemons can interact with the node and introspect its +state. This component uses the mainstream JSON format and HTTP protocol. +It uses in-house libraries ``ocplib-resto`` and ``ocplib-json-typed`` +(via the module :ref:`Data_encoding `). It +is fully inter-operable, and auto descriptive, using JSON schema. + +.. |Tezos architecture diagram| image:: octopus.svg + diff --git a/scripts/ocamldot.py b/scripts/ocamldot.py new file mode 100755 index 000000000..1f44d0f0e --- /dev/null +++ b/scripts/ocamldot.py @@ -0,0 +1,85 @@ +#!/usr/bin/python + +import re +import sys +import os +import argparse +from sets import Set + +alldeps={} +allmodules={} + +def sanitize(s): + s=re.sub('.*/','',s) + s=re.sub('[^0-9a-zA-Z]+', '_', s) + return s + +def cleanName(s): + ml = os.path.basename(s) + (mod,_) = os.path.splitext(ml) + return mod.capitalize() + +def mangle(f,modulename): + + dictionary = {} + for line in open(f): + s=line.split(); + for x in range(2, len(s)): + mod = cleanName(s[0]) + dep = sanitize(s[x]) + if mod in dictionary : + dictionary[mod].append(dep) + else : + dictionary[mod] = [dep] + allmodules.update({mod : modulename}) + + alldeps[modulename] = dictionary + +def cleanup(alldeps): + # remove references to external libraries + for (name,dictionary) in alldeps.iteritems() : + for (mod,deps) in dictionary.iteritems() : + dictionary[mod] = [x for x in deps if x in allmodules] + alldeps[name] = dictionary + return alldeps + +def print_graph(alldeps): + print("strict digraph G {") + print('graph [fontsize=10 fontname="Verdana"];') + print('node [shape=record fontsize=10 fontname="Verdana" compound=true];') + counter = 0 + l = { x: i for i,x in enumerate(alldeps.keys())} + for (name,dictionary) in alldeps.iteritems() : + names = ['"%s"' % mod for mod in dictionary.keys()] + if len(names) > 0 : + print ('subgraph cluster_%i { label = "%s"; color=blue; node [style=filled];' % (l[name],name)) + counter += 1 + for (mod,deps) in dictionary.iteritems() : + for dep in deps : + if dep in dictionary : + print ('"%s" -> "%s";' % (mod,dep)) + print "}" + # for (mod,deps) in dictionary.iteritems() : + # for dep in deps : + # if dep not in dictionary : + # print ('"%s" -> "%s" [ltail=cluster_%i lhead=cluster_%i];' % (mod,dep,l[name],l[allmodules[dep]])) + print "}" + +def scan(directories): + ext = ".depends.ocamldep-output" + for directory in directories: + for root, dirs, files in os.walk(directory): + for f in files: + if f.endswith(ext): + mangle(os.path.join(root, f),f[:-len(ext)]) + print_graph(cleanup(alldeps)) + +def main(): + parser = argparse.ArgumentParser(description='OcamlDep Dependency Tree') + parser.add_argument('inputdirs', type=str, nargs='*', help="directories to scan") + args = parser.parse_args() + + scan(args.inputdirs) + +if __name__ == '__main__': + main() diff --git a/src/lib_crypto/base58.mli b/src/lib_crypto/base58.mli index 7cb81fb9b..b11df9e74 100644 --- a/src/lib_crypto/base58.mli +++ b/src/lib_crypto/base58.mli @@ -95,7 +95,7 @@ val complete: ?alphabet:Alphabet.t -> string -> string list Lwt.t (** {1 Low-level: distinct registering function for economic protocol} *) -(** See [src/environment/v1/base58.mli]} for an inlined +(** See [src/environment/v1/base58.mli] for an inlined documentation. *) module Make(C: sig type context end) : sig diff --git a/src/lib_protocol_environment_sigs/v1/mBytes.mli b/src/lib_protocol_environment_sigs/v1/mBytes.mli index ff26d9935..2dac9771c 100644 --- a/src/lib_protocol_environment_sigs/v1/mBytes.mli +++ b/src/lib_protocol_environment_sigs/v1/mBytes.mli @@ -25,7 +25,7 @@ val shift: t -> int -> t val blit: t -> int -> t -> int -> int -> unit (** [blit src ofs_src dst ofs_dst len] copy [len] bytes from [src] - starting at [ofs_src] into [dst] starting at [ofs_dst].] *) + starting at [ofs_src] into [dst] starting at [ofs_dst]. *) val blit_from_string: string -> int -> t -> int -> int -> unit (** See [blit] *) diff --git a/src/lib_stdlib/mBytes.mli b/src/lib_stdlib/mBytes.mli index 845bd846e..4a9ff94b2 100644 --- a/src/lib_stdlib/mBytes.mli +++ b/src/lib_stdlib/mBytes.mli @@ -34,7 +34,7 @@ val shift: t -> int -> t val blit: t -> int -> t -> int -> int -> unit (** [blit src ofs_src dst ofs_dst len] copy [len] bytes from [src] - starting at [ofs_src] into [dst] starting at [ofs_dst].] *) + starting at [ofs_src] into [dst] starting at [ofs_dst]. *) val blit_from_string: string -> int -> t -> int -> int -> unit (** See [blit] *) @@ -95,7 +95,7 @@ val get_double: t -> int -> float val set_int16: t -> int -> int -> unit (** [set_int16 buff i v] writes the least significant 16 bits of [v] - to [buff] at offset [i] *) + to [buff] at offset [i] *) val set_int32: t -> int -> int32 -> unit (** [set_int32 buff i v] writes [v] to [buff] at offset [i] *) diff --git a/src/lib_stdlib_lwt/cli_entries.mli b/src/lib_stdlib_lwt/cli_entries.mli index 18d571175..fca456f5a 100644 --- a/src/lib_stdlib_lwt/cli_entries.mli +++ b/src/lib_stdlib_lwt/cli_entries.mli @@ -33,7 +33,7 @@ val arg : doc:string -> parameter:string -> ('p option, 'ctx) arg (** Create an argument that will contain the [~default] value if it is not provided. - @see arg *) + see arg *) val default_arg : doc:string -> parameter:string -> default:string -> ('p, 'ctx) parameter ->