Skip to content

Commit

Permalink
Spec checker tool (#8722)
Browse files Browse the repository at this point in the history
* Add specdocs static code analyzer

* docs pulling script

* update content pulling script

* add test

* better parsing of incoming docs

* update test

* implements analyzer

* separate tool

* remove analyzer code

* cleanup

* deep source fixes

* untrack raw specs files

* add back phase0 defs

* update spec texts

* re-arrange code

* updated spec list

* cleanup

* more comments and readme

* add merkle proofs specs

* add extra.md

* mark wrong length issue

* update readme

* update readme

* remove non-def snippets

* update comment

* check numrows

* ignore last empty line

Co-authored-by: Raul Jordan <raul@prysmaticlabs.com>
  • Loading branch information
farazdagi and rauljordan authored Apr 15, 2021
1 parent 169cd78 commit 3d3b9d1
Show file tree
Hide file tree
Showing 11 changed files with 2,137 additions and 0 deletions.
28 changes: 28 additions & 0 deletions tools/specs-checker/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
load("@io_bazel_rules_go//go:def.bzl", "go_binary")
load("@prysm//tools/go:def.bzl", "go_library")

go_library(
name = "go_default_library",
srcs = [
"check.go",
"download.go",
"main.go",
],
embedsrcs = [
"data/specs/phase0/beacon-chain.md",
"data/specs/phase0/fork-choice.md",
"data/specs/phase0/validator.md",
"data/specs/phase0/weak-subjectivity.md",
"data/ssz/merkle-proofs.md",
"data/extra.md",
],
importpath = "github.com/prysmaticlabs/prysm/tools/specs-checker",
visibility = ["//visibility:public"],
deps = ["@com_github_urfave_cli_v2//:go_default_library"],
)

go_binary(
name = "specs-checker",
embed = [":go_default_library"],
visibility = ["//visibility:public"],
)
41 changes: 41 additions & 0 deletions tools/specs-checker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Specs checker tool

This simple tool helps downloading and parsing [ETH2 specs](https://github.com/ethereum/eth2.0-specs/tree/dev/specs),
to be later used for making sure that our reference comments match specs definitions precisely.

### Updating the reference specs
See `main.go` for a list of files to be downloaded, currently:
```golang
var specDirs = map[string][]string{
"specs/phase0": {
"beacon-chain.md",
"fork-choice.md",
"validator.md",
"weak-subjectivity.md",
},
"ssz": {
"merkle-proofs.md",
},
}
```

To download/update specs:
```bash
bazel run //tools/specs-checker download -- --dir=$PWD/tools/specs-checker/data
```

This will pull the files defined in `specDirs`, parse them (extract Python code snippets, discarding any other text),
and save them to the folder from which `bazel run //tools/specs-checker check` will be able to embed.

### Checking against the reference specs

To check whether reference comments have the matching version of Python specs:
```bash
bazel run //tools/specs-checker check -- --dir $PWD/beacon-chain
bazel run //tools/specs-checker check -- --dir $PWD/validator
bazel run //tools/specs-checker check -- --dir $PWD/shared
```
Or, to check the whole project:
```bash
bazel run //tools/specs-checker check -- --dir $PWD
```
176 changes: 176 additions & 0 deletions tools/specs-checker/check.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package main

import (
"fmt"
"go/ast"
"go/parser"
"go/token"
"os"
"path"
"path/filepath"
"regexp"
"strings"

"github.com/urfave/cli/v2"
)

// Regex to find Python's "def".
var reg1 = regexp.MustCompile(`def\s(.*)\(.*`)

// checkNumRows defines whether tool should check that the spec comment is the last comment of the block, so not only
// it matches the reference snippet, but it also has the same number of rows.
const checkNumRows = false

func check(cliCtx *cli.Context) error {
// Obtain reference snippets.
defs, err := parseSpecs()
if err != nil {
return err
}

// Walk the path, and process all contained Golang files.
fileWalker := func(path string, info os.FileInfo, err error) error {
if info == nil {
return fmt.Errorf("invalid input dir %q", path)
}
if !strings.HasSuffix(info.Name(), ".go") {
return nil
}
return inspectFile(path, defs)
}
return filepath.Walk(cliCtx.String(dirFlag.Name), fileWalker)
}

func inspectFile(path string, defs map[string][]string) error {
// Parse source files, and check the pseudo code.
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
if err != nil {
return err
}

ast.Inspect(file, func(node ast.Node) bool {
stmt, ok := node.(*ast.CommentGroup)
if !ok {
return true
}
// Ignore comment groups that do not have python pseudo-code.
chunk := stmt.Text()
if !reg1.MatchString(chunk) {
return true
}

// Trim the chunk, so that it starts from Python's "def".
loc := reg1.FindStringIndex(chunk)
chunk = chunk[loc[0]:]

// Find out Python function name.
defName, defBody := parseDefChunk(chunk)
if defName == "" {
fmt.Printf("%s: cannot parse comment pseudo code\n", fset.Position(node.Pos()))
return false
}

// Calculate differences with reference implementation.
refDefs, ok := defs[defName]
if !ok {
fmt.Printf("%s: %q is not found in spec docs\n", fset.Position(node.Pos()), defName)
return false
}
if !matchesRefImplementation(defName, refDefs, defBody, fset.Position(node.Pos())) {
fmt.Printf("%s: %q code does not match reference implementation in specs\n", fset.Position(node.Pos()), defName)
return false
}

return true
})

return nil
}

// parseSpecs parses input spec docs into map of function name -> array of function bodies
// (single entity may have several definitions).
func parseSpecs() (map[string][]string, error) {
loadSpecsFile := func(sb *strings.Builder, specFilePath string) error {
chunk, err := specFS.ReadFile(specFilePath)
if err != nil {
return fmt.Errorf("cannot read specs file: %w", err)
}
_, err = sb.Write(chunk)
if err != nil {
return fmt.Errorf("cannot copy specs file: %w", err)
}
return nil
}

// Traverse all spec files, and aggregate them within as single string.
var sb strings.Builder
for dirName, fileNames := range specDirs {
for _, fileName := range fileNames {
if err := loadSpecsFile(&sb, path.Join("data", dirName, fileName)); err != nil {
return nil, err
}
}
}

// Load file with extra definitions (this allows us to use pseudo-code that is not from specs).
if err := loadSpecsFile(&sb, path.Join("data", "extra.md")); err != nil {
return nil, err
}

// Parse docs into function name -> array of function bodies map.
chunks := strings.Split(strings.ReplaceAll(sb.String(), "```python", ""), "```")
defs := make(map[string][]string, len(chunks))
for _, chunk := range chunks {
defName, defBody := parseDefChunk(chunk)
if defName == "" {
continue
}
defs[defName] = append(defs[defName], defBody)
}
return defs, nil
}

// parseDefChunk extract function name and function body from a Python's "def" chunk.
func parseDefChunk(chunk string) (string, string) {
chunk = strings.TrimLeft(chunk, "\n")
if chunk == "" {
return "", ""
}
chunkLines := strings.Split(chunk, "\n")
// Ignore all snippets, that do not define functions.
if chunkLines[0][:4] != "def " {
return "", ""
}
defMatches := reg1.FindStringSubmatch(chunkLines[0])
if len(defMatches) < 2 {
return "", ""
}
return strings.Trim(defMatches[1], " "), chunk
}

// matchesRefImplementation compares input string to reference code snippets (there might be multiple implementations).
func matchesRefImplementation(defName string, refDefs []string, input string, pos token.Position) bool {
for _, refDef := range refDefs {
refDefLines := strings.Split(strings.TrimRight(refDef, "\n"), "\n")
inputLines := strings.Split(strings.TrimRight(input, "\n"), "\n")

matchesPerfectly := true
for i := 0; i < len(refDefLines); i++ {
a, b := strings.Trim(refDefLines[i], " "), strings.Trim(inputLines[i], " ")
if a != b {
matchesPerfectly = false
break
}
}
// Mark potential issues, when there's some more comments in our code (which might be ok, as we are not required
// to put specs comments as the last one in the doc block).
if checkNumRows && len(refDefLines) != len(inputLines) {
fmt.Printf("%s: %q potentially has issues (comment is longer than reference implementation)\n", pos, defName)
}
if matchesPerfectly {
return true
}
}
return false
}
15 changes: 15 additions & 0 deletions tools/specs-checker/data/extra.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
```python
def Sign(SK: int, message: Bytes) -> BLSSignature
```
```python
def Verify(PK: BLSPubkey, message: Bytes, signature: BLSSignature) -> bool
```
```python
def AggregateVerify(pairs: Sequence[PK: BLSPubkey, message: Bytes], signature: BLSSignature) -> bool
```
```python
def FastAggregateVerify(PKs: Sequence[BLSPubkey], message: Bytes, signature: BLSSignature) -> bool
```
```python
def Aggregate(signatures: Sequence[BLSSignature]) -> BLSSignature
```
Loading

0 comments on commit 3d3b9d1

Please sign in to comment.