Yet more argh implementation fun

This commit is contained in:
Dan Buch 2022-05-13 20:58:55 -04:00
parent 451be7bf31
commit e4ffde87e5
9 changed files with 465 additions and 77 deletions

32
argh.go
View File

@ -1,5 +1,35 @@
package argh package argh
import (
"log"
"os"
)
// NOTE: much of this is lifted from
// https://blog.gopheracademy.com/advent-2014/parsers-lexers/
var (
tracingEnabled = os.Getenv("ARGH_TRACING") == "enabled"
)
type Argh struct { type Argh struct {
AST *AST ParseTree *ParseTree `json:"parse_tree"`
}
func (a *Argh) AST() []TypedNode {
return a.ParseTree.toAST()
}
/*
func (a *Argh) String() string {
return a.ParseTree.String()
}
*/
func tracef(format string, v ...any) {
if !tracingEnabled {
return
}
log.Printf(format, v...)
} }

10
ast.go
View File

@ -1,10 +0,0 @@
package argh
type AST struct {
Nodes []*Node `json:"nodes"`
}
type Node struct {
Token string `json:"token"`
Literal string `json:"literal"`
}

View File

@ -10,7 +10,9 @@ import (
) )
func main() { func main() {
ast, err := argh.ParseArgs(os.Args) log.SetFlags(0)
ast, err := argh.ParseArgs(os.Args, nil)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }

7
go.mod
View File

@ -3,3 +3,10 @@ module git.meatballhat.com/x/box-o-sand/argh
go 1.18 go 1.18
require github.com/pkg/errors v0.9.1 require github.com/pkg/errors v0.9.1
require (
github.com/davecgh/go-spew v1.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/stretchr/testify v1.7.1 // indirect
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
)

10
go.sum
View File

@ -1,2 +1,12 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

82
parse_tree.go Normal file
View File

@ -0,0 +1,82 @@
package argh
import "fmt"
type ParseTree struct {
Nodes []Node `json:"nodes"`
}
func (pt *ParseTree) toAST() []TypedNode {
ret := []TypedNode{}
for _, node := range pt.Nodes {
if _, ok := node.(ArgDelimiter); ok {
continue
}
if _, ok := node.(StopFlag); ok {
continue
}
ret = append(
ret,
TypedNode{
Type: fmt.Sprintf("%T", node),
Node: node,
},
)
}
return ret
}
type Node interface{}
type TypedNode struct {
Type string `json:"type"`
Node Node `json:"node"`
}
type Args struct {
Pos int `json:"pos"`
Nodes []Node `json:"nodes"`
}
type Statement struct {
Pos int `json:"pos"`
Nodes []Node `json:"nodes"`
}
type Program struct {
Pos int `json:"pos"`
Name string `json:"name"`
}
type Ident struct {
Pos int `json:"pos"`
Literal string `json:"literal"`
}
type Command struct {
Pos int `json:"pos"`
Name string `json:"name"`
Nodes []Node `json:"nodes"`
}
type Flag struct {
Pos int `json:"pos"`
Name string `json:"name"`
Value *string `json:"value,omitempty"`
}
type StdinFlag struct {
Pos int `json:"pos"`
}
type StopFlag struct {
Pos int `json:"pos"`
}
type ArgDelimiter struct {
Pos int `json:"pos"`
}

200
parser.go
View File

@ -7,76 +7,220 @@ import (
"github.com/pkg/errors" "github.com/pkg/errors"
) )
// NOTE: much of this is lifted from
// https://blog.gopheracademy.com/advent-2014/parsers-lexers/
var ( var (
errSyntax = errors.New("syntax error") errSyntax = errors.New("syntax error")
DefaultParserConfig = &ParserConfig{
Commands: []string{},
ValueFlags: []string{},
ScannerConfig: DefaultScannerConfig,
}
) )
func ParseArgs(args []string) (*Argh, error) { func ParseArgs(args []string, pCfg *ParserConfig) (*Argh, error) {
reEncoded := strings.Join(args, string(nul)) reEncoded := strings.Join(args, string(nul))
return NewParser( return NewParser(
strings.NewReader(reEncoded), strings.NewReader(reEncoded),
nil, pCfg,
).Parse() ).Parse()
} }
type Parser struct { type Parser struct {
s *Scanner s *Scanner
buf ParserBuffer buf ParserBuffer
commands map[string]struct{}
valueFlags map[string]struct{}
nodes []Node
stopSeen bool
} }
type ParserBuffer struct { type ParserBuffer struct {
tok Token tok Token
lit string lit string
pos int
n int n int
} }
func NewParser(r io.Reader, cfg *ScannerConfig) *Parser { type ParserConfig struct {
return &Parser{s: NewScanner(r, cfg)} Commands []string
ValueFlags []string
ScannerConfig *ScannerConfig
}
type parseDirective struct {
Break bool
}
func NewParser(r io.Reader, pCfg *ParserConfig) *Parser {
if pCfg == nil {
pCfg = DefaultParserConfig
}
parser := &Parser{
s: NewScanner(r, pCfg.ScannerConfig),
commands: map[string]struct{}{},
valueFlags: map[string]struct{}{},
}
for _, command := range pCfg.Commands {
parser.commands[command] = struct{}{}
}
for _, valueFlag := range pCfg.ValueFlags {
parser.valueFlags[valueFlag] = struct{}{}
}
tracef("NewParser parser=%+#v", parser)
tracef("NewParser pCfg=%+#v", pCfg)
return parser
} }
func (p *Parser) Parse() (*Argh, error) { func (p *Parser) Parse() (*Argh, error) {
arghOut := &Argh{ p.nodes = []Node{}
AST: &AST{
Nodes: []*Node{},
},
}
for { for {
tok, lit := p.scan() pd, err := p.parseArg()
if err != nil {
return nil, err
}
if pd != nil && pd.Break {
break
}
}
return &Argh{ParseTree: &ParseTree{Nodes: p.nodes}}, nil
}
func (p *Parser) parseArg() (*parseDirective, error) {
tok, lit, pos := p.scan()
if tok == ILLEGAL { if tok == ILLEGAL {
return nil, errors.Wrapf(errSyntax, "illegal value %q", lit) return nil, errors.Wrapf(errSyntax, "illegal value %q at pos=%v", lit, pos)
} }
if tok == EOL { if tok == EOL {
break return &parseDirective{Break: true}, nil
} }
arghOut.AST.Nodes = append( p.unscan()
arghOut.AST.Nodes,
&Node{Token: tok.String(), Literal: lit}, node, err := p.nodify()
tracef("parseArg node=%+#v err=%+#v", node, err)
if err != nil {
return nil, errors.Wrapf(err, "value %q at pos=%v", lit, pos)
}
if node != nil {
p.nodes = append(p.nodes, node)
}
return nil, nil
}
func (p *Parser) nodify() (Node, error) {
tok, lit, pos := p.scan()
tracef("nodify tok=%s lit=%q pos=%v", tok, lit, pos)
switch tok {
case IDENT:
if len(p.nodes) == 0 {
return Program{Name: lit, Pos: pos - len(lit)}, nil
}
return Ident{Literal: lit, Pos: pos - len(lit)}, nil
case ARG_DELIMITER:
return ArgDelimiter{Pos: pos - 1}, nil
case COMPOUND_SHORT_FLAG:
flagNodes := []Node{}
for i, r := range lit[1:] {
flagNodes = append(
flagNodes,
Flag{
Pos: pos + i + 1,
Name: string(r),
},
) )
} }
return arghOut, nil return Statement{Pos: pos, Nodes: flagNodes}, nil
} case SHORT_FLAG:
flagName := string(lit[1:])
func (p *Parser) scan() (Token, string) { if _, ok := p.valueFlags[flagName]; ok {
if p.buf.n != 0 { return p.scanValueFlag(flagName, pos)
p.buf.n = 0
return p.buf.tok, p.buf.lit
} }
tok, lit := p.s.Scan() return Flag{Name: flagName, Pos: pos - len(flagName) - 1}, nil
case LONG_FLAG:
flagName := string(lit[2:])
if _, ok := p.valueFlags[flagName]; ok {
return p.scanValueFlag(flagName, pos)
}
p.buf.tok, p.buf.lit = tok, lit return Flag{Name: flagName, Pos: pos - len(flagName) - 2}, nil
default:
}
return tok, lit return Ident{Literal: lit, Pos: pos - len(lit)}, nil
}
func (p *Parser) scanValueFlag(flagName string, pos int) (Node, error) {
tracef("scanValueFlag flagName=%q pos=%v", flagName, pos)
lit, err := p.scanIdent()
if err != nil {
return nil, err
}
flagSepLen := len("--") + 1
return Flag{Name: flagName, Pos: pos - len(lit) - flagSepLen, Value: ptr(lit)}, nil
}
func (p *Parser) scanIdent() (string, error) {
tok, lit, pos := p.scan()
nUnscan := 0
if tok == ASSIGN || tok == ARG_DELIMITER {
nUnscan++
tok, lit, pos = p.scan()
}
if tok == IDENT {
return lit, nil
}
for i := 0; i < nUnscan; i++ {
p.unscan()
}
return "", errors.Wrapf(errSyntax, "expected ident at pos=%v but got %s (%q)", pos, tok, lit)
}
func (p *Parser) scan() (Token, string, int) {
if p.buf.n != 0 {
p.buf.n = 0
return p.buf.tok, p.buf.lit, p.buf.pos
}
tok, lit, pos := p.s.Scan()
p.buf.tok, p.buf.lit, p.buf.pos = tok, lit, pos
return tok, lit, pos
} }
func (p *Parser) unscan() { func (p *Parser) unscan() {
p.buf.n = 1 p.buf.n = 1
} }
func ptr[T any](v T) *T {
return &v
}

119
parser_test.go Normal file
View File

@ -0,0 +1,119 @@
package argh_test
import (
"testing"
"git.meatballhat.com/x/box-o-sand/argh"
"github.com/stretchr/testify/assert"
)
func ptr[T any](v T) *T {
return &v
}
func TestParser(t *testing.T) {
for _, tc := range []struct {
name string
args []string
cfg *argh.ParserConfig
expected *argh.Argh
expectedErr error
skip bool
}{
{
name: "bare",
args: []string{"pizzas"},
expected: &argh.Argh{
ParseTree: &argh.ParseTree{
Nodes: []argh.Node{
argh.Program{Name: "pizzas"},
},
},
},
},
{
name: "long value-less flags",
args: []string{"pizzas", "--tasty", "--fresh", "--super-hot-right-now"},
expected: &argh.Argh{
ParseTree: &argh.ParseTree{
Nodes: []argh.Node{
argh.Program{Name: "pizzas", Pos: 0},
argh.ArgDelimiter{Pos: 6},
argh.Flag{Name: "tasty", Pos: 7},
argh.ArgDelimiter{Pos: 14},
argh.Flag{Name: "fresh", Pos: 15},
argh.ArgDelimiter{Pos: 22},
argh.Flag{Name: "super-hot-right-now", Pos: 23},
},
},
},
},
{
name: "long flags mixed",
args: []string{"pizzas", "--tasty", "--fresh", "soon", "--super-hot-right-now"},
cfg: &argh.ParserConfig{
Commands: []string{},
ValueFlags: []string{"fresh"},
},
expected: &argh.Argh{
ParseTree: &argh.ParseTree{
Nodes: []argh.Node{
argh.Program{Name: "pizzas", Pos: 0},
argh.ArgDelimiter{Pos: 6},
argh.Flag{Name: "tasty", Pos: 7},
argh.ArgDelimiter{Pos: 14},
argh.Flag{Name: "fresh", Pos: 15, Value: ptr("soon")},
argh.ArgDelimiter{Pos: 27},
argh.Flag{Name: "super-hot-right-now", Pos: 28},
},
},
},
},
{
skip: true,
name: "typical",
args: []string{"pizzas", "-a", "--ca", "-b", "1312", "-lol"},
cfg: &argh.ParserConfig{
Commands: []string{},
ValueFlags: []string{"b"},
},
expected: &argh.Argh{
ParseTree: &argh.ParseTree{
Nodes: []argh.Node{
argh.Program{Name: "pizzas", Pos: 0},
argh.ArgDelimiter{Pos: 6},
argh.Flag{Name: "a", Pos: 7},
argh.ArgDelimiter{Pos: 9},
argh.Flag{Name: "ca", Pos: 10},
argh.ArgDelimiter{Pos: 14},
argh.Flag{Name: "b", Pos: 15, Value: ptr("1312")},
argh.ArgDelimiter{Pos: 22},
argh.Statement{
Pos: 23,
Nodes: []argh.Node{
argh.Flag{Name: "l", Pos: 29},
argh.Flag{Name: "o", Pos: 30},
argh.Flag{Name: "l", Pos: 31},
},
},
},
},
},
},
} {
if tc.skip {
continue
}
t.Run(tc.name, func(ct *testing.T) {
actual, err := argh.ParseArgs(tc.args, tc.cfg)
if err != nil {
assert.ErrorIs(ct, err, tc.expectedErr)
return
}
assert.Equal(ct, tc.expected, actual)
})
}
}

View File

@ -1,8 +1,5 @@
package argh package argh
// NOTE: much of this is lifted from
// https://blog.gopheracademy.com/advent-2014/parsers-lexers/
import ( import (
"bufio" "bufio"
"bytes" "bytes"
@ -27,6 +24,7 @@ var (
type Scanner struct { type Scanner struct {
r *bufio.Reader r *bufio.Reader
i int
cfg *ScannerConfig cfg *ScannerConfig
} }
@ -34,8 +32,6 @@ type ScannerConfig struct {
AssignmentOperator rune AssignmentOperator rune
FlagPrefix rune FlagPrefix rune
MultiValueDelim rune MultiValueDelim rune
Commands []string
} }
func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner { func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner {
@ -49,52 +45,56 @@ func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner {
} }
} }
func (s *Scanner) Scan() (Token, string) { func (s *Scanner) Scan() (Token, string, int) {
ch := s.read() ch, pos := s.read()
if s.isBlankspace(ch) { if s.isBlankspace(ch) {
s.unread() _ = s.unread()
return s.scanBlankspace() return s.scanBlankspace()
} }
if s.isAssignmentOperator(ch) { if s.isAssignmentOperator(ch) {
return ASSIGN, string(ch) return ASSIGN, string(ch), pos
} }
if s.isMultiValueDelim(ch) { if s.isMultiValueDelim(ch) {
return MULTI_VALUE_DELIMITER, string(ch) return MULTI_VALUE_DELIMITER, string(ch), pos
} }
if ch == eol { if ch == eol {
return EOL, "" return EOL, "", pos
} }
if ch == nul { if ch == nul {
return ARG_DELIMITER, string(ch) return ARG_DELIMITER, string(ch), pos
} }
if unicode.IsGraphic(ch) { if unicode.IsGraphic(ch) {
s.unread() _ = s.unread()
return s.scanArg() return s.scanArg()
} }
return ILLEGAL, string(ch) return ILLEGAL, string(ch), pos
} }
func (s *Scanner) read() rune { func (s *Scanner) read() (rune, int) {
ch, _, err := s.r.ReadRune() ch, _, err := s.r.ReadRune()
s.i++
if errors.Is(err, io.EOF) { if errors.Is(err, io.EOF) {
return eol return eol, s.i
} else if err != nil { } else if err != nil {
log.Printf("unknown scanner error=%+v", err) log.Printf("unknown scanner error=%+v", err)
return eol return eol, s.i
} }
return ch return ch, s.i
} }
func (s *Scanner) unread() { func (s *Scanner) unread() int {
_ = s.r.UnreadRune() _ = s.r.UnreadRune()
s.i--
return s.i
} }
func (s *Scanner) isBlankspace(ch rune) bool { func (s *Scanner) isBlankspace(ch rune) bool {
@ -117,33 +117,37 @@ func (s *Scanner) isAssignmentOperator(ch rune) bool {
return ch == s.cfg.AssignmentOperator return ch == s.cfg.AssignmentOperator
} }
func (s *Scanner) scanBlankspace() (Token, string) { func (s *Scanner) scanBlankspace() (Token, string, int) {
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
buf.WriteRune(s.read()) ch, pos := s.read()
buf.WriteRune(ch)
for { for {
if ch := s.read(); ch == eol { ch, pos = s.read()
if ch == eol {
break break
} else if !s.isBlankspace(ch) { } else if !s.isBlankspace(ch) {
s.unread() pos = s.unread()
break break
} else { } else {
_, _ = buf.WriteRune(ch) _, _ = buf.WriteRune(ch)
} }
} }
return BS, buf.String() return BS, buf.String(), pos
} }
func (s *Scanner) scanArg() (Token, string) { func (s *Scanner) scanArg() (Token, string, int) {
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
buf.WriteRune(s.read()) ch, pos := s.read()
buf.WriteRune(ch)
for { for {
ch := s.read() ch, pos = s.read()
if ch == eol || ch == nul || s.isAssignmentOperator(ch) || s.isMultiValueDelim(ch) { if ch == eol || ch == nul || s.isAssignmentOperator(ch) || s.isMultiValueDelim(ch) {
s.unread() pos = s.unread()
break break
} }
@ -153,38 +157,38 @@ func (s *Scanner) scanArg() (Token, string) {
str := buf.String() str := buf.String()
if len(str) == 0 { if len(str) == 0 {
return EMPTY, str return EMPTY, str, pos
} }
ch0 := rune(str[0]) ch0 := rune(str[0])
if len(str) == 1 { if len(str) == 1 {
if s.isFlagPrefix(ch0) { if s.isFlagPrefix(ch0) {
return STDIN_FLAG, str return STDIN_FLAG, str, pos
} }
return IDENT, str return IDENT, str, pos
} }
ch1 := rune(str[1]) ch1 := rune(str[1])
if len(str) == 2 { if len(str) == 2 {
if str == string(s.cfg.FlagPrefix)+string(s.cfg.FlagPrefix) { if str == string(s.cfg.FlagPrefix)+string(s.cfg.FlagPrefix) {
return STOP_FLAG, str return STOP_FLAG, str, pos
} }
if s.isFlagPrefix(ch0) { if s.isFlagPrefix(ch0) {
return SHORT_FLAG, str return SHORT_FLAG, str, pos
} }
} }
if s.isFlagPrefix(ch0) { if s.isFlagPrefix(ch0) {
if s.isFlagPrefix(ch1) { if s.isFlagPrefix(ch1) {
return LONG_FLAG, str return LONG_FLAG, str, pos
} }
return COMPOUND_SHORT_FLAG, str return COMPOUND_SHORT_FLAG, str, pos
} }
return IDENT, str return IDENT, str, pos
} }