Making a mess with a parser that works more like go/parser

urfave-cli-integration
Dan Buch 2 years ago
parent 2af31eafe1
commit 0635987d4f

@ -7,6 +7,10 @@ type TypedNode struct {
Node Node `json:"node"` Node Node `json:"node"`
} }
// PassthroughArgs is a node that wraps a flat list of child nodes, serialized
// under the "nodes" JSON key.
// NOTE(review): presumably holds the arguments that follow a stop flag
// ("--") — confirm against the parser that builds it.
type PassthroughArgs struct {
Nodes []Node `json:"nodes"`
}
type CompoundShortFlag struct { type CompoundShortFlag struct {
Nodes []Node `json:"nodes"` Nodes []Node `json:"nodes"`
} }
@ -21,6 +25,12 @@ type Ident struct {
Literal string `json:"literal"` Literal string `json:"literal"`
} }
// BadArg is the node recorded for input that could not be parsed as an
// argument. Literal is the text of the offending token, and From/To are the
// positions bracketing the span that was skipped.
//
// The fields carry lower-case JSON tags so that a serialized BadArg matches
// the key style of the other node types (e.g. Ident's "literal").
type BadArg struct {
	Literal string `json:"literal"`
	From    Pos    `json:"from"`
	To      Pos    `json:"to"`
}
type Command struct { type Command struct {
Name string `json:"name"` Name string `json:"name"`
Values map[string]string `json:"values"` Values map[string]string `json:"values"`

@ -10,24 +10,10 @@ import (
"github.com/pkg/errors" "github.com/pkg/errors"
) )
const (
OneOrMoreValue NValue = -2
ZeroOrMoreValue NValue = -1
ZeroValue NValue = 0
)
var ( var (
ErrSyntax = errors.New("syntax error") ErrSyntax = errors.New("syntax error")
DefaultParserConfig = &ParserConfig{
Commands: map[string]CommandConfig{},
Flags: map[string]FlagConfig{},
ScannerConfig: DefaultScannerConfig,
}
) )
type NValue int
func ParseArgs(args []string, pCfg *ParserConfig) (*ParseTree, error) { func ParseArgs(args []string, pCfg *ParserConfig) (*ParseTree, error) {
reEncoded := strings.Join(args, string(nul)) reEncoded := strings.Join(args, string(nul))
@ -55,26 +41,7 @@ type ParseTree struct {
type scanEntry struct { type scanEntry struct {
tok Token tok Token
lit string lit string
pos int pos Pos
}
type ParserConfig struct {
Prog CommandConfig
Commands map[string]CommandConfig
Flags map[string]FlagConfig
ScannerConfig *ScannerConfig
}
type CommandConfig struct {
NValue NValue
ValueNames []string
Flags map[string]FlagConfig
}
type FlagConfig struct {
NValue NValue
ValueNames []string
} }
func NewParser(r io.Reader, pCfg *ParserConfig) *Parser { func NewParser(r io.Reader, pCfg *ParserConfig) *Parser {
@ -314,7 +281,7 @@ func (p *Parser) scanIdent() (string, error) {
unscanBuf := []scanEntry{} unscanBuf := []scanEntry{}
if tok == ASSIGN || tok == ARG_DELIMITER { if tok == ASSIGN || tok == ARG_DELIMITER {
entry := scanEntry{tok: tok, lit: lit, pos: pos} entry := scanEntry{tok: tok, lit: lit, pos: Pos(pos)}
tracef("scanIdent tok=%s; scanning next and pushing to unscan buffer entry=%+#v", tok, entry) tracef("scanIdent tok=%s; scanning next and pushing to unscan buffer entry=%+#v", tok, entry)
@ -327,7 +294,7 @@ func (p *Parser) scanIdent() (string, error) {
return lit, nil return lit, nil
} }
entry := scanEntry{tok: tok, lit: lit, pos: pos} entry := scanEntry{tok: tok, lit: lit, pos: Pos(pos)}
tracef("scanIdent tok=%s; unscanning entry=%+#v", tok, entry) tracef("scanIdent tok=%s; unscanning entry=%+#v", tok, entry)
@ -340,7 +307,7 @@ func (p *Parser) scanIdent() (string, error) {
return "", errors.Wrapf(ErrSyntax, "expected ident at pos=%v but got %s (%q)", pos, tok, lit) return "", errors.Wrapf(ErrSyntax, "expected ident at pos=%v but got %s (%q)", pos, tok, lit)
} }
func (p *Parser) scan() (Token, string, int) { func (p *Parser) scan() (Token, string, Pos) {
if len(p.buf) != 0 { if len(p.buf) != 0 {
entry, buf := p.buf[len(p.buf)-1], p.buf[:len(p.buf)-1] entry, buf := p.buf[len(p.buf)-1], p.buf[:len(p.buf)-1]
p.buf = buf p.buf = buf
@ -356,7 +323,7 @@ func (p *Parser) scan() (Token, string, int) {
return tok, lit, pos return tok, lit, pos
} }
func (p *Parser) unscan(tok Token, lit string, pos int) { func (p *Parser) unscan(tok Token, lit string, pos Pos) {
entry := scanEntry{tok: tok, lit: lit, pos: pos} entry := scanEntry{tok: tok, lit: lit, pos: pos}
tracef("unscan entry=%s %+#v", tok, entry) tracef("unscan entry=%s %+#v", tok, entry)

@ -0,0 +1,188 @@
package argh
import (
"fmt"
"io"
"strings"
)
// parser2 is a single-token-lookahead parser over a Scanner. It keeps the
// most recently scanned token in tok/lit/pos and is driven by calling next.
type parser2 struct {
// s is the token source.
s *Scanner
// commands is the set of identifiers that are treated as command names.
commands map[string]struct{}
// errors collects errors encountered while parsing.
errors ScannerErrorList
// tok, lit and pos describe the current (most recently scanned) token.
tok Token
lit string
pos Pos
}
// ParseArgs2 parses args into a ParseTree. The arguments are joined with the
// nul separator into a single stream for the scanner; any identifier listed
// in commands is treated as a command name.
func ParseArgs2(args, commands []string) (*ParseTree, error) {
	joined := strings.Join(args, string(nul))

	p := new(parser2)
	p.init(strings.NewReader(joined), commands)

	tracef("ParseArgs2 parser=%+#v", p)

	return p.parseArgs()
}
// init resets the parser state, builds the command-name set from commands,
// attaches a default-configured Scanner reading from r, and primes the
// parser by scanning the first token.
func (p *parser2) init(r io.Reader, commands []string) {
	known := map[string]struct{}{}
	for i := range commands {
		known[commands[i]] = struct{}{}
	}

	p.errors = ScannerErrorList{}
	p.commands = known
	p.s = NewScanner(r, nil)

	p.next()
}
// parseArgs builds the parse tree. The current token's literal becomes the
// program name; subsequent tokens are parsed as program nodes until end of
// line or a stop flag, and whatever remains is gathered by parsePassthrough.
// The resulting tree always has two top-level nodes: the program and the
// passthrough arguments.
func (p *parser2) parseArgs() (*ParseTree, error) {
	if p.errors.Len() > 0 {
		tracef("parseArgs bailing due to initial error")
		return nil, p.errors.Err()
	}

	prog := &Program{Name: p.lit, Values: map[string]string{}, Nodes: []Node{}}
	p.next()

	for {
		if p.tok == EOL || p.tok == STOP_FLAG {
			break
		}
		prog.Nodes = append(prog.Nodes, p.parseArg())
	}

	passthrough := p.parsePassthrough()

	return &ParseTree{Nodes: []Node{prog, passthrough}}, nil
}
// next advances the parser by one token, replacing tok/lit/pos with the
// scanner's next result. The token is traced before and after the advance.
func (p *parser2) next() {
	tracef("parser2.next() <- %v %q %v", p.tok, p.lit, p.pos)
	defer func() {
		tracef("parser2.next() -> %v %q %v", p.tok, p.lit, p.pos)
	}()

	tok, lit, pos := p.s.Scan()
	p.tok = tok
	p.lit = lit
	p.pos = pos
}
// parseArg parses one node starting at the current token: an argument
// delimiter, a command (an identifier found in the command set), a plain
// identifier, or a flag. Any other token yields a BadArg spanning from the
// current position to wherever advanceArg stops.
func (p *parser2) parseArg() Node {
	if p.tok == ARG_DELIMITER {
		p.next()
		return &ArgDelimiter{}
	}

	if p.tok == IDENT {
		if _, isCommand := p.commands[p.lit]; isCommand {
			return p.parseCommand()
		}
		return p.parseIdent()
	}

	if p.tok == LONG_FLAG || p.tok == SHORT_FLAG || p.tok == COMPOUND_SHORT_FLAG {
		return p.parseFlag()
	}

	bad := &BadArg{Literal: p.lit, From: p.pos}
	p.advanceArg()
	bad.To = p.pos

	return bad
}
// advanceArg skips tokens until the parser is resynchronized on a token that
// can start a new argument (identifier or any flag form), on the stop flag,
// or at end of line.
//
// STOP_FLAG is part of the synchronization set so that recovering from a bad
// argument never swallows the "--" marker; otherwise everything after it
// would be parsed as regular arguments instead of passthrough arguments.
// Callers are expected to invoke this only when the current token is not
// itself in the synchronization set, so at least one token is consumed.
func (p *parser2) advanceArg() {
	for p.tok != EOL {
		switch p.tok {
		case IDENT, LONG_FLAG, SHORT_FLAG, COMPOUND_SHORT_FLAG, STOP_FLAG:
			return
		}
		p.next()
	}
}
// parseCommand parses a command whose name is the current token's literal.
// It then consumes the tokens that belong to the command:
//
//   - argument delimiters are skipped,
//   - identifiers and the stdin flag are stored in Values, keyed by the
//     decimal loop index at which they were seen,
//   - flags are parsed and appended to Nodes.
//
// Parsing stops at end of line, at a token whose literal names another
// command, or at any token the command cannot own (for example the stop
// flag). In the latter cases the token is left as the current token for the
// caller to handle.
func (p *parser2) parseCommand() Node {
	node := &Command{Name: p.lit, Values: map[string]string{}, Nodes: []Node{}}

scan:
	for i := 0; p.tok != EOL; i++ {
		p.next()

		if _, ok := p.commands[p.lit]; ok {
			break
		}

		switch p.tok {
		case ARG_DELIMITER:
			continue
		case IDENT, STDIN_FLAG:
			node.Values[fmt.Sprintf("%d", i)] = p.lit
		case LONG_FLAG, SHORT_FLAG, COMPOUND_SHORT_FLAG:
			node.Nodes = append(node.Nodes, p.parseFlag())
		default:
			// A bare break here would only leave the switch; the label is
			// required to actually stop consuming tokens for this command.
			break scan
		}
	}

	return node
}
// parseIdent wraps the current token's literal in an Ident node and then
// advances to the next token.
func (p *parser2) parseIdent() Node {
	ident := &Ident{Literal: p.lit}
	p.next()
	return ident
}
// parseFlag dispatches on the current token to the matching flag parser and
// advances past the flag token once the node has been built. It panics if
// the current token is not one of the flag tokens.
func (p *parser2) parseFlag() Node {
	// Deferred so the flag parsers still see the flag's literal.
	defer p.next()

	if p.tok == SHORT_FLAG {
		return p.parseShortFlag()
	}
	if p.tok == LONG_FLAG {
		return p.parseLongFlag()
	}
	if p.tok == COMPOUND_SHORT_FLAG {
		return p.parseCompoundShortFlag()
	}

	panic(fmt.Sprintf("token %v cannot be parsed as flag", p.tok))
}
// parseShortFlag builds a Flag node from the current short-flag literal. The
// flag name is the first rune following the one-byte flag prefix.
//
// The name is decoded as a rune rather than taken as a single byte so that a
// non-ASCII flag character is preserved intact, and a literal that is too
// short to carry a name yields an empty name instead of an index panic.
func (p *parser2) parseShortFlag() Node {
	name := ""
	if len(p.lit) > 1 {
		if runes := []rune(p.lit[1:]); len(runes) > 0 {
			name = string(runes[0])
		}
	}

	node := &Flag{Name: name}
	// TODO: moar stuff
	return node
}
// parseLongFlag builds a Flag node whose name is the current literal with
// its first two bytes (the long-flag prefix) removed.
func (p *parser2) parseLongFlag() Node {
	name := p.lit[2:]
	// TODO: moar stuff
	return &Flag{Name: name}
}
// parseCompoundShortFlag expands the current literal into one Flag node per
// rune following the first byte (the flag prefix) and returns them wrapped
// in a CompoundShortFlag.
func (p *parser2) parseCompoundShortFlag() Node {
	compound := &CompoundShortFlag{Nodes: []Node{}}

	for _, name := range p.lit[1:] {
		compound.Nodes = append(compound.Nodes, &Flag{Name: string(name)})
	}

	return compound
}
// parsePassthrough collects every remaining argument up to end of line as an
// Ident node, without interpreting it, and returns them wrapped in a
// PassthroughArgs node. The node list is empty (never nil) when nothing
// remains.
//
// The stop flag that introduces the passthrough section and the argument
// delimiters between arguments are structural tokens, not arguments, so they
// are not recorded as passthrough values.
func (p *parser2) parsePassthrough() Node {
	nodes := []Node{}

	// Drop only the leading stop flag; a later "--" is a real argument.
	if p.tok == STOP_FLAG {
		p.next()
	}

	for ; p.tok != EOL; p.next() {
		if p.tok == ARG_DELIMITER {
			continue
		}
		nodes = append(nodes, &Ident{Literal: p.lit})
	}

	return &PassthroughArgs{Nodes: nodes}
}

@ -0,0 +1,36 @@
package argh_test
import (
"testing"
"git.meatballhat.com/x/box-o-sand/argh"
"github.com/davecgh/go-spew/spew"
)
// TestParser2 runs ParseArgs2 over a table of argument lists and dumps the
// resulting parse tree. A parse error or a missing tree fails the subtest;
// previously an error was only logged, so the test could never fail.
func TestParser2(t *testing.T) {
	for _, tc := range []struct {
		name     string
		args     []string
		commands []string
	}{
		{
			name: "basic",
			args: []string{
				"pies", "-eat", "--wat", "hello",
			},
			commands: []string{
				"hello",
			},
		},
	} {
		t.Run(tc.name, func(ct *testing.T) {
			pt, err := argh.ParseArgs2(tc.args, tc.commands)
			if err != nil {
				ct.Fatalf("err=%+#v", err)
			}
			if pt == nil {
				ct.Fatal("expected a non-nil parse tree")
			}

			spew.Dump(pt)
		})
	}
}

@ -0,0 +1,37 @@
package argh
// Named NValue settings. Zero and the negative values are reserved as
// sentinels.
// NOTE(review): judging by the names, these presumably mean "at least one
// value", "any number of values" and "no values" respectively — confirm
// against the code that interprets NValue.
const (
OneOrMoreValue NValue = -2
ZeroOrMoreValue NValue = -1
ZeroValue NValue = 0
)
var (
// DefaultParserConfig is a ParserConfig with empty command and flag maps
// that uses DefaultScannerConfig for scanning. It is a shared pointer, so
// mutations are visible to every user of the default.
DefaultParserConfig = &ParserConfig{
Commands: map[string]CommandConfig{},
Flags: map[string]FlagConfig{},
ScannerConfig: DefaultScannerConfig,
}
)
// NValue is the integer type of the NValue field on CommandConfig and
// FlagConfig; see OneOrMoreValue, ZeroOrMoreValue and ZeroValue for the
// named settings.
// NOTE(review): presumably a positive NValue is an exact value count — confirm.
type NValue int
// ParserConfig is the configuration handed to the parser.
type ParserConfig struct {
// Prog is the CommandConfig for the program itself.
Prog CommandConfig
// Commands and Flags map names to their configuration.
// NOTE(review): presumably keyed by command/flag name without prefix — confirm.
Commands map[string]CommandConfig
Flags map[string]FlagConfig
// ScannerConfig is the configuration for the underlying scanner.
ScannerConfig *ScannerConfig
}
// CommandConfig describes a single command: its value settings plus the
// flags and commands configured beneath it.
type CommandConfig struct {
// NValue is the value-count setting for the command (see NValue).
NValue NValue
// ValueNames lists names for the command's values.
// NOTE(review): presumably positional, one name per value — confirm.
ValueNames []string
// Flags and Commands hold configuration nested under this command.
Flags map[string]FlagConfig
Commands map[string]CommandConfig
}
// FlagConfig describes a single flag's value settings.
type FlagConfig struct {
// NValue is the value-count setting for the flag (see NValue).
NValue NValue
// ValueNames lists names for the flag's values.
// NOTE(review): presumably positional, one name per value — confirm.
ValueNames []string
}

@ -4,8 +4,10 @@ import (
"bufio" "bufio"
"bytes" "bytes"
"errors" "errors"
"fmt"
"io" "io"
"log" "log"
"sort"
"unicode" "unicode"
) )
@ -34,6 +36,74 @@ type ScannerConfig struct {
MultiValueDelim rune MultiValueDelim rune
} }
// ScannerError is largely borrowed from go/scanner.Error. It pairs an error
// message with the position it refers to.
type ScannerError struct {
// Pos is the position the error refers to; it may be invalid (unset).
Pos Position
// Msg is the error message.
Msg string
}
// Error implements the error interface. The message is prefixed with
// "<position>:" when the position is valid and returned bare otherwise.
func (e ScannerError) Error() string {
	if !e.Pos.IsValid() {
		return e.Msg
	}
	return e.Pos.String() + ":" + e.Msg
}
// ScannerErrorList is largely borrowed from go/scanner.ErrorList. It is a
// list of *ScannerError values; the zero value is an empty list ready to use.
type ScannerErrorList []*ScannerError
// Add appends a ScannerError with the given position and message to the list.
func (el *ScannerErrorList) Add(pos Position, msg string) {
	entry := &ScannerError{Pos: pos, Msg: msg}
	*el = append(*el, entry)
}
// Reset truncates the list to zero length, keeping its backing storage.
func (el *ScannerErrorList) Reset() {
	*el = (*el)[:0]
}
// Len reports the number of errors in the list (part of sort.Interface).
func (el ScannerErrorList) Len() int {
	return len(el)
}
// Swap exchanges the errors at indices i and j (part of sort.Interface).
func (el ScannerErrorList) Swap(i, j int) {
	el[j], el[i] = el[i], el[j]
}
// Less orders errors by column first and by message text when the columns
// are equal (part of sort.Interface).
func (el ScannerErrorList) Less(i, j int) bool {
	left, right := el[i], el[j]
	if left.Pos.Column == right.Pos.Column {
		return left.Msg < right.Msg
	}
	return left.Pos.Column < right.Pos.Column
}
// Sort sorts the list in place using the ordering defined by Less.
func (el ScannerErrorList) Sort() {
sort.Sort(el)
}
// Error implements the error interface. An empty list reports "no errors",
// a single-entry list reports that entry, and a longer list reports the
// first entry followed by a count of the remaining ones.
func (el ScannerErrorList) Error() string {
	if len(el) == 0 {
		return "no errors"
	}
	if len(el) == 1 {
		return el[0].Error()
	}
	return fmt.Sprintf("%s (and %d more errors)", el[0], len(el)-1)
}
// Err returns the list as an error, or a literal nil when the list is empty
// so that callers can compare the result against nil.
func (el ScannerErrorList) Err() error {
	if len(el) > 0 {
		return el
	}
	return nil
}
// PrintScannerError writes err to w. A ScannerErrorList is written one entry
// per line; any other non-nil error is written as a single line; a nil error
// writes nothing.
func PrintScannerError(w io.Writer, err error) {
	if err == nil {
		return
	}

	list, isList := err.(ScannerErrorList)
	if !isList {
		fmt.Fprintf(w, "%s\n", err)
		return
	}

	for _, entry := range list {
		fmt.Fprintf(w, "%s\n", entry)
	}
}
func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner { func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner {
if cfg == nil { if cfg == nil {
cfg = DefaultScannerConfig cfg = DefaultScannerConfig
@ -45,7 +115,7 @@ func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner {
} }
} }
func (s *Scanner) Scan() (Token, string, int) { func (s *Scanner) Scan() (Token, string, Pos) {
ch, pos := s.read() ch, pos := s.read()
if s.isBlankspace(ch) { if s.isBlankspace(ch) {
@ -77,24 +147,24 @@ func (s *Scanner) Scan() (Token, string, int) {
return ILLEGAL, string(ch), pos return ILLEGAL, string(ch), pos
} }
func (s *Scanner) read() (rune, int) { func (s *Scanner) read() (rune, Pos) {
ch, _, err := s.r.ReadRune() ch, _, err := s.r.ReadRune()
s.i++ s.i++
if errors.Is(err, io.EOF) { if errors.Is(err, io.EOF) {
return eol, s.i return eol, Pos(s.i)
} else if err != nil { } else if err != nil {
log.Printf("unknown scanner error=%+v", err) log.Printf("unknown scanner error=%+v", err)
return eol, s.i return eol, Pos(s.i)
} }
return ch, s.i return ch, Pos(s.i)
} }
func (s *Scanner) unread() int { func (s *Scanner) unread() Pos {
_ = s.r.UnreadRune() _ = s.r.UnreadRune()
s.i-- s.i--
return s.i return Pos(s.i)
} }
func (s *Scanner) isBlankspace(ch rune) bool { func (s *Scanner) isBlankspace(ch rune) bool {
@ -117,7 +187,7 @@ func (s *Scanner) isAssignmentOperator(ch rune) bool {
return ch == s.cfg.AssignmentOperator return ch == s.cfg.AssignmentOperator
} }
func (s *Scanner) scanBlankspace() (Token, string, int) { func (s *Scanner) scanBlankspace() (Token, string, Pos) {
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
ch, pos := s.read() ch, pos := s.read()
buf.WriteRune(ch) buf.WriteRune(ch)
@ -138,7 +208,7 @@ func (s *Scanner) scanBlankspace() (Token, string, int) {
return BS, buf.String(), pos return BS, buf.String(), pos
} }
func (s *Scanner) scanArg() (Token, string, int) { func (s *Scanner) scanArg() (Token, string, Pos) {
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
ch, pos := s.read() ch, pos := s.read()
buf.WriteRune(ch) buf.WriteRune(ch)

@ -2,21 +2,49 @@
package argh package argh
import "fmt"
const ( const (
ILLEGAL Token = iota ILLEGAL Token = iota
EOL EOL
EMPTY EMPTY // ''
BS BS // ' ' '\t' '\n'
IDENT IDENT // char group without flag prefix: 'some' 'words'
ARG_DELIMITER ARG_DELIMITER // rune(0)
COMMAND ASSIGN // '='
ASSIGN MULTI_VALUE_DELIMITER // ','
MULTI_VALUE_DELIMITER LONG_FLAG // char group with double flag prefix: '--flag'
LONG_FLAG SHORT_FLAG // single char with single flag prefix: '-f'
SHORT_FLAG COMPOUND_SHORT_FLAG // char group with single flag prefix: '-flag'
COMPOUND_SHORT_FLAG STDIN_FLAG // '-'
STDIN_FLAG STOP_FLAG // '--'
STOP_FLAG
) )
type Token int type Token int
// Position is adapted from go/token.Position. It identifies a location by
// its column; columns start at 1, so the zero value is not a valid position.
type Position struct {
	Column int
}

// IsValid reports whether the position has a column of at least 1.
func (p *Position) IsValid() bool {
	return p.Column >= 1
}

// String renders the column as a decimal number, or "-" when the position
// is not valid.
func (p Position) String() string {
	if !p.IsValid() {
		return "-"
	}
	return fmt.Sprintf("%d", p.Column)
}
// Pos is borrowed from go/token.Pos. It is an integer position as reported
// by the scanner alongside each token.
type Pos int
// NoPos is the zero value of Pos and denotes the absence of a position.
const NoPos Pos = 0
// IsValid reports whether p is anything other than NoPos.
func (p Pos) IsValid() bool {
return p != NoPos
}

@ -14,19 +14,18 @@ func _() {
_ = x[BS-3] _ = x[BS-3]
_ = x[IDENT-4] _ = x[IDENT-4]
_ = x[ARG_DELIMITER-5] _ = x[ARG_DELIMITER-5]
_ = x[COMMAND-6] _ = x[ASSIGN-6]
_ = x[ASSIGN-7] _ = x[MULTI_VALUE_DELIMITER-7]
_ = x[MULTI_VALUE_DELIMITER-8] _ = x[LONG_FLAG-8]
_ = x[LONG_FLAG-9] _ = x[SHORT_FLAG-9]
_ = x[SHORT_FLAG-10] _ = x[COMPOUND_SHORT_FLAG-10]
_ = x[COMPOUND_SHORT_FLAG-11] _ = x[STDIN_FLAG-11]
_ = x[STDIN_FLAG-12] _ = x[STOP_FLAG-12]
_ = x[STOP_FLAG-13]
} }
const _Token_name = "ILLEGALEOLEMPTYBSIDENTARG_DELIMITERCOMMANDASSIGNMULTI_VALUE_DELIMITERLONG_FLAGSHORT_FLAGCOMPOUND_SHORT_FLAGSTDIN_FLAGSTOP_FLAG" const _Token_name = "ILLEGALEOLEMPTYBSIDENTARG_DELIMITERASSIGNMULTI_VALUE_DELIMITERLONG_FLAGSHORT_FLAGCOMPOUND_SHORT_FLAGSTDIN_FLAGSTOP_FLAG"
var _Token_index = [...]uint8{0, 7, 10, 15, 17, 22, 35, 42, 48, 69, 78, 88, 107, 117, 126} var _Token_index = [...]uint8{0, 7, 10, 15, 17, 22, 35, 41, 62, 71, 81, 100, 110, 119}
func (i Token) String() string { func (i Token) String() string {
if i < 0 || i >= Token(len(_Token_index)-1) { if i < 0 || i >= Token(len(_Token_index)-1) {

Loading…
Cancel
Save