Making a mess with a parser that works more like go/parser

This commit is contained in:
Dan Buch 2022-05-22 08:47:45 -04:00
parent 58842504c4
commit f2e0de1b66
Signed by: meatballhat
GPG Key ID: A12F782281063434
8 changed files with 404 additions and 69 deletions

View File

@ -7,6 +7,10 @@ type TypedNode struct {
Node Node `json:"node"`
}
// PassthroughArgs is a node that wraps a list of child nodes, serialized
// under the JSON key "nodes".
// NOTE(review): presumably these are the arguments left over once normal
// argument parsing stops (e.g. at the stop flag) — confirm against the parser.
type PassthroughArgs struct {
// Nodes holds the wrapped child nodes in the order they were collected.
Nodes []Node `json:"nodes"`
}
// CompoundShortFlag is a node that wraps a list of child nodes, serialized
// under the JSON key "nodes".
// NOTE(review): presumably one child per character of a grouped short flag
// such as "-abc" — confirm against the parser.
type CompoundShortFlag struct {
// Nodes holds the wrapped child nodes.
Nodes []Node `json:"nodes"`
}
@ -21,6 +25,12 @@ type Ident struct {
Literal string `json:"literal"`
}
// BadArg is a node carrying a literal together with a starting and an ending
// position.
// NOTE(review): presumably it marks input that could not be parsed as any
// other node type — confirm against the parser.
// NOTE(review): unlike the neighboring node types, these fields carry no
// `json` struct tags; confirm whether that is intentional.
type BadArg struct {
// Literal is the literal text recorded for the argument.
Literal string
// From is the position recorded as the start of the argument.
From Pos
// To is the position recorded as the end of the argument.
To Pos
}
type Command struct {
Name string `json:"name"`
Values map[string]string `json:"values"`

View File

@ -10,24 +10,10 @@ import (
"github.com/pkg/errors"
)
const (
OneOrMoreValue NValue = -2
ZeroOrMoreValue NValue = -1
ZeroValue NValue = 0
)
var (
ErrSyntax = errors.New("syntax error")
DefaultParserConfig = &ParserConfig{
Commands: map[string]CommandConfig{},
Flags: map[string]FlagConfig{},
ScannerConfig: DefaultScannerConfig,
}
)
type NValue int
func ParseArgs(args []string, pCfg *ParserConfig) (*ParseTree, error) {
reEncoded := strings.Join(args, string(nul))
@ -55,26 +41,7 @@ type ParseTree struct {
type scanEntry struct {
tok Token
lit string
pos int
}
type ParserConfig struct {
Prog CommandConfig
Commands map[string]CommandConfig
Flags map[string]FlagConfig
ScannerConfig *ScannerConfig
}
type CommandConfig struct {
NValue NValue
ValueNames []string
Flags map[string]FlagConfig
}
type FlagConfig struct {
NValue NValue
ValueNames []string
pos Pos
}
func NewParser(r io.Reader, pCfg *ParserConfig) *Parser {
@ -314,7 +281,7 @@ func (p *Parser) scanIdent() (string, error) {
unscanBuf := []scanEntry{}
if tok == ASSIGN || tok == ARG_DELIMITER {
entry := scanEntry{tok: tok, lit: lit, pos: pos}
entry := scanEntry{tok: tok, lit: lit, pos: Pos(pos)}
tracef("scanIdent tok=%s; scanning next and pushing to unscan buffer entry=%+#v", tok, entry)
@ -327,7 +294,7 @@ func (p *Parser) scanIdent() (string, error) {
return lit, nil
}
entry := scanEntry{tok: tok, lit: lit, pos: pos}
entry := scanEntry{tok: tok, lit: lit, pos: Pos(pos)}
tracef("scanIdent tok=%s; unscanning entry=%+#v", tok, entry)
@ -340,7 +307,7 @@ func (p *Parser) scanIdent() (string, error) {
return "", errors.Wrapf(ErrSyntax, "expected ident at pos=%v but got %s (%q)", pos, tok, lit)
}
func (p *Parser) scan() (Token, string, int) {
func (p *Parser) scan() (Token, string, Pos) {
if len(p.buf) != 0 {
entry, buf := p.buf[len(p.buf)-1], p.buf[:len(p.buf)-1]
p.buf = buf
@ -356,7 +323,7 @@ func (p *Parser) scan() (Token, string, int) {
return tok, lit, pos
}
func (p *Parser) unscan(tok Token, lit string, pos int) {
func (p *Parser) unscan(tok Token, lit string, pos Pos) {
entry := scanEntry{tok: tok, lit: lit, pos: pos}
tracef("unscan entry=%s %+#v", tok, entry)

188
argh/parser2.go Normal file
View File

@ -0,0 +1,188 @@
package argh
import (
"fmt"
"io"
"strings"
)
// parser2 is a parser that keeps exactly one scanned token at a time (tok,
// lit, pos) and moves forward by calling next.
type parser2 struct {
// s is the scanner that next reads tokens from.
s *Scanner
// commands is the set of identifier literals that are treated as command
// names.
commands map[string]struct{}
// errors is the list of errors collected by the parser; parseArgs refuses
// to parse when it is non-empty.
errors ScannerErrorList
// tok is the most recently scanned token.
tok Token
// lit is the literal text of the most recently scanned token.
lit string
// pos is the position reported by the scanner for the most recently
// scanned token.
pos Pos
}
// ParseArgs2 joins args with the nul rune, feeds the joined text to a fresh
// parser2, and returns the resulting parse tree. Identifiers whose literal
// appears in commands are parsed as commands.
func ParseArgs2(args, commands []string) (*ParseTree, error) {
	joined := strings.Join(args, string(nul))

	p := new(parser2)
	p.init(strings.NewReader(joined), commands)

	tracef("ParseArgs2 parser=%+#v", p)

	return p.parseArgs()
}
// init prepares the parser for use: it resets the error list, records the
// known command names as a set, creates a scanner over r with the default
// scanner configuration, and scans the first token.
func (p *parser2) init(r io.Reader, commands []string) {
	known := make(map[string]struct{}, len(commands))
	for i := range commands {
		known[commands[i]] = struct{}{}
	}

	p.errors = ScannerErrorList{}
	p.s = NewScanner(r, nil)
	p.commands = known

	// Prime tok/lit/pos so callers always start on a scanned token.
	p.next()
}
// parseArgs builds the parse tree for the whole input. The current token is
// taken as the program name; every following argument up to the end of input
// or the stop flag becomes a child of the program node, and whatever remains
// is collected by parsePassthrough. If errors were already recorded, nothing
// is parsed and those errors are returned.
func (p *parser2) parseArgs() (*ParseTree, error) {
	if p.errors.Len() > 0 {
		tracef("parseArgs bailing due to initial error")
		return nil, p.errors.Err()
	}

	program := &Program{
		Name:   p.lit,
		Values: map[string]string{},
		Nodes:  []Node{},
	}
	p.next()

	for {
		if p.tok == EOL || p.tok == STOP_FLAG {
			break
		}

		program.Nodes = append(program.Nodes, p.parseArg())
	}

	passthrough := p.parsePassthrough()

	return &ParseTree{Nodes: []Node{program, passthrough}}, nil
}
// next scans one token from the scanner into tok, lit and pos, tracing the
// parser's current token both before and after the scan.
func (p *parser2) next() {
	tracef("parser2.next() <- %v %q %v", p.tok, p.lit, p.pos)

	// Deferred so the "after" trace is emitted however the scan returns.
	defer func() {
		tracef("parser2.next() -> %v %q %v", p.tok, p.lit, p.pos)
	}()

	tok, lit, pos := p.s.Scan()
	p.tok, p.lit, p.pos = tok, lit, pos
}
// parseArg parses a single argument starting at the current token and returns
// its node: an ArgDelimiter, a Command or Ident for identifiers, a flag node
// for any flag token, or a BadArg spanning the skipped input for anything
// else.
func (p *parser2) parseArg() Node {
	if p.tok == ARG_DELIMITER {
		p.next()
		return &ArgDelimiter{}
	}

	if p.tok == IDENT {
		// Identifiers registered as commands start a command node.
		if _, isCommand := p.commands[p.lit]; isCommand {
			return p.parseCommand()
		}

		return p.parseIdent()
	}

	if p.tok == LONG_FLAG || p.tok == SHORT_FLAG || p.tok == COMPOUND_SHORT_FLAG {
		return p.parseFlag()
	}

	// Unrecognized token: record where it started, skip ahead, and record
	// where the parser ended up.
	bad := &BadArg{Literal: p.lit, From: p.pos}
	p.advanceArg()
	bad.To = p.pos

	return bad
}
// advanceArg skips tokens until the current token is an identifier, any kind
// of flag, or the end of input. It does not advance if the current token
// already is one of those.
func (p *parser2) advanceArg() {
	for p.tok != EOL {
		if p.tok == IDENT || p.tok == LONG_FLAG || p.tok == SHORT_FLAG || p.tok == COMPOUND_SHORT_FLAG {
			return
		}

		p.next()
	}
}
// parseCommand parses the command named by the current token along with the
// positional values and flags that follow it, and returns the Command node.
//
// Positional values (identifiers and the stdin flag) are stored in Values
// under consecutive decimal keys "0", "1", ... in the order they appear.
// Flags are appended to Nodes. Argument delimiters are skipped.
//
// Parsing of the command ends, leaving the current token untouched for the
// caller, when the end of input is reached, when the current literal is a
// known command name, or when any other token (for example the stop flag) is
// encountered.
func (p *parser2) parseCommand() Node {
	node := &Command{Name: p.lit, Values: map[string]string{}, Nodes: []Node{}}

	// Move past the command name itself.
	p.next()

	// valueIndex counts positional values only, so the keys stay contiguous
	// no matter how many delimiters or flags sit between the values.
	valueIndex := 0

	for p.tok != EOL {
		// Another known command name ends this command; the caller parses it.
		if _, ok := p.commands[p.lit]; ok {
			return node
		}

		switch p.tok {
		case ARG_DELIMITER:
			p.next()
		case IDENT, STDIN_FLAG:
			node.Values[fmt.Sprintf("%d", valueIndex)] = p.lit
			valueIndex++
			p.next()
		case LONG_FLAG, SHORT_FLAG, COMPOUND_SHORT_FLAG:
			// parseFlag advances past the flag token on its own, so the
			// loop must not advance again here.
			node.Nodes = append(node.Nodes, p.parseFlag())
		default:
			// A bare `break` here would only leave the switch; returning
			// actually stops command parsing and hands the token back to
			// the caller.
			return node
		}
	}

	return node
}
// parseIdent wraps the current token's literal in an Ident node and advances
// to the next token.
func (p *parser2) parseIdent() Node {
	ident := &Ident{Literal: p.lit}
	p.next()

	return ident
}
// parseFlag dispatches on the current flag token to the matching flag parser
// and advances past the token once the node has been built. It panics when
// the current token is not a short, long, or compound short flag.
func (p *parser2) parseFlag() Node {
	// The literal is consumed by the helpers below, so only advance on the
	// way out.
	defer p.next()

	if p.tok == SHORT_FLAG {
		return p.parseShortFlag()
	}

	if p.tok == LONG_FLAG {
		return p.parseLongFlag()
	}

	if p.tok == COMPOUND_SHORT_FLAG {
		return p.parseCompoundShortFlag()
	}

	panic(fmt.Sprintf("token %v cannot be parsed as flag", p.tok))
}
// parseShortFlag builds a Flag node from the current short-flag literal. The
// flag's name is everything after the leading one-byte flag prefix, which
// keeps a multi-byte flag character intact instead of converting a single
// byte of it. A literal too short to contain a name yields an empty name
// rather than a panic. It does not advance the parser.
func (p *parser2) parseShortFlag() Node {
	name := ""
	if len(p.lit) > 1 {
		name = p.lit[1:]
	}

	node := &Flag{Name: name}
	// TODO: moar stuff
	return node
}
// parseLongFlag builds a Flag node named after the current literal with its
// first two bytes (the long-flag prefix) removed. It does not advance the
// parser.
func (p *parser2) parseLongFlag() Node {
	name := p.lit[2:]
	// TODO: moar stuff
	return &Flag{Name: name}
}
// parseCompoundShortFlag splits the current literal, minus its first byte
// (the flag prefix), into one Flag node per rune and returns them wrapped in
// a CompoundShortFlag. It does not advance the parser.
func (p *parser2) parseCompoundShortFlag() Node {
	names := p.lit[1:]

	flags := make([]Node, 0, len(names))
	for _, name := range names {
		flags = append(flags, &Flag{Name: string(name)})
	}

	return &CompoundShortFlag{Nodes: flags}
}
// parsePassthrough consumes every remaining token up to the end of input,
// starting with the current one, and returns them as Ident nodes wrapped in a
// PassthroughArgs. The list is empty (not nil) when nothing remains.
func (p *parser2) parsePassthrough() Node {
	rest := []Node{}

	for p.tok != EOL {
		rest = append(rest, &Ident{Literal: p.lit})
		p.next()
	}

	return &PassthroughArgs{Nodes: rest}
}

36
argh/parser2_test.go Normal file
View File

@ -0,0 +1,36 @@
package argh_test
import (
"testing"
"git.meatballhat.com/x/box-o-sand/argh"
"github.com/davecgh/go-spew/spew"
)
// TestParser2 runs ParseArgs2 over a table of argument lists and dumps each
// resulting parse tree for inspection. A parse error or a missing tree fails
// the subtest instead of being logged and ignored.
func TestParser2(t *testing.T) {
	for _, tc := range []struct {
		name     string
		args     []string
		commands []string
	}{
		{
			name: "basic",
			args: []string{
				"pies", "-eat", "--wat", "hello",
			},
			commands: []string{
				"hello",
			},
		},
	} {
		t.Run(tc.name, func(ct *testing.T) {
			pt, err := argh.ParseArgs2(tc.args, tc.commands)
			if err != nil {
				// Logging and returning here would report the subtest as
				// passing even though parsing failed.
				ct.Fatalf("err=%+#v", err)
			}

			if pt == nil {
				ct.Fatal("expected a parse tree, got nil")
			}

			spew.Dump(pt)
		})
	}
}

37
argh/parser_config.go Normal file
View File

@ -0,0 +1,37 @@
package argh
// Named NValue constants.
// NOTE(review): the names suggest these describe how many values a command
// or flag accepts (one-or-more, zero-or-more, none) — confirm against the
// code that consumes NValue.
const (
OneOrMoreValue NValue = -2
ZeroOrMoreValue NValue = -1
ZeroValue NValue = 0
)
var (
// DefaultParserConfig is a ParserConfig with empty Commands and Flags maps
// and DefaultScannerConfig as its scanner configuration. It is a shared
// pointer, so mutations are visible to every user of the default.
DefaultParserConfig = &ParserConfig{
Commands: map[string]CommandConfig{},
Flags: map[string]FlagConfig{},
ScannerConfig: DefaultScannerConfig,
}
)
// NValue is the integer type of the NValue field on CommandConfig and
// FlagConfig.
type NValue int
// ParserConfig is the top-level parser configuration: a command configuration
// for the program itself, command and flag configurations keyed by string,
// and a scanner configuration.
type ParserConfig struct {
// Prog is the CommandConfig for the program itself.
Prog CommandConfig
// Commands maps a string key (presumably the command name — confirm) to
// its configuration.
Commands map[string]CommandConfig
// Flags maps a string key (presumably the flag name — confirm) to its
// configuration.
Flags map[string]FlagConfig
// ScannerConfig is the configuration for the scanner.
ScannerConfig *ScannerConfig
}
// CommandConfig is the configuration of a single command. It can nest further
// flag and command configurations.
type CommandConfig struct {
// NValue is the command's NValue setting.
// NOTE(review): presumably the number of values the command accepts —
// confirm.
NValue NValue
// ValueNames is a list of names.
// NOTE(review): presumably names for the command's positional values —
// confirm.
ValueNames []string
// Flags holds flag configurations keyed by string.
Flags map[string]FlagConfig
// Commands holds nested command configurations keyed by string.
Commands map[string]CommandConfig
}
// FlagConfig is the configuration of a single flag.
type FlagConfig struct {
// NValue is the flag's NValue setting.
// NOTE(review): presumably the number of values the flag accepts — confirm.
NValue NValue
// ValueNames is a list of names.
// NOTE(review): presumably names for the flag's values — confirm.
ValueNames []string
}

View File

@ -4,8 +4,10 @@ import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"log"
"sort"
"unicode"
)
@ -34,6 +36,74 @@ type ScannerConfig struct {
MultiValueDelim rune
}
// ScannerError is largely borrowed from go/scanner.Error. It pairs a message
// with the position it refers to.
type ScannerError struct {
	Pos Position
	Msg string
}

// Error returns the message on its own when the position is not valid, and
// the position followed by ":" and the message otherwise.
func (e ScannerError) Error() string {
	if !e.Pos.IsValid() {
		return e.Msg
	}

	return e.Pos.String() + ":" + e.Msg
}
// ScannerErrorList is largely borrowed from go/scanner.ErrorList. It is a
// list of scanner errors that can be sorted and that is itself usable as an
// error.
type ScannerErrorList []*ScannerError

// Add appends a new error with the given position and message to the list.
func (el *ScannerErrorList) Add(pos Position, msg string) {
	entry := &ScannerError{Pos: pos, Msg: msg}
	*el = append(*el, entry)
}

// Reset truncates the list to zero length, keeping its backing storage.
func (el *ScannerErrorList) Reset() {
	*el = (*el)[:0]
}

// Len returns the number of errors in the list.
func (el ScannerErrorList) Len() int {
	return len(el)
}

// Swap exchanges the errors at indexes i and j.
func (el ScannerErrorList) Swap(i, j int) {
	el[j], el[i] = el[i], el[j]
}

// Less orders errors by column first and by message second.
func (el ScannerErrorList) Less(i, j int) bool {
	left, right := el[i], el[j]

	if left.Pos.Column == right.Pos.Column {
		return left.Msg < right.Msg
	}

	return left.Pos.Column < right.Pos.Column
}

// Sort sorts the list in place using the ordering defined by Less.
func (el ScannerErrorList) Sort() {
	sort.Sort(el)
}

// Error returns "no errors" for an empty list, the sole error's text for a
// one-element list, and otherwise the first error's text followed by a count
// of the remaining errors.
func (el ScannerErrorList) Error() string {
	if len(el) == 0 {
		return "no errors"
	}

	if len(el) == 1 {
		return el[0].Error()
	}

	return fmt.Sprintf("%s (and %d more errors)", el[0], len(el)-1)
}

// Err returns nil for an empty list and the list itself otherwise, so callers
// never receive a non-nil error that wraps an empty list.
func (el ScannerErrorList) Err() error {
	if len(el) > 0 {
		return el
	}

	return nil
}
// PrintScannerError writes err to w. A ScannerErrorList is printed one error
// per line; any other non-nil error is printed on a single line; a nil error
// prints nothing.
func PrintScannerError(w io.Writer, err error) {
	switch v := err.(type) {
	case nil:
		return
	case ScannerErrorList:
		for i := range v {
			fmt.Fprintf(w, "%s\n", v[i])
		}
	default:
		fmt.Fprintf(w, "%s\n", v)
	}
}
func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner {
if cfg == nil {
cfg = DefaultScannerConfig
@ -45,7 +115,7 @@ func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner {
}
}
func (s *Scanner) Scan() (Token, string, int) {
func (s *Scanner) Scan() (Token, string, Pos) {
ch, pos := s.read()
if s.isBlankspace(ch) {
@ -77,24 +147,24 @@ func (s *Scanner) Scan() (Token, string, int) {
return ILLEGAL, string(ch), pos
}
func (s *Scanner) read() (rune, int) {
func (s *Scanner) read() (rune, Pos) {
ch, _, err := s.r.ReadRune()
s.i++
if errors.Is(err, io.EOF) {
return eol, s.i
return eol, Pos(s.i)
} else if err != nil {
log.Printf("unknown scanner error=%+v", err)
return eol, s.i
return eol, Pos(s.i)
}
return ch, s.i
return ch, Pos(s.i)
}
func (s *Scanner) unread() int {
func (s *Scanner) unread() Pos {
_ = s.r.UnreadRune()
s.i--
return s.i
return Pos(s.i)
}
func (s *Scanner) isBlankspace(ch rune) bool {
@ -117,7 +187,7 @@ func (s *Scanner) isAssignmentOperator(ch rune) bool {
return ch == s.cfg.AssignmentOperator
}
func (s *Scanner) scanBlankspace() (Token, string, int) {
func (s *Scanner) scanBlankspace() (Token, string, Pos) {
buf := &bytes.Buffer{}
ch, pos := s.read()
buf.WriteRune(ch)
@ -138,7 +208,7 @@ func (s *Scanner) scanBlankspace() (Token, string, int) {
return BS, buf.String(), pos
}
func (s *Scanner) scanArg() (Token, string, int) {
func (s *Scanner) scanArg() (Token, string, Pos) {
buf := &bytes.Buffer{}
ch, pos := s.read()
buf.WriteRune(ch)

View File

@ -2,21 +2,49 @@
package argh
import "fmt"
const (
ILLEGAL Token = iota
EOL
EMPTY
BS
IDENT
ARG_DELIMITER
COMMAND
ASSIGN
MULTI_VALUE_DELIMITER
LONG_FLAG
SHORT_FLAG
COMPOUND_SHORT_FLAG
STDIN_FLAG
STOP_FLAG
EMPTY // ''
BS // ' ' '\t' '\n'
IDENT // char group without flag prefix: 'some' 'words'
ARG_DELIMITER // rune(0)
ASSIGN // '='
MULTI_VALUE_DELIMITER // ','
LONG_FLAG // char group with double flag prefix: '--flag'
SHORT_FLAG // single char with single flag prefix: '-f'
COMPOUND_SHORT_FLAG // char group with single flag prefix: '-flag'
STDIN_FLAG // '-'
STOP_FLAG // '--'
)
// Token is the integer type of the token constants declared in this file
// (ILLEGAL, EOL, IDENT, and so on).
type Token int
// Position is adapted from go/token.Position. It records a column only.
type Position struct {
	Column int
}

// IsValid reports whether the position has a column greater than zero.
func (p *Position) IsValid() bool {
	return 0 < p.Column
}

// String returns the column as a decimal number, or "-" when the position is
// not valid.
func (p Position) String() string {
	if !p.IsValid() {
		return "-"
	}

	return fmt.Sprintf("%d", p.Column)
}
// Pos is borrowed from go/token.Pos. It is an integer position value.
type Pos int

// NoPos is the zero Pos; it is the only value IsValid rejects.
const NoPos Pos = 0

// IsValid reports whether p differs from NoPos.
func (p Pos) IsValid() bool { return p != NoPos }

View File

@ -14,19 +14,18 @@ func _() {
_ = x[BS-3]
_ = x[IDENT-4]
_ = x[ARG_DELIMITER-5]
_ = x[COMMAND-6]
_ = x[ASSIGN-7]
_ = x[MULTI_VALUE_DELIMITER-8]
_ = x[LONG_FLAG-9]
_ = x[SHORT_FLAG-10]
_ = x[COMPOUND_SHORT_FLAG-11]
_ = x[STDIN_FLAG-12]
_ = x[STOP_FLAG-13]
_ = x[ASSIGN-6]
_ = x[MULTI_VALUE_DELIMITER-7]
_ = x[LONG_FLAG-8]
_ = x[SHORT_FLAG-9]
_ = x[COMPOUND_SHORT_FLAG-10]
_ = x[STDIN_FLAG-11]
_ = x[STOP_FLAG-12]
}
const _Token_name = "ILLEGALEOLEMPTYBSIDENTARG_DELIMITERCOMMANDASSIGNMULTI_VALUE_DELIMITERLONG_FLAGSHORT_FLAGCOMPOUND_SHORT_FLAGSTDIN_FLAGSTOP_FLAG"
const _Token_name = "ILLEGALEOLEMPTYBSIDENTARG_DELIMITERASSIGNMULTI_VALUE_DELIMITERLONG_FLAGSHORT_FLAGCOMPOUND_SHORT_FLAGSTDIN_FLAGSTOP_FLAG"
var _Token_index = [...]uint8{0, 7, 10, 15, 17, 22, 35, 42, 48, 69, 78, 88, 107, 117, 126}
var _Token_index = [...]uint8{0, 7, 10, 15, 17, 22, 35, 41, 62, 71, 81, 100, 110, 119}
func (i Token) String() string {
if i < 0 || i >= Token(len(_Token_index)-1) {