diff --git a/node.go b/node.go index e35c1cc..f5bddf0 100644 --- a/node.go +++ b/node.go @@ -7,6 +7,10 @@ type TypedNode struct { Node Node `json:"node"` } +type PassthroughArgs struct { + Nodes []Node `json:"nodes"` +} + type CompoundShortFlag struct { Nodes []Node `json:"nodes"` } @@ -21,6 +25,12 @@ type Ident struct { Literal string `json:"literal"` } +type BadArg struct { + Literal string + From Pos + To Pos +} + type Command struct { Name string `json:"name"` Values map[string]string `json:"values"` diff --git a/parser.go b/parser.go index dc7409f..713356a 100644 --- a/parser.go +++ b/parser.go @@ -10,24 +10,10 @@ import ( "github.com/pkg/errors" ) -const ( - OneOrMoreValue NValue = -2 - ZeroOrMoreValue NValue = -1 - ZeroValue NValue = 0 -) - var ( ErrSyntax = errors.New("syntax error") - - DefaultParserConfig = &ParserConfig{ - Commands: map[string]CommandConfig{}, - Flags: map[string]FlagConfig{}, - ScannerConfig: DefaultScannerConfig, - } ) -type NValue int - func ParseArgs(args []string, pCfg *ParserConfig) (*ParseTree, error) { reEncoded := strings.Join(args, string(nul)) @@ -55,26 +41,7 @@ type ParseTree struct { type scanEntry struct { tok Token lit string - pos int -} - -type ParserConfig struct { - Prog CommandConfig - Commands map[string]CommandConfig - Flags map[string]FlagConfig - - ScannerConfig *ScannerConfig -} - -type CommandConfig struct { - NValue NValue - ValueNames []string - Flags map[string]FlagConfig -} - -type FlagConfig struct { - NValue NValue - ValueNames []string + pos Pos } func NewParser(r io.Reader, pCfg *ParserConfig) *Parser { @@ -314,7 +281,7 @@ func (p *Parser) scanIdent() (string, error) { unscanBuf := []scanEntry{} if tok == ASSIGN || tok == ARG_DELIMITER { - entry := scanEntry{tok: tok, lit: lit, pos: pos} + entry := scanEntry{tok: tok, lit: lit, pos: Pos(pos)} tracef("scanIdent tok=%s; scanning next and pushing to unscan buffer entry=%+#v", tok, entry) @@ -327,7 +294,7 @@ func (p *Parser) scanIdent() (string, error) { return lit, nil } - entry := scanEntry{tok: tok, lit: lit, pos: pos} + entry := scanEntry{tok: tok, lit: lit, pos: Pos(pos)} tracef("scanIdent tok=%s; unscanning entry=%+#v", tok, entry) @@ -340,7 +307,7 @@ func (p *Parser) scanIdent() (string, error) { return "", errors.Wrapf(ErrSyntax, "expected ident at pos=%v but got %s (%q)", pos, tok, lit) } -func (p *Parser) scan() (Token, string, int) { +func (p *Parser) scan() (Token, string, Pos) { if len(p.buf) != 0 { entry, buf := p.buf[len(p.buf)-1], p.buf[:len(p.buf)-1] p.buf = buf @@ -356,7 +323,7 @@ func (p *Parser) scan() (Token, string, int) { return tok, lit, pos } -func (p *Parser) unscan(tok Token, lit string, pos int) { +func (p *Parser) unscan(tok Token, lit string, pos Pos) { entry := scanEntry{tok: tok, lit: lit, pos: pos} tracef("unscan entry=%s %+#v", tok, entry) diff --git a/parser2.go b/parser2.go new file mode 100644 index 0000000..f428c5d --- /dev/null +++ b/parser2.go @@ -0,0 +1,188 @@ +package argh + +import ( + "fmt" + "io" + "strings" +) + +type parser2 struct { + s *Scanner + + commands map[string]struct{} + + errors ScannerErrorList + + tok Token + lit string + pos Pos +} + +func ParseArgs2(args, commands []string) (*ParseTree, error) { + parser := &parser2{} + parser.init( + strings.NewReader(strings.Join(args, string(nul))), + commands, + ) + + tracef("ParseArgs2 parser=%+#v", parser) + + return parser.parseArgs() +} + +func (p *parser2) init(r io.Reader, commands []string) { + p.errors = ScannerErrorList{} + commandMap := map[string]struct{}{} + + for _, c := range commands { + commandMap[c] = struct{}{} + } + + p.s = NewScanner(r, nil) + p.commands = commandMap + + p.next() +} + +func (p *parser2) parseArgs() (*ParseTree, error) { + if p.errors.Len() != 0 { + tracef("parseArgs bailing due to initial error") + return nil, p.errors.Err() + } + + prog := &Program{ + Name: p.lit, + Values: map[string]string{}, + Nodes: []Node{}, + } + p.next() + + for p.tok != EOL && p.tok != STOP_FLAG { + prog.Nodes = append(prog.Nodes, p.parseArg()) + } + + return &ParseTree{ + Nodes: []Node{ + prog, p.parsePassthrough(), + }, + }, nil +} + +func (p *parser2) next() { + tracef("parser2.next() <- %v %q %v", p.tok, p.lit, p.pos) + defer func() { + tracef("parser2.next() -> %v %q %v", p.tok, p.lit, p.pos) + }() + + p.tok, p.lit, p.pos = p.s.Scan() +} + +func (p *parser2) parseArg() Node { + switch p.tok { + case ARG_DELIMITER: + p.next() + return &ArgDelimiter{} + case IDENT: + if _, ok := p.commands[p.lit]; ok { + return p.parseCommand() + } + return p.parseIdent() + case LONG_FLAG, SHORT_FLAG, COMPOUND_SHORT_FLAG: + return p.parseFlag() + } + + pos := p.pos + lit := p.lit + p.advanceArg() + return &BadArg{Literal: lit, From: pos, To: p.pos} +} + +func (p *parser2) advanceArg() { + for ; p.tok != EOL; p.next() { + switch p.tok { + case IDENT, LONG_FLAG, SHORT_FLAG, COMPOUND_SHORT_FLAG: + return + } + } +} + +func (p *parser2) parseCommand() Node { + node := &Command{Name: p.lit, Values: map[string]string{}, Nodes: []Node{}} + + for i := 0; p.tok != EOL; i++ { + p.next() + + if _, ok := p.commands[p.lit]; ok { + break + } + + switch p.tok { + case ARG_DELIMITER: + continue + case IDENT, STDIN_FLAG: + node.Values[fmt.Sprintf("%d", i)] = p.lit + case LONG_FLAG, SHORT_FLAG, COMPOUND_SHORT_FLAG: + node.Nodes = append(node.Nodes, p.parseFlag()) + default: + break + } + } + + return node +} + +func (p *parser2) parseIdent() Node { + defer p.next() + + node := &Ident{Literal: p.lit} + return node +} + +func (p *parser2) parseFlag() Node { + defer p.next() + + switch p.tok { + case SHORT_FLAG: + return p.parseShortFlag() + case LONG_FLAG: + return p.parseLongFlag() + case COMPOUND_SHORT_FLAG: + return p.parseCompoundShortFlag() + } + + panic(fmt.Sprintf("token %v cannot be parsed as flag", p.tok)) +} + +func (p *parser2) parseShortFlag() Node { + node := &Flag{Name: string(p.lit[1])} + // TODO: moar stuff + return node +} + +func (p *parser2) parseLongFlag() Node { + node := &Flag{Name: string(p.lit[2:])} + // TODO: moar stuff + return node +} + +func (p *parser2) parseCompoundShortFlag() Node { + flagNodes := []Node{} + + withoutFlagPrefix := p.lit[1:] + + for _, r := range withoutFlagPrefix { + flagNodes = append(flagNodes, &Flag{Name: string(r)}) + } + + return &CompoundShortFlag{Nodes: flagNodes} +} + +func (p *parser2) parsePassthrough() Node { + nodes := []Node{} + + for ; p.tok != EOL; p.next() { + nodes = append(nodes, &Ident{Literal: p.lit}) + } + + return &PassthroughArgs{Nodes: nodes} +} diff --git a/parser2_test.go b/parser2_test.go new file mode 100644 index 0000000..7679d03 --- /dev/null +++ b/parser2_test.go @@ -0,0 +1,36 @@ +package argh_test + +import ( + "testing" + + "git.meatballhat.com/x/box-o-sand/argh" + "github.com/davecgh/go-spew/spew" +) + +func TestParser2(t *testing.T) { + for _, tc := range []struct { + name string + args []string + commands []string + }{ + { + name: "basic", + args: []string{ + "pies", "-eat", "--wat", "hello", + }, + commands: []string{ + "hello", + }, + }, + } { + t.Run(tc.name, func(ct *testing.T) { + pt, err := argh.ParseArgs2(tc.args, tc.commands) + if err != nil { + ct.Logf("err=%+#v", err) + return + } + + spew.Dump(pt) + }) + } +} diff --git a/parser_config.go b/parser_config.go new file mode 100644 index 0000000..6b735db --- /dev/null +++ b/parser_config.go @@ -0,0 +1,37 @@ +package argh + +const ( + OneOrMoreValue NValue = -2 + ZeroOrMoreValue NValue = -1 + ZeroValue NValue = 0 +) + +var ( + DefaultParserConfig = &ParserConfig{ + Commands: map[string]CommandConfig{}, + Flags: map[string]FlagConfig{}, + ScannerConfig: DefaultScannerConfig, + } +) + +type NValue int + +type ParserConfig struct { + Prog CommandConfig + Commands map[string]CommandConfig + Flags map[string]FlagConfig + + ScannerConfig *ScannerConfig +} + +type CommandConfig struct { + NValue NValue + ValueNames []string + Flags map[string]FlagConfig + Commands map[string]CommandConfig +} + +type FlagConfig struct { + NValue NValue + ValueNames []string +} diff --git a/scanner.go b/scanner.go index 27f01d0..c073ad0 100644 --- a/scanner.go +++ b/scanner.go @@ -4,8 +4,10 @@ import ( "bufio" "bytes" "errors" + "fmt" "io" "log" + "sort" "unicode" ) @@ -34,6 +36,74 @@ type ScannerConfig struct { MultiValueDelim rune } +// ScannerError is largely borrowed from go/scanner.Error +type ScannerError struct { + Pos Position + Msg string +} + +func (e ScannerError) Error() string { + if e.Pos.IsValid() { + return e.Pos.String() + ":" + e.Msg + } + return e.Msg +} + +// ScannerErrorList is largely borrowed from go/scanner.ErrorList +type ScannerErrorList []*ScannerError + +func (el *ScannerErrorList) Add(pos Position, msg string) { + *el = append(*el, &ScannerError{Pos: pos, Msg: msg}) +} + +func (el *ScannerErrorList) Reset() { *el = (*el)[0:0] } + +func (el ScannerErrorList) Len() int { return len(el) } + +func (el ScannerErrorList) Swap(i, j int) { el[i], el[j] = el[j], el[i] } + +func (el ScannerErrorList) Less(i, j int) bool { + e := &el[i].Pos + f := &el[j].Pos + + if e.Column != f.Column { + return e.Column < f.Column + } + + return el[i].Msg < el[j].Msg +} + +func (el ScannerErrorList) Sort() { + sort.Sort(el) +} + +func (el ScannerErrorList) Error() string { + switch len(el) { + case 0: + return "no errors" + case 1: + return el[0].Error() + } + return fmt.Sprintf("%s (and %d more errors)", el[0], len(el)-1) +} + +func (el ScannerErrorList) Err() error { + if len(el) == 0 { + return nil + } + return el +} + +func PrintScannerError(w io.Writer, err error) { + if list, ok := err.(ScannerErrorList); ok { + for _, e := range list { + fmt.Fprintf(w, "%s\n", e) + } + } else if err != nil { + fmt.Fprintf(w, "%s\n", err) + } +} + func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner { if cfg == nil { cfg = DefaultScannerConfig @@ -45,7 +115,7 @@ func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner { } } -func (s *Scanner) Scan() (Token, string, int) { +func (s *Scanner) Scan() (Token, string, Pos) { ch, pos := s.read() if s.isBlankspace(ch) { @@ -77,24 +147,24 @@ func (s *Scanner) Scan() (Token, string, int) { return ILLEGAL, string(ch), pos } -func (s *Scanner) read() (rune, int) { +func (s *Scanner) read() (rune, Pos) { ch, _, err := s.r.ReadRune() s.i++ if errors.Is(err, io.EOF) { - return eol, s.i + return eol, Pos(s.i) } else if err != nil { log.Printf("unknown scanner error=%+v", err) - return eol, s.i + return eol, Pos(s.i) } - return ch, s.i + return ch, Pos(s.i) } -func (s *Scanner) unread() int { +func (s *Scanner) unread() Pos { _ = s.r.UnreadRune() s.i-- - return s.i + return Pos(s.i) } func (s *Scanner) isBlankspace(ch rune) bool { @@ -117,7 +187,7 @@ func (s *Scanner) isAssignmentOperator(ch rune) bool { return ch == s.cfg.AssignmentOperator } -func (s *Scanner) scanBlankspace() (Token, string, int) { +func (s *Scanner) scanBlankspace() (Token, string, Pos) { buf := &bytes.Buffer{} ch, pos := s.read() buf.WriteRune(ch) @@ -138,7 +208,7 @@ func (s *Scanner) scanBlankspace() (Token, string, int) { return BS, buf.String(), pos } -func (s *Scanner) scanArg() (Token, string, int) { +func (s *Scanner) scanArg() (Token, string, Pos) { buf := &bytes.Buffer{} ch, pos := s.read() buf.WriteRune(ch) diff --git a/token.go b/token.go index ec3f758..9e70b81 100644 --- a/token.go +++ b/token.go @@ -2,21 +2,49 @@ package argh +import "fmt" + const ( ILLEGAL Token = iota EOL - EMPTY - BS - IDENT - ARG_DELIMITER - COMMAND - ASSIGN - MULTI_VALUE_DELIMITER - LONG_FLAG - SHORT_FLAG - COMPOUND_SHORT_FLAG - STDIN_FLAG - STOP_FLAG + EMPTY // '' + BS // ' ' '\t' '\n' + IDENT // char group without flag prefix: 'some' 'words' + ARG_DELIMITER // rune(0) + ASSIGN // '=' + MULTI_VALUE_DELIMITER // ',' + LONG_FLAG // char group with double flag prefix: '--flag' + SHORT_FLAG // single char with single flag prefix: '-f' + COMPOUND_SHORT_FLAG // char group with single flag prefix: '-flag' + STDIN_FLAG // '-' + STOP_FLAG // '--' ) type Token int + +// Position is adapted from go/token.Position +type Position struct { + Column int +} + +func (p *Position) IsValid() bool { return p.Column > 0 } + +func (p Position) String() string { + s := "" + if p.IsValid() { + s = fmt.Sprintf("%d", p.Column) + } + if s == "" { + s = "-" + } + return s +} + +// Pos is borrowed from go/token.Pos +type Pos int + +const NoPos Pos = 0 + +func (p Pos) IsValid() bool { + return p != NoPos +} diff --git a/token_string.go b/token_string.go index 8c1b585..ff6a07e 100644 --- a/token_string.go +++ b/token_string.go @@ -14,19 +14,18 @@ func _() { _ = x[BS-3] _ = x[IDENT-4] _ = x[ARG_DELIMITER-5] - _ = x[COMMAND-6] - _ = x[ASSIGN-7] - _ = x[MULTI_VALUE_DELIMITER-8] - _ = x[LONG_FLAG-9] - _ = x[SHORT_FLAG-10] - _ = x[COMPOUND_SHORT_FLAG-11] - _ = x[STDIN_FLAG-12] - _ = x[STOP_FLAG-13] + _ = x[ASSIGN-6] + _ = x[MULTI_VALUE_DELIMITER-7] + _ = x[LONG_FLAG-8] + _ = x[SHORT_FLAG-9] + _ = x[COMPOUND_SHORT_FLAG-10] + _ = x[STDIN_FLAG-11] + _ = x[STOP_FLAG-12] } -const _Token_name = "ILLEGALEOLEMPTYBSIDENTARG_DELIMITERCOMMANDASSIGNMULTI_VALUE_DELIMITERLONG_FLAGSHORT_FLAGCOMPOUND_SHORT_FLAGSTDIN_FLAGSTOP_FLAG" +const _Token_name = "ILLEGALEOLEMPTYBSIDENTARG_DELIMITERASSIGNMULTI_VALUE_DELIMITERLONG_FLAGSHORT_FLAGCOMPOUND_SHORT_FLAGSTDIN_FLAGSTOP_FLAG" -var _Token_index = [...]uint8{0, 7, 10, 15, 17, 22, 35, 42, 48, 69, 78, 88, 107, 117, 126} +var _Token_index = [...]uint8{0, 7, 10, 15, 17, 22, 35, 41, 62, 71, 81, 100, 110, 119} func (i Token) String() string { if i < 0 || i >= Token(len(_Token_index)-1) {