From 451be7bf31d3f0eb1f7702db47e92865c61cc608 Mon Sep 17 00:00:00 2001
From: Dan Buch
Date: Wed, 11 May 2022 22:11:05 -0400
Subject: [PATCH] Making a mess with command line parsing

---
Reviewer note: the hunks below were edited after the commit was exported, so
the blob ids on the "index" lines and the commit id on the first line describe
the original content and were not regenerated.

 argh.go          |   8 ++
 ast.go           |  13 +++
 cmd/argh/main.go |  28 +++++++
 go.mod           |   5 ++
 go.sum           |   2 +
 parser.go        |  95 ++++++++++++++++++++++
 scanner.go       | 200 +++++++++++++++++++++++++++++++++++++++++++++++
 token.go         |  25 ++++++
 token_string.go  |  36 +++++++++
 9 files changed, 412 insertions(+)
 create mode 100644 argh.go
 create mode 100644 ast.go
 create mode 100644 cmd/argh/main.go
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 parser.go
 create mode 100644 scanner.go
 create mode 100644 token.go
 create mode 100644 token_string.go

diff --git a/argh.go b/argh.go
new file mode 100644
index 0000000..085cc76
--- /dev/null
+++ b/argh.go
@@ -0,0 +1,8 @@
+// Package argh scans command line arguments into tokens and collects them
+// into a flat list of syntax nodes.
+package argh
+
+// Argh is the result of a parse; it holds the nodes produced from the input.
+type Argh struct {
+	AST *AST
+}
diff --git a/ast.go b/ast.go
new file mode 100644
index 0000000..30834f8
--- /dev/null
+++ b/ast.go
@@ -0,0 +1,13 @@
+package argh
+
+// AST is the ordered list of nodes produced by Parser.Parse.
+type AST struct {
+	Nodes []*Node `json:"nodes"`
+}
+
+// Node records a single scanned token: Token is the token's name as returned
+// by Token.String and Literal is the text that was scanned for it.
+type Node struct {
+	Token   string `json:"token"`
+	Literal string `json:"literal"`
+}
diff --git a/cmd/argh/main.go b/cmd/argh/main.go
new file mode 100644
index 0000000..2b77e52
--- /dev/null
+++ b/cmd/argh/main.go
@@ -0,0 +1,28 @@
+// Command argh parses its own command line with the argh package and prints
+// the result as indented JSON.
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+
+	"git.meatballhat.com/x/box-o-sand/argh"
+)
+
+func main() {
+	// os.Args is passed through unmodified, so the program name is scanned
+	// like any other argument.
+	ast, err := argh.ParseArgs(os.Args)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	b, err := json.MarshalIndent(ast, "", " ")
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	fmt.Println(string(b))
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..f70b654
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module git.meatballhat.com/x/box-o-sand/argh
+
+go 1.18
+
+require github.com/pkg/errors v0.9.1
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..7c401c3
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
diff --git a/parser.go b/parser.go
new file mode 100644
index 0000000..cf74b87
--- /dev/null
+++ b/parser.go
@@ -0,0 +1,95 @@
+package argh
+
+import (
+	"io"
+	"strings"
+
+	"github.com/pkg/errors"
+)
+
+// NOTE: much of this is lifted from
+// https://blog.gopheracademy.com/advent-2014/parsers-lexers/
+
+var (
+	errSyntax = errors.New("syntax error")
+)
+
+// ParseArgs joins args with NUL separators, scans the result with
+// DefaultScannerConfig and returns the parsed Argh.
+func ParseArgs(args []string) (*Argh, error) {
+	reEncoded := strings.Join(args, string(nul))
+
+	return NewParser(
+		strings.NewReader(reEncoded),
+		nil,
+	).Parse()
+}
+
+// Parser turns the tokens produced by a Scanner into an Argh.
+type Parser struct {
+	s   *Scanner
+	buf ParserBuffer
+}
+
+// ParserBuffer holds the most recently scanned token so that it can be
+// replayed after a call to unscan.
+type ParserBuffer struct {
+	tok Token
+	lit string
+	n   int
+}
+
+// NewParser returns a Parser that reads from r. A nil cfg selects
+// DefaultScannerConfig.
+func NewParser(r io.Reader, cfg *ScannerConfig) *Parser {
+	return &Parser{s: NewScanner(r, cfg)}
+}
+
+// Parse scans until EOL and returns one Node per token, in input order. It
+// returns an error wrapping errSyntax as soon as an ILLEGAL token is scanned.
+func (p *Parser) Parse() (*Argh, error) {
+	arghOut := &Argh{
+		AST: &AST{
+			// An empty, non-nil slice marshals as [] instead of null.
+			Nodes: []*Node{},
+		},
+	}
+
+	for {
+		tok, lit := p.scan()
+		if tok == ILLEGAL {
+			return nil, errors.Wrapf(errSyntax, "illegal value %q", lit)
+		}
+
+		if tok == EOL {
+			break
+		}
+
+		arghOut.AST.Nodes = append(
+			arghOut.AST.Nodes,
+			&Node{Token: tok.String(), Literal: lit},
+		)
+	}
+
+	return arghOut, nil
+}
+
+// scan returns the buffered token when unscan was called since the last scan,
+// and otherwise reads the next token from the scanner and remembers it.
+func (p *Parser) scan() (Token, string) {
+	if p.buf.n != 0 {
+		p.buf.n = 0
+		return p.buf.tok, p.buf.lit
+	}
+
+	tok, lit := p.s.Scan()
+
+	p.buf.tok, p.buf.lit = tok, lit
+
+	return tok, lit
+}
+
+// unscan makes the next call to scan return the most recent token again.
+func (p *Parser) unscan() {
+	p.buf.n = 1
+}
diff --git a/scanner.go b/scanner.go
new file mode 100644
index 0000000..02e1267
--- /dev/null
+++ b/scanner.go
@@ -0,0 +1,200 @@
+package argh
+
+// NOTE: much of this is lifted from
+// https://blog.gopheracademy.com/advent-2014/parsers-lexers/
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"io"
+	"log"
+	"unicode"
+)
+
+const (
+	// nul separates arguments in the input built by ParseArgs.
+	nul = rune(0)
+	// eol is returned by Scanner.read when there is no more input.
+	eol = rune(-1)
+)
+
+var (
+	// DefaultScannerConfig is the configuration used when NewScanner is given
+	// a nil config.
+	DefaultScannerConfig = &ScannerConfig{
+		AssignmentOperator: '=',
+		FlagPrefix:         '-',
+		MultiValueDelim:    ',',
+	}
+)
+
+// Scanner splits its input into tokens.
+type Scanner struct {
+	r   *bufio.Reader
+	cfg *ScannerConfig
+}
+
+// ScannerConfig selects the runes that the Scanner treats specially.
+type ScannerConfig struct {
+	AssignmentOperator rune
+	FlagPrefix         rune
+	MultiValueDelim    rune
+
+	// Commands is not read by the Scanner.
+	Commands []string
+}
+
+// NewScanner returns a Scanner that reads from r. A nil cfg selects
+// DefaultScannerConfig.
+func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner {
+	if cfg == nil {
+		cfg = DefaultScannerConfig
+	}
+
+	return &Scanner{
+		r:   bufio.NewReader(r),
+		cfg: cfg,
+	}
+}
+
+// Scan returns the next token and its literal text. The cases are tried in
+// order, so blankspace wins over the configured runes.
+func (s *Scanner) Scan() (Token, string) {
+	ch := s.read()
+
+	switch {
+	case s.isBlankspace(ch):
+		s.unread()
+		return s.scanBlankspace()
+	case s.isAssignmentOperator(ch):
+		return ASSIGN, string(ch)
+	case s.isMultiValueDelim(ch):
+		return MULTI_VALUE_DELIMITER, string(ch)
+	case ch == eol:
+		return EOL, ""
+	case ch == nul:
+		return ARG_DELIMITER, string(ch)
+	case unicode.IsGraphic(ch):
+		s.unread()
+		return s.scanArg()
+	}
+
+	return ILLEGAL, string(ch)
+}
+
+// read returns the next rune of the input, or eol when the input is
+// exhausted. Any other read error is logged and reported as eol too.
+func (s *Scanner) read() rune {
+	ch, _, err := s.r.ReadRune()
+	if err != nil {
+		if !errors.Is(err, io.EOF) {
+			log.Printf("unknown scanner error=%+v", err)
+		}
+
+		return eol
+	}
+
+	return ch
+}
+
+// unread pushes the most recently read rune back onto the input.
+func (s *Scanner) unread() {
+	// Callers only unread directly after a successful read, so UnreadRune
+	// has a rune to push back and its error can be dropped.
+	_ = s.r.UnreadRune()
+}
+
+// isBlankspace reports whether ch is a space, tab or newline.
+func (s *Scanner) isBlankspace(ch rune) bool {
+	return ch == ' ' || ch == '\t' || ch == '\n'
+}
+
+// isUnderscore reports whether ch is an underscore.
+func (s *Scanner) isUnderscore(ch rune) bool {
+	return ch == '_'
+}
+
+// isFlagPrefix reports whether ch is the configured flag prefix.
+func (s *Scanner) isFlagPrefix(ch rune) bool {
+	return ch == s.cfg.FlagPrefix
+}
+
+// isMultiValueDelim reports whether ch is the configured multi-value delimiter.
+func (s *Scanner) isMultiValueDelim(ch rune) bool {
+	return ch == s.cfg.MultiValueDelim
+}
+
+// isAssignmentOperator reports whether ch is the configured assignment operator.
+func (s *Scanner) isAssignmentOperator(ch rune) bool {
+	return ch == s.cfg.AssignmentOperator
+}
+
+// scanBlankspace consumes a run of blankspace and returns it as one BS token.
+func (s *Scanner) scanBlankspace() (Token, string) {
+	buf := &bytes.Buffer{}
+
+	for {
+		ch := s.read()
+		if ch == eol {
+			break
+		}
+
+		if !s.isBlankspace(ch) {
+			s.unread()
+			break
+		}
+
+		// bytes.Buffer.WriteRune always returns a nil error.
+		_, _ = buf.WriteRune(ch)
+	}
+
+	return BS, buf.String()
+}
+
+// scanArg consumes runes up to, but not including, the next NUL, assignment
+// operator, multi-value delimiter or end of input, and classifies the text by
+// its leading flag prefixes.
+func (s *Scanner) scanArg() (Token, string) {
+	buf := &bytes.Buffer{}
+
+	for {
+		ch := s.read()
+		if ch == eol {
+			break
+		}
+
+		if ch == nul || s.isAssignmentOperator(ch) || s.isMultiValueDelim(ch) {
+			s.unread()
+			break
+		}
+
+		// bytes.Buffer.WriteRune always returns a nil error.
+		_, _ = buf.WriteRune(ch)
+	}
+
+	str := buf.String()
+
+	// Classify by runes rather than bytes so that multi-byte characters,
+	// including a multi-byte FlagPrefix, count as single characters.
+	runes := []rune(str)
+
+	switch {
+	case len(runes) == 0:
+		return EMPTY, str
+	case !s.isFlagPrefix(runes[0]):
+		return IDENT, str
+	case len(runes) == 1:
+		return STDIN_FLAG, str
+	case s.isFlagPrefix(runes[1]):
+		if len(runes) == 2 {
+			return STOP_FLAG, str
+		}
+
+		return LONG_FLAG, str
+	case len(runes) == 2:
+		return SHORT_FLAG, str
+	}
+
+	return COMPOUND_SHORT_FLAG, str
+}
diff --git a/token.go b/token.go
new file mode 100644
index 0000000..ec3f758
--- /dev/null
+++ b/token.go
@@ -0,0 +1,25 @@
+//go:generate stringer -type Token
+
+package argh
+
+// The token kinds. The values are mirrored in the generated token_string.go,
+// so re-run go generate after changing this list.
+const (
+	ILLEGAL Token = iota
+	EOL
+	EMPTY
+	BS
+	IDENT
+	ARG_DELIMITER
+	COMMAND
+	ASSIGN
+	MULTI_VALUE_DELIMITER
+	LONG_FLAG
+	SHORT_FLAG
+	COMPOUND_SHORT_FLAG
+	STDIN_FLAG
+	STOP_FLAG
+)
+
+// Token identifies the kind of a scanned piece of input.
+type Token int
diff --git a/token_string.go b/token_string.go
new file mode 100644
index 0000000..8c1b585
--- /dev/null
+++ b/token_string.go
@@ -0,0 +1,36 @@
+// Code generated by "stringer -type Token"; DO NOT EDIT.
+
+package argh
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[ILLEGAL-0]
+	_ = x[EOL-1]
+	_ = x[EMPTY-2]
+	_ = x[BS-3]
+	_ = x[IDENT-4]
+	_ = x[ARG_DELIMITER-5]
+	_ = x[COMMAND-6]
+	_ = x[ASSIGN-7]
+	_ = x[MULTI_VALUE_DELIMITER-8]
+	_ = x[LONG_FLAG-9]
+	_ = x[SHORT_FLAG-10]
+	_ = x[COMPOUND_SHORT_FLAG-11]
+	_ = x[STDIN_FLAG-12]
+	_ = x[STOP_FLAG-13]
+}
+
+const _Token_name = "ILLEGALEOLEMPTYBSIDENTARG_DELIMITERCOMMANDASSIGNMULTI_VALUE_DELIMITERLONG_FLAGSHORT_FLAGCOMPOUND_SHORT_FLAGSTDIN_FLAGSTOP_FLAG"
+
+var _Token_index = [...]uint8{0, 7, 10, 15, 17, 22, 35, 42, 48, 69, 78, 88, 107, 117, 126}
+
+func (i Token) String() string {
+	if i < 0 || i >= Token(len(_Token_index)-1) {
+		return "Token(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _Token_name[_Token_index[i]:_Token_index[i+1]]
+}