Making a mess with command line parsing

This commit is contained in:
Dan Buch 2022-05-11 22:11:05 -04:00
commit 451be7bf31
9 changed files with 376 additions and 0 deletions

5
argh.go Normal file
View File

@ -0,0 +1,5 @@
package argh
// Argh is the result returned by ParseArgs and Parser.Parse.
type Argh struct {
// AST is the syntax tree built from the scanned tokens.
AST *AST
}

10
ast.go Normal file
View File

@ -0,0 +1,10 @@
package argh
// AST is the parsed representation of a command line as a flat list of nodes.
type AST struct {
// Nodes holds one entry per scanned token, in the order they were scanned.
Nodes []*Node `json:"nodes"`
}
// Node is a single scanned token paired with the text it was scanned from.
type Node struct {
// Token is the token's name as produced by Token.String.
Token string `json:"token"`
// Literal is the raw text that was scanned for this token.
Literal string `json:"literal"`
}

24
cmd/argh/main.go Normal file
View File

@ -0,0 +1,24 @@
package main
import (
"encoding/json"
"fmt"
"log"
"os"
"git.meatballhat.com/x/box-o-sand/argh"
)
// main parses the process arguments (os.Args, program name included) with
// argh.ParseArgs and prints the result to standard output as indented JSON.
// A parse or encoding failure is logged and terminates the program.
func main() {
	parsed, parseErr := argh.ParseArgs(os.Args)
	if parseErr != nil {
		log.Fatal(parseErr)
	}

	encoded, encodeErr := json.MarshalIndent(parsed, "", " ")
	if encodeErr != nil {
		log.Fatal(encodeErr)
	}

	fmt.Println(string(encoded))
}

5
go.mod Normal file
View File

@ -0,0 +1,5 @@
module git.meatballhat.com/x/box-o-sand/argh
go 1.18
require github.com/pkg/errors v0.9.1

2
go.sum Normal file
View File

@ -0,0 +1,2 @@
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=

82
parser.go Normal file
View File

@ -0,0 +1,82 @@
package argh
import (
"io"
"strings"
"github.com/pkg/errors"
)
// NOTE: much of this is lifted from
// https://blog.gopheracademy.com/advent-2014/parsers-lexers/
var (
// errSyntax is the base error that Parse wraps when the scanner reports an
// ILLEGAL token.
errSyntax = errors.New("syntax error")
)
// ParseArgs joins args into a single stream with a NUL rune between
// consecutive arguments and parses that stream. A nil scanner config is
// passed to NewParser, so the default scanner configuration applies.
func ParseArgs(args []string) (*Argh, error) {
	joined := strings.Join(args, string(nul))
	parser := NewParser(strings.NewReader(joined), nil)

	return parser.Parse()
}
// Parser turns the token stream produced by a Scanner into an Argh value.
type Parser struct {
// s is the scanner tokens are read from.
s *Scanner
// buf remembers the last scanned token so it can be replayed after unscan.
buf ParserBuffer
}
// ParserBuffer remembers the most recently scanned token so that it can be
// returned a second time.
type ParserBuffer struct {
// tok is the last token returned by the scanner.
tok Token
// lit is the literal text that accompanied tok.
lit string
// n is non-zero when tok and lit should be returned again by the next scan.
n int
}
// NewParser builds a Parser whose tokens come from a new Scanner reading r
// with the given scanner configuration.
func NewParser(r io.Reader, cfg *ScannerConfig) *Parser {
	scanner := NewScanner(r, cfg)

	return &Parser{s: scanner}
}
// Parse consumes tokens until EOL and returns them as the nodes of an AST,
// in scan order. An ILLEGAL token aborts parsing and yields a nil result
// together with errSyntax wrapped with the offending literal.
func (p *Parser) Parse() (*Argh, error) {
	// Start from an empty, non-nil slice so the node list is never nil.
	nodes := []*Node{}

	for {
		tok, lit := p.scan()

		switch tok {
		case ILLEGAL:
			return nil, errors.Wrapf(errSyntax, "illegal value %q", lit)
		case EOL:
			return &Argh{AST: &AST{Nodes: nodes}}, nil
		}

		nodes = append(nodes, &Node{Token: tok.String(), Literal: lit})
	}
}
// scan returns the next token and its literal. When a token has been pushed
// back with unscan, the buffered token is returned (and the push-back is
// cleared) instead of reading from the scanner.
func (p *Parser) scan() (Token, string) {
	if p.buf.n == 0 {
		// Nothing pushed back: read a fresh token and remember it.
		p.buf.tok, p.buf.lit = p.s.Scan()
		return p.buf.tok, p.buf.lit
	}

	p.buf.n = 0
	return p.buf.tok, p.buf.lit
}
// unscan marks the buffered token so that the next call to scan returns it
// again instead of reading from the scanner.
func (p *Parser) unscan() {
p.buf.n = 1
}

190
scanner.go Normal file
View File

@ -0,0 +1,190 @@
package argh
// NOTE: much of this is lifted from
// https://blog.gopheracademy.com/advent-2014/parsers-lexers/
import (
"bufio"
"bytes"
"errors"
"io"
"log"
"unicode"
)
const (
// nul is the rune ParseArgs places between arguments; Scan reports it as
// ARG_DELIMITER.
nul = rune(0)
// eol is the sentinel that Scanner.read returns when the reader reports
// io.EOF or any other error.
eol = rune(-1)
)
var (
// DefaultScannerConfig is the configuration NewScanner falls back to when it
// is given a nil config.
DefaultScannerConfig = &ScannerConfig{
AssignmentOperator: '=',
FlagPrefix: '-',
MultiValueDelim: ',',
}
)
// Scanner splits an input stream into tokens, one rune at a time.
type Scanner struct {
// r is the buffered reader runes are read from (and unread back into).
r *bufio.Reader
// cfg supplies the runes used to classify input.
cfg *ScannerConfig
}
// ScannerConfig holds the settings a Scanner uses to classify its input.
type ScannerConfig struct {
// AssignmentOperator is the rune scanned as ASSIGN.
AssignmentOperator rune
// FlagPrefix is the rune that marks the start of a flag argument.
FlagPrefix rune
// MultiValueDelim is the rune scanned as MULTI_VALUE_DELIMITER.
MultiValueDelim rune
// Commands is not read by the scanner code visible here; presumably the
// list of recognized command names — TODO confirm.
Commands []string
}
// NewScanner returns a Scanner that reads from r through a buffered reader.
// A nil cfg selects DefaultScannerConfig.
func NewScanner(r io.Reader, cfg *ScannerConfig) *Scanner {
	scanner := &Scanner{
		r:   bufio.NewReader(r),
		cfg: DefaultScannerConfig,
	}
	if cfg != nil {
		scanner.cfg = cfg
	}

	return scanner
}
// Scan reads the next token from the input and returns it with its literal
// text. The first rune decides the token kind; the checks run in a fixed
// order, so an earlier match wins when a rune satisfies more than one.
func (s *Scanner) Scan() (Token, string) {
	first := s.read()

	switch {
	case s.isBlankspace(first):
		// Put the rune back so the whole blankspace run is collected together.
		s.unread()
		return s.scanBlankspace()
	case s.isAssignmentOperator(first):
		return ASSIGN, string(first)
	case s.isMultiValueDelim(first):
		return MULTI_VALUE_DELIMITER, string(first)
	case first == eol:
		return EOL, ""
	case first == nul:
		return ARG_DELIMITER, string(first)
	case unicode.IsGraphic(first):
		// Put the rune back so scanArg sees the argument from its start.
		s.unread()
		return s.scanArg()
	}

	return ILLEGAL, string(first)
}
// read returns the next rune from the underlying reader. It returns eol when
// the reader reports io.EOF; any other read error is logged and also
// reported as eol.
func (s *Scanner) read() rune {
	ch, _, err := s.r.ReadRune()
	if err == nil {
		return ch
	}

	if !errors.Is(err, io.EOF) {
		log.Printf("unknown scanner error=%+v", err)
	}

	return eol
}
// unread asks the underlying reader to step back by one rune. The error
// returned by UnreadRune is deliberately discarded.
func (s *Scanner) unread() {
_ = s.r.UnreadRune()
}
// isBlankspace reports whether ch is a space, a tab, or a newline.
func (s *Scanner) isBlankspace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n':
		return true
	}

	return false
}
// isUnderscore reports whether ch is the underscore character.
func (s *Scanner) isUnderscore(ch rune) bool {
return ch == '_'
}
// isFlagPrefix reports whether ch is the configured flag prefix rune.
func (s *Scanner) isFlagPrefix(ch rune) bool {
return ch == s.cfg.FlagPrefix
}
// isMultiValueDelim reports whether ch is the configured multi-value
// delimiter rune.
func (s *Scanner) isMultiValueDelim(ch rune) bool {
return ch == s.cfg.MultiValueDelim
}
// isAssignmentOperator reports whether ch is the configured assignment
// operator rune.
func (s *Scanner) isAssignmentOperator(ch rune) bool {
return ch == s.cfg.AssignmentOperator
}
// scanBlankspace collects a run of consecutive blankspace runes and returns
// it as a BS token. The first rune is taken unconditionally; the caller is
// expected to have verified that it is blankspace. The rune that ends the
// run is pushed back so the next scan sees it.
func (s *Scanner) scanBlankspace() (Token, string) {
	var run bytes.Buffer
	run.WriteRune(s.read())

	for {
		next := s.read()
		if next == eol {
			break
		}
		if !s.isBlankspace(next) {
			s.unread()
			break
		}

		_, _ = run.WriteRune(next)
	}

	return BS, run.String()
}
// scanArg collects one argument and classifies it.
//
// The first rune is taken unconditionally (the caller has already decided it
// starts an argument). Further runes are collected until end of input, a NUL
// argument delimiter, the assignment operator, or the multi-value delimiter;
// a terminating rune other than end of input is pushed back for the next
// scan.
//
// Classification works on runes rather than bytes, so multi-byte characters
// and a non-ASCII flag prefix are counted and compared correctly:
//
//   - no text:                                    EMPTY
//   - does not start with the flag prefix:        IDENT
//   - the flag prefix alone:                      STDIN_FLAG
//   - exactly two flag prefixes:                  STOP_FLAG
//   - two flag prefixes followed by more text:    LONG_FLAG
//   - flag prefix plus exactly one other rune:    SHORT_FLAG
//   - flag prefix plus two or more other runes:   COMPOUND_SHORT_FLAG
func (s *Scanner) scanArg() (Token, string) {
	buf := &bytes.Buffer{}
	buf.WriteRune(s.read())

	for {
		ch := s.read()
		if ch == eol {
			// Nothing was consumed at end of input, so nothing to push back.
			break
		}
		if ch == nul || s.isAssignmentOperator(ch) || s.isMultiValueDelim(ch) {
			s.unread()
			break
		}

		_, _ = buf.WriteRune(ch)
	}

	str := buf.String()

	// Decode once: indexing the string directly would yield bytes, which
	// miscounts any multi-byte character.
	runes := []rune(str)

	if len(runes) == 0 {
		return EMPTY, str
	}

	if !s.isFlagPrefix(runes[0]) {
		return IDENT, str
	}

	switch {
	case len(runes) == 1:
		return STDIN_FLAG, str
	case s.isFlagPrefix(runes[1]):
		if len(runes) == 2 {
			return STOP_FLAG, str
		}
		return LONG_FLAG, str
	case len(runes) == 2:
		return SHORT_FLAG, str
	}

	return COMPOUND_SHORT_FLAG, str
}

22
token.go Normal file
View File

@ -0,0 +1,22 @@
//go:generate stringer -type Token
package argh
const (
// ILLEGAL is returned by Scanner.Scan for a rune it cannot classify.
ILLEGAL Token = iota
// EOL marks the end of input.
EOL
// EMPTY is returned by scanArg when no text was accumulated.
EMPTY
// BS is a run of blankspace (space, tab, or newline).
BS
// IDENT is an argument that does not start with the flag prefix.
IDENT
// ARG_DELIMITER is the NUL rune that separates arguments.
ARG_DELIMITER
// COMMAND is not produced by the scanner code visible here; presumably
// reserved for configured command names — TODO confirm.
COMMAND
// ASSIGN is the configured assignment operator.
ASSIGN
// MULTI_VALUE_DELIMITER is the configured multi-value delimiter.
MULTI_VALUE_DELIMITER
// LONG_FLAG is an argument that starts with two flag prefixes followed by
// more text.
LONG_FLAG
// SHORT_FLAG is a flag prefix followed by exactly one other character.
SHORT_FLAG
// COMPOUND_SHORT_FLAG is a single flag prefix followed by more than one
// character.
COMPOUND_SHORT_FLAG
// STDIN_FLAG is the flag prefix on its own.
STDIN_FLAG
// STOP_FLAG is exactly two flag prefixes.
STOP_FLAG
)
// Token identifies the kind of lexical element produced by the Scanner.
type Token int

36
token_string.go Normal file
View File

@ -0,0 +1,36 @@
// Code generated by "stringer -type Token"; DO NOT EDIT.
package argh
import "strconv"
// This function is never called. Its only purpose is to fail compilation if
// the Token constant values drift from the ones the generated tables were
// built for.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[ILLEGAL-0]
_ = x[EOL-1]
_ = x[EMPTY-2]
_ = x[BS-3]
_ = x[IDENT-4]
_ = x[ARG_DELIMITER-5]
_ = x[COMMAND-6]
_ = x[ASSIGN-7]
_ = x[MULTI_VALUE_DELIMITER-8]
_ = x[LONG_FLAG-9]
_ = x[SHORT_FLAG-10]
_ = x[COMPOUND_SHORT_FLAG-11]
_ = x[STDIN_FLAG-12]
_ = x[STOP_FLAG-13]
}
// _Token_name is every Token name concatenated in constant order.
const _Token_name = "ILLEGALEOLEMPTYBSIDENTARG_DELIMITERCOMMANDASSIGNMULTI_VALUE_DELIMITERLONG_FLAGSHORT_FLAGCOMPOUND_SHORT_FLAGSTDIN_FLAGSTOP_FLAG"
// _Token_index holds the offsets into _Token_name: the name of Token i spans
// _Token_index[i] up to (not including) _Token_index[i+1].
var _Token_index = [...]uint8{0, 7, 10, 15, 17, 22, 35, 42, 48, 69, 78, 88, 107, 117, 126}
// String returns the name of the Token constant, or "Token(N)" with N being
// the decimal value when i does not correspond to a known constant.
func (i Token) String() string {
// Values outside the index table have no name; fall back to the numeric form.
if i < 0 || i >= Token(len(_Token_index)-1) {
return "Token(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _Token_name[_Token_index[i]:_Token_index[i+1]]
}