Fix Unquoted String scanning
This commit is contained in:
@ -1,396 +0,0 @@
|
||||
package nbt
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
type token int
|
||||
|
||||
const (
|
||||
ILLEGAL token = iota
|
||||
|
||||
IDENT // name
|
||||
|
||||
INT // 12345
|
||||
FLT // 12345.67
|
||||
|
||||
BYTE // b or B
|
||||
SHORT // s or S
|
||||
LONG // l or L
|
||||
FLOAT // f or F
|
||||
DOUBLE // d or D
|
||||
|
||||
STRING // "abc" 'def'
|
||||
|
||||
LPAREN // (
|
||||
LBRACK // [
|
||||
LBRACE // {
|
||||
COMMA // ,
|
||||
PERIOD // .
|
||||
|
||||
RPAREN // )
|
||||
RBRACK // ]
|
||||
RBRACE // }
|
||||
SEMICOLON // ;
|
||||
COLON // :
|
||||
)
|
||||
|
||||
const (
|
||||
scanContinue = iota // uninteresting byte
|
||||
scanBeginCompound // begin TAG_Compound (after left-brace )
|
||||
scanBeginList // begin TAG_List (after left-brack)
|
||||
scanListValue // just finished read list value
|
||||
scanListType // just finished read list type (after "B;" or "L;")
|
||||
scanCompoundTagName // just finished read tag name (before colon)
|
||||
scanCompoundValue // just finished read value (before comma or right-brace )
|
||||
scanSkipSpace // space byte; can skip; known to be last "continue" result
|
||||
scanEndValue
|
||||
|
||||
scanEnd
|
||||
scanError
|
||||
)
|
||||
|
||||
// These values are stored in the parseState stack.
|
||||
// They give the current state of a composite value
|
||||
// being scanned. If the parser is inside a nested value
|
||||
// the parseState describes the nested state, outermost at entry 0.
|
||||
const (
|
||||
parseCompoundName = iota // parsing tag name (before colon)
|
||||
parseCompoundValue // parsing value (after colon)
|
||||
parseListValue // parsing list
|
||||
)
|
||||
|
||||
const maxNestingDepth = 10000
|
||||
|
||||
type scanner struct {
|
||||
step func(c byte) int
|
||||
parseState []int
|
||||
err error
|
||||
}
|
||||
|
||||
// reset prepares the scanner for use.
|
||||
// It must be called before calling s.step.
|
||||
func (s *scanner) reset() {
|
||||
s.step = s.stateBeginValue
|
||||
s.parseState = s.parseState[0:0]
|
||||
}
|
||||
|
||||
// pushParseState pushes a new parse state p onto the parse stack.
|
||||
// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned.
|
||||
func (s *scanner) pushParseState(c byte, newParseState int, successState int) int {
|
||||
s.parseState = append(s.parseState, newParseState)
|
||||
if len(s.parseState) <= maxNestingDepth {
|
||||
return successState
|
||||
}
|
||||
return scanError
|
||||
}
|
||||
|
||||
// popParseState pops a parse state (already obtained) off the stack
|
||||
// and updates s.step accordingly.
|
||||
func (s *scanner) popParseState() {
|
||||
n := len(s.parseState) - 1
|
||||
s.parseState = s.parseState[:n]
|
||||
if n == 0 {
|
||||
s.step = s.stateEndTop
|
||||
//s.endTop = true
|
||||
} else {
|
||||
s.step = s.stateEndValue
|
||||
}
|
||||
}
|
||||
|
||||
// stateEndTop is the state after finishing the top-level value,
|
||||
// such as after reading `{}` or `[1,2,3]`.
|
||||
// Only space characters should be seen now.
|
||||
func (s *scanner) stateEndTop(c byte) int {
|
||||
if !isSpace(c) {
|
||||
// Complain about non-space byte on next call.
|
||||
s.error(c, "after top-level value")
|
||||
}
|
||||
return scanEnd
|
||||
}
|
||||
|
||||
func (s *scanner) stateBeginValue(c byte) int {
|
||||
if isSpace(c) {
|
||||
s.step = s.stateBeginValue
|
||||
return scanSkipSpace
|
||||
}
|
||||
switch c {
|
||||
case '{': // beginning of TAG_Compound
|
||||
s.step = s.stateCompoundOrEmpty
|
||||
return s.pushParseState(c, parseCompoundName, scanBeginCompound)
|
||||
case '[': // beginning of TAG_List
|
||||
s.step = s.stateListOrArray
|
||||
return s.pushParseState(c, parseListValue, scanBeginList)
|
||||
case '"', '\'': // beginning of TAG_String
|
||||
return s.stateBeginString(c)
|
||||
case '-': // beginning of negative number
|
||||
s.step = s.stateNeg
|
||||
return scanContinue
|
||||
default:
|
||||
if isNumber(c) {
|
||||
return s.stateNum1(c)
|
||||
}
|
||||
if isNumOrLetter(c) {
|
||||
return s.stateBeginString(c)
|
||||
}
|
||||
}
|
||||
return s.error(c, "looking for beginning of value")
|
||||
}
|
||||
|
||||
func (s *scanner) stateCompoundOrEmpty(c byte) int {
|
||||
if isSpace(c) {
|
||||
return scanSkipSpace
|
||||
}
|
||||
if c == '}' {
|
||||
n := len(s.parseState)
|
||||
s.parseState[n-1] = parseCompoundValue
|
||||
return s.stateEndValue(c)
|
||||
}
|
||||
return s.stateBeginString(c)
|
||||
}
|
||||
|
||||
func (s *scanner) stateBeginString(c byte) int {
|
||||
if isSpace(c) {
|
||||
return scanSkipSpace
|
||||
}
|
||||
switch c {
|
||||
case '\'':
|
||||
s.step = s.stateInSqString
|
||||
return scanContinue
|
||||
case '"':
|
||||
s.step = s.stateInDqString
|
||||
return scanContinue
|
||||
default:
|
||||
if isNumOrLetter(c) {
|
||||
s.step = s.stateInPureString
|
||||
return scanContinue
|
||||
}
|
||||
}
|
||||
return s.error(c, "looking for beginning of string")
|
||||
}
|
||||
|
||||
func (s *scanner) stateInSqString(c byte) int {
|
||||
if c == '\\' {
|
||||
s.step = s.stateInSqStringEsc
|
||||
return scanContinue
|
||||
}
|
||||
if c == '\'' {
|
||||
s.step = s.stateEndValue
|
||||
return scanContinue
|
||||
}
|
||||
if isNumOrLetter(c) {
|
||||
return scanContinue
|
||||
}
|
||||
return s.stateEndValue(c)
|
||||
}
|
||||
|
||||
func (s *scanner) stateInSqStringEsc(c byte) int {
|
||||
switch c {
|
||||
case 'b', 'f', 'n', 'r', 't', '\\', '/', '\'':
|
||||
s.step = s.stateInSqString
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in string escape code")
|
||||
}
|
||||
|
||||
func (s *scanner) stateInDqString(c byte) int {
|
||||
if c == '\\' {
|
||||
s.step = s.stateInDqStringEsc
|
||||
return scanContinue
|
||||
}
|
||||
if c == '"' {
|
||||
s.step = s.stateEndValue
|
||||
return scanContinue
|
||||
}
|
||||
if isNumOrLetter(c) {
|
||||
return scanContinue
|
||||
}
|
||||
return s.stateEndValue(c)
|
||||
}
|
||||
|
||||
func (s *scanner) stateInDqStringEsc(c byte) int {
|
||||
switch c {
|
||||
case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
|
||||
s.step = s.stateInDqString
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "in string escape code")
|
||||
}
|
||||
|
||||
func (s *scanner) stateInPureString(c byte) int {
|
||||
if isNumOrLetter(c) {
|
||||
return scanContinue
|
||||
}
|
||||
return s.stateEndValue(c)
|
||||
}
|
||||
|
||||
func (s *scanner) stateListOrArray(c byte) int {
|
||||
if isSpace(c) {
|
||||
return scanSkipSpace
|
||||
}
|
||||
switch c {
|
||||
case 'B', 'I', 'L':
|
||||
s.step = s.stateListOrArrayT
|
||||
return scanContinue
|
||||
case ']':
|
||||
return s.stateEndValue(c)
|
||||
default:
|
||||
return s.stateBeginValue(c)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) stateListOrArrayT(c byte) int {
|
||||
if c == ';' {
|
||||
s.step = s.stateBeginValue
|
||||
return scanListType
|
||||
}
|
||||
return s.stateInPureString(c)
|
||||
}
|
||||
|
||||
func (s *scanner) stateNeg(c byte) int {
|
||||
if !isNumber(c) {
|
||||
s.error(c, "not a number after '-'")
|
||||
}
|
||||
s.step = s.stateNum1
|
||||
return scanContinue
|
||||
}
|
||||
|
||||
func (s *scanner) stateNum1(c byte) int {
|
||||
if isNumber(c) {
|
||||
s.step = s.stateNum1
|
||||
return scanContinue
|
||||
}
|
||||
if c == '.' {
|
||||
s.step = s.stateNumDot
|
||||
return scanContinue
|
||||
}
|
||||
return s.stateEndNumValue(c)
|
||||
}
|
||||
|
||||
// stateDot is the state after reading the integer and decimal point in a number,
|
||||
// such as after reading `1.`.
|
||||
func (s *scanner) stateNumDot(c byte) int {
|
||||
if isNumber(c) {
|
||||
s.step = s.stateNumDot0
|
||||
return scanContinue
|
||||
}
|
||||
return s.error(c, "after decimal point in numeric literal")
|
||||
}
|
||||
|
||||
// stateNumDot0 is the state after reading the integer, decimal point, and subsequent
|
||||
// digits of a number, such as after reading `3.14`.
|
||||
func (s *scanner) stateNumDot0(c byte) int {
|
||||
if isNumber(c) {
|
||||
s.step = s.stateNumDot0
|
||||
return scanContinue
|
||||
}
|
||||
return s.stateEndNumDotValue(c)
|
||||
}
|
||||
|
||||
func (s *scanner) stateEndNumValue(c byte) int {
|
||||
if isSpace(c) {
|
||||
s.step = s.stateEndValue
|
||||
return scanSkipSpace
|
||||
}
|
||||
switch c {
|
||||
case 'b', 'B': // TAG_Byte
|
||||
s.step = s.stateEndValue
|
||||
return scanContinue
|
||||
case 's', 'S': // TAG_Short
|
||||
s.step = s.stateEndValue
|
||||
return scanContinue
|
||||
case 'l', 'L': // TAG_Long
|
||||
s.step = s.stateEndValue
|
||||
return scanContinue
|
||||
case 'f', 'F', 'd', 'D':
|
||||
return s.stateEndNumDotValue(c)
|
||||
}
|
||||
return s.stateEndValue(c)
|
||||
}
|
||||
|
||||
func (s *scanner) stateEndNumDotValue(c byte) int {
|
||||
switch c {
|
||||
case 'f', 'F': // TAG_Float
|
||||
s.step = s.stateEndValue
|
||||
return scanContinue
|
||||
case 'd', 'D': // TAG_Double
|
||||
s.step = s.stateEndValue
|
||||
return scanContinue
|
||||
}
|
||||
return s.stateEndValue(c)
|
||||
}
|
||||
|
||||
func (s *scanner) stateEndValue(c byte) int {
|
||||
n := len(s.parseState)
|
||||
if n == 0 {
|
||||
// Completed top-level before the current byte.
|
||||
s.step = s.stateEndTop
|
||||
//s.endTop = true
|
||||
return s.stateEndTop(c)
|
||||
}
|
||||
if isSpace(c) {
|
||||
return scanSkipSpace
|
||||
}
|
||||
|
||||
ps := s.parseState[n-1]
|
||||
switch ps {
|
||||
case parseCompoundName:
|
||||
if c == ':' {
|
||||
s.parseState[n-1] = parseCompoundValue
|
||||
s.step = s.stateBeginValue
|
||||
return scanCompoundTagName
|
||||
}
|
||||
return s.error(c, "after compound tag name")
|
||||
case parseCompoundValue:
|
||||
switch c {
|
||||
case ',':
|
||||
s.parseState[n-1] = parseCompoundName
|
||||
s.step = s.stateBeginString
|
||||
return scanCompoundValue
|
||||
case '}':
|
||||
s.popParseState()
|
||||
return scanEndValue
|
||||
}
|
||||
return s.error(c, "after compound value")
|
||||
case parseListValue:
|
||||
switch c {
|
||||
case ',':
|
||||
s.step = s.stateBeginValue
|
||||
return scanListValue
|
||||
case ']':
|
||||
s.popParseState()
|
||||
return scanEndValue
|
||||
}
|
||||
return s.error(c, "after list element")
|
||||
}
|
||||
return s.error(c, "")
|
||||
}
|
||||
func (s *scanner) error(c byte, context string) int {
|
||||
s.step = s.stateError
|
||||
s.err = errors.New(context)
|
||||
return scanError
|
||||
}
|
||||
|
||||
// stateError is the state after reaching a syntax error,
|
||||
// such as after reading `[1}` or `5.1.2`.
|
||||
func (s *scanner) stateError(c byte) int {
|
||||
return scanError
|
||||
}
|
||||
|
||||
func isSpace(c byte) bool {
|
||||
return c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n')
|
||||
}
|
||||
|
||||
func isNumber(c byte) bool {
|
||||
if c >= '0' && c <= '9' {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isNumOrLetter(c byte) bool {
|
||||
if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || isNumber(c) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
Reference in New Issue
Block a user