Fix Unquoted String scanning

2021-05-25 16:13:55 +08:00
parent 5320722ed0
commit 69601cec28
3 changed files with 87 additions and 87 deletions
--- a/nbt/bigTest_test.snbt
+++ b/nbt/bigTest_test.snbt
--- a/nbt/snbt_scanner.go
+++ b/nbt/snbt_scanner.go
@ -4,37 +4,6 @@ import (
 	"errors"
 )

-type token int
-
-const (
-	ILLEGAL token = iota
-
-	IDENT // name
-
-	INT // 12345
-	FLT // 12345.67
-
-	BYTE   // b or B
-	SHORT  // s or S
-	LONG   // l or L
-	FLOAT  // f or F
-	DOUBLE // d or D
-
-	STRING // "abc" 'def'
-
-	LPAREN // (
-	LBRACK // [
-	LBRACE // {
-	COMMA  // ,
-	PERIOD // .
-
-	RPAREN    // )
-	RBRACK    // ]
-	RBRACE    // }
-	SEMICOLON // ;
-	COLON     // :
-)
-
 const (
 	scanContinue        = iota // uninteresting byte
 	scanBeginCompound          // begin TAG_Compound (after left-brace )
@ -130,7 +99,7 @@ func (s *scanner) stateBeginValue(c byte) int {
 		if isNumber(c) {
 			return s.stateNum1(c)
 		}
-		if isNumOrLetter(c) {
+		if isAllowedInUnquotedString(c) {
 			return s.stateBeginString(c)
 		}
 	}
@ -155,45 +124,42 @@ func (s *scanner) stateBeginString(c byte) int {
 	}
 	switch c {
 	case '\'':
-		s.step = s.stateInSqString
+		s.step = s.stateInSingleQuotedString
 		return scanContinue
 	case '"':
-		s.step = s.stateInDqString
+		s.step = s.stateInDoubleQuotedString
 		return scanContinue
 	default:
-		if isNumOrLetter(c) {
-			s.step = s.stateInPureString
+		if isAllowedInUnquotedString(c) {
+			s.step = s.stateInUnquotedString
 			return scanContinue
 		}
 	}
 	return s.error(c, "looking for beginning of string")
 }

-func (s *scanner) stateInSqString(c byte) int {
+func (s *scanner) stateInSingleQuotedString(c byte) int {
 	if c == '\\' {
-		s.step = s.stateInSqStringEsc
+		s.step = s.stateInSingleQuotedStringEsc
 		return scanContinue
 	}
 	if c == '\'' {
 		s.step = s.stateEndValue
 		return scanContinue
 	}
-	if isNumOrLetter(c) {
 	return scanContinue
 }
-	return s.stateEndValue(c)
-}

-func (s *scanner) stateInSqStringEsc(c byte) int {
+func (s *scanner) stateInSingleQuotedStringEsc(c byte) int {
 	switch c {
 	case 'b', 'f', 'n', 'r', 't', '\\', '/', '\'':
-		s.step = s.stateInSqString
+		s.step = s.stateInSingleQuotedString
 		return scanContinue
 	}
 	return s.error(c, "in string escape code")
 }

-func (s *scanner) stateInDqString(c byte) int {
+func (s *scanner) stateInDoubleQuotedString(c byte) int {
 	if c == '\\' {
 		s.step = s.stateInDqStringEsc
 		return scanContinue
@ -202,23 +168,20 @@ func (s *scanner) stateInDqString(c byte) int {
 		s.step = s.stateEndValue
 		return scanContinue
 	}
-	if isNumOrLetter(c) {
 	return scanContinue
 }
-	return s.stateEndValue(c)
-}

 func (s *scanner) stateInDqStringEsc(c byte) int {
 	switch c {
 	case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
-		s.step = s.stateInDqString
+		s.step = s.stateInDoubleQuotedString
 		return scanContinue
 	}
 	return s.error(c, "in string escape code")
 }

-func (s *scanner) stateInPureString(c byte) int {
-	if isNumOrLetter(c) {
+func (s *scanner) stateInUnquotedString(c byte) int {
+	if isAllowedInUnquotedString(c) {
 		return scanContinue
 	}
 	return s.stateEndValue(c)
@ -244,16 +207,20 @@ func (s *scanner) stateListOrArrayT(c byte) int {
 		s.step = s.stateBeginValue
 		return scanListType
 	}
-	return s.stateInPureString(c)
+	return s.stateInUnquotedString(c)
 }

 func (s *scanner) stateNeg(c byte) int {
-	if !isNumber(c) {
-		s.error(c, "not a number after '-'")
-	}
+	if isNumber(c) {
 		s.step = s.stateNum1
 		return scanContinue
 	}
+	if isAllowedInUnquotedString(c) {
+		s.step = s.stateInUnquotedString
+		return scanContinue
+	}
+	return s.error(c, "not a number after '-'")
+}

 func (s *scanner) stateNum1(c byte) int {
 	if isNumber(c) {
@ -264,6 +231,10 @@ func (s *scanner) stateNum1(c byte) int {
 		s.step = s.stateNumDot
 		return scanContinue
 	}
+	if isAllowedInUnquotedString(c) {
+		s.step = s.stateInUnquotedString
+		return scanContinue
+	}
 	return s.stateEndNumValue(c)
 }

@ -274,6 +245,10 @@ func (s *scanner) stateNumDot(c byte) int {
 		s.step = s.stateNumDot0
 		return scanContinue
 	}
+	if isAllowedInUnquotedString(c) {
+		s.step = s.stateInUnquotedString
+		return scanContinue
+	}
 	return s.error(c, "after decimal point in numeric literal")
 }

@ -284,6 +259,10 @@ func (s *scanner) stateNumDot0(c byte) int {
 		s.step = s.stateNumDot0
 		return scanContinue
 	}
+	if isAllowedInUnquotedString(c) {
+		s.step = s.stateInUnquotedString
+		return scanContinue
+	}
 	return s.stateEndNumDotValue(c)
 }

@ -382,15 +361,13 @@ func isSpace(c byte) bool {
 }

 func isNumber(c byte) bool {
-	if c >= '0' && c <= '9' {
-		return true
-	}
-	return false
+	return c >= '0' && c <= '9'
 }

-func isNumOrLetter(c byte) bool {
-	if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || isNumber(c) {
-		return true
-	}
-	return false
+func isAllowedInUnquotedString(c byte) bool {
+	return c == '_' || c == '-' ||
+		c == '.' || c == '+' ||
+		c >= '0' && c <= '9' ||
+		c >= 'A' && c <= 'Z' ||
+		c >= 'a' && c <= 'z'
 }
--- a/nbt/snbt_scanner_test.go
+++ b/nbt/snbt_scanner_test.go
@ -1,6 +1,9 @@
 package nbt

-import "testing"
+import (
+	_ "embed"
+	"testing"
+)

 func TestSNBT_number(t *testing.T) {
 	goods := []string{
@ -9,10 +12,6 @@ func TestSNBT_number(t *testing.T) {
 		"255B", "1234s", "6666L",
 		"314F", "3.14f", "3.14159265358979323846264D",
 	}
-	bads := []string{
-		".0", "1234.5678.90",
-		"25-5B", "1234.s",
-	}
 	var s scanner
 	scan := func(str string) bool {
 		s.reset()
@ -29,36 +28,29 @@ func TestSNBT_number(t *testing.T) {
 			t.Errorf("scan valid data %q error: %v", str, s.err)
 		}
 	}
-	for _, str := range bads {
-		if scan(str) {
-			t.Errorf("scan invalid data %q success", str)
-		}
-	}
 }

+//go:embed bigTest_test.snbt
+var bigTest string
+
 func TestSNBT_compound(t *testing.T) {
 	goods := []string{
 		`{}`, `{name:3.14f}`, `{ "name" : 12345 }`,
 		`{ abc: { }}`, `{ "a b\"c": {}, def: 12345}`,
+		bigTest,
 	}
 	var s scanner
-	scan := func(str string) bool {
+	for _, str := range goods {
 		s.reset()
-		for _, c := range []byte(str) {
+		for i, c := range []byte(str) {
 			res := s.step(c)
 			if res == scanError {
-				return false
-			}
-		}
-		return true
-	}
-	for _, str := range goods {
-		if scan(str) == false {
-			t.Errorf("scan valid data %q error: %v", str, s.err)
+				t.Errorf("scan valid data %q error: %v at [%d]", str[:i], s.err, i)
+				break
+			}
 		}
 	}
 }
-
 func TestSNBT_list(t *testing.T) {
 	goods := []string{
 		`[]`, `[a, 'b', "c", d]`, // List of string