283 lines
7.1 KiB
Go
283 lines
7.1 KiB
Go
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
|
package hujson
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// lineColumn reports the 1-based line and column of byte offset n within b.
// The column is measured in bytes from the start of the line containing n.
// It panics if n is outside the bounds of b.
func lineColumn(b []byte, n int) (line, column int) {
	head := b[:n]
	line = bytes.Count(head, []byte{'\n'}) + 1
	// LastIndexByte yields -1 when head holds no newline,
	// which makes the line start at offset 0.
	lineStart := bytes.LastIndexByte(head, '\n') + 1
	column = n - lineStart + 1
	return line, column
}
|
|
|
|
// Parse parses a HuJSON value as a Value.
|
|
// Extra and Literal values in v will alias the provided input buffer.
|
|
func Parse(b []byte) (Value, error) {
|
|
v, n, err := parseNext(0, b)
|
|
if err == nil && n < len(b) {
|
|
err = newInvalidCharacterError(b[n:], "after top-level value")
|
|
}
|
|
if err != nil {
|
|
line, column := lineColumn(b, n)
|
|
err = fmt.Errorf("hujson: line %d, column %d: %w", line, column, err)
|
|
return v, err
|
|
}
|
|
return v, nil
|
|
}
|
|
|
|
// parseNext parses the next value with surrounding whitespace and comments.
|
|
func parseNext(n int, b []byte) (v Value, _ int, err error) {
|
|
n0 := n
|
|
|
|
// Consume leading whitespace and comments.
|
|
if n, err = consumeExtra(n, b); err != nil {
|
|
return v, n, err
|
|
}
|
|
if n > n0 {
|
|
v.BeforeExtra = b[n0:n:n]
|
|
}
|
|
|
|
// Parse the next value.
|
|
v.StartOffset = n
|
|
if v.Value, n, err = parseNextTrimmed(n, b); err != nil {
|
|
return v, n, err
|
|
}
|
|
v.EndOffset = n
|
|
|
|
// Consume trailing whitespace and comments.
|
|
if n, err = consumeExtra(n, b); err != nil {
|
|
return v, n, err
|
|
}
|
|
if n > v.EndOffset {
|
|
v.AfterExtra = b[v.EndOffset:n:n]
|
|
}
|
|
|
|
return v, n, nil
|
|
}
|
|
|
|
// errInvalidObjectEnd is returned by parseNextTrimmed when a value starts
// with '}'. The object parsing loop compares against it to recognize
// the end of an object.
var errInvalidObjectEnd = errors.New("invalid character '}' at start of value")

// errInvalidArrayEnd is returned by parseNextTrimmed when a value starts
// with ']'. The array parsing loop compares against it to recognize
// the end of an array.
var errInvalidArrayEnd = errors.New("invalid character ']' at start of value")
|
|
|
|
// parseNextTrimmed parses the next value without surrounding whitespace and comments.
|
|
func parseNextTrimmed(n int, b []byte) (ValueTrimmed, int, error) {
|
|
if len(b) == n {
|
|
return nil, n, fmt.Errorf("parsing value: %w", io.ErrUnexpectedEOF)
|
|
}
|
|
switch b[n] {
|
|
// Parse objects.
|
|
case '{':
|
|
n++
|
|
var obj Object
|
|
for {
|
|
var vk, vv Value
|
|
var err error
|
|
|
|
// Parse the name.
|
|
if vk, n, err = parseNext(n, b); err != nil {
|
|
if err == errInvalidObjectEnd && vk.Value == nil {
|
|
setTrailingComma(&obj, len(obj.Members) > 0)
|
|
obj.AfterExtra = vk.BeforeExtra
|
|
return &obj, n + len(`}`), nil
|
|
}
|
|
return &obj, n, err
|
|
}
|
|
if vk.Value.Kind() != '"' {
|
|
return &obj, vk.StartOffset, newInvalidCharacterError(b[vk.StartOffset:], "at start of object name")
|
|
}
|
|
|
|
// Parse the colon.
|
|
switch {
|
|
case len(b) == n:
|
|
return &obj, n, fmt.Errorf("parsing object after name: %w", io.ErrUnexpectedEOF)
|
|
case b[n] != ':':
|
|
return &obj, n, newInvalidCharacterError(b[n:], "after object name")
|
|
}
|
|
n++
|
|
|
|
// Parse the value.
|
|
if vv, n, err = parseNext(n, b); err != nil {
|
|
return &obj, n, err
|
|
}
|
|
|
|
obj.Members = append(obj.Members, ObjectMember{vk, vv})
|
|
switch {
|
|
case len(b) == n:
|
|
return &obj, n, fmt.Errorf("parsing object after value: %w", io.ErrUnexpectedEOF)
|
|
case b[n] == ',':
|
|
n++
|
|
case b[n] == '}':
|
|
// Move AfterExtra from last value to AfterExtra of the object.
|
|
obj.AfterExtra = obj.Members[len(obj.Members)-1].Value.AfterExtra
|
|
obj.Members[len(obj.Members)-1].Value.AfterExtra = nil
|
|
return &obj, n + len(`}`), nil
|
|
default:
|
|
return &obj, n, newInvalidCharacterError(b[n:], "after object value (expecting ',' or '}')")
|
|
}
|
|
}
|
|
case '}':
|
|
return nil, n, errInvalidObjectEnd
|
|
|
|
// Parse arrays.
|
|
case '[':
|
|
n++
|
|
var arr Array
|
|
for {
|
|
var v Value
|
|
var err error
|
|
if v, n, err = parseNext(n, b); err != nil {
|
|
if err == errInvalidArrayEnd && v.Value == nil {
|
|
setTrailingComma(&arr, len(arr.Elements) > 0)
|
|
arr.AfterExtra = v.BeforeExtra
|
|
return &arr, n + len(`]`), nil
|
|
}
|
|
return &arr, n, err
|
|
}
|
|
arr.Elements = append(arr.Elements, v)
|
|
switch {
|
|
case len(b) == n:
|
|
return &arr, n, fmt.Errorf("parsing array after value: %w", io.ErrUnexpectedEOF)
|
|
case b[n] == ',':
|
|
n++
|
|
case b[n] == ']':
|
|
// Move AfterExtra from last value to AfterExtra of the array.
|
|
arr.AfterExtra = arr.Elements[len(arr.Elements)-1].AfterExtra
|
|
arr.Elements[len(arr.Elements)-1].AfterExtra = nil
|
|
return &arr, n + len(`]`), nil
|
|
default:
|
|
return &arr, n, newInvalidCharacterError(b[n:], "after array value (expecting ',' or ']')")
|
|
}
|
|
}
|
|
case ']':
|
|
return nil, n, errInvalidArrayEnd
|
|
|
|
// Parse strings.
|
|
case '"':
|
|
n0 := n
|
|
n++
|
|
var inEscape bool
|
|
for {
|
|
switch {
|
|
case len(b) == n:
|
|
return nil, n, fmt.Errorf("parsing string: %w", io.ErrUnexpectedEOF)
|
|
case inEscape:
|
|
inEscape = false
|
|
case b[n] == '\\':
|
|
inEscape = true
|
|
case b[n] == '"':
|
|
n++
|
|
lit := Literal(b[n0:n:n])
|
|
if !lit.IsValid() {
|
|
return nil, n0, fmt.Errorf("invalid literal: %s", lit)
|
|
}
|
|
return lit, n, nil
|
|
}
|
|
n++
|
|
}
|
|
|
|
// Parse null, booleans, and numbers.
|
|
default:
|
|
n0 := n
|
|
for len(b) > n && (b[n] == '-' || b[n] == '+' || b[n] == '.' ||
|
|
('a' <= b[n] && b[n] <= 'z') ||
|
|
('A' <= b[n] && b[n] <= 'Z') ||
|
|
('0' <= b[n] && b[n] <= '9')) {
|
|
n++
|
|
}
|
|
switch lit := Literal(b[n0:n:n]); {
|
|
case len(lit) == 0:
|
|
return nil, n0, newInvalidCharacterError(b[n0:], "at start of value")
|
|
case !lit.IsValid():
|
|
return nil, n0, fmt.Errorf("invalid literal: %s", lit)
|
|
default:
|
|
return lit, n, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// Delimiters of a line comment: it runs from "//" through the next newline.
var lineCommentStart = []byte("//")
var lineCommentEnd = []byte("\n")

// Delimiters of a block comment: it runs from "/*" through the next "*/".
var blockCommentStart = []byte("/*")
var blockCommentEnd = []byte("*/")
|
|
|
|
// consumeExtra consumes leading whitespace and comments.
|
|
func consumeExtra(n int, b []byte) (int, error) {
|
|
for len(b) > n {
|
|
switch b[n] {
|
|
// Skip past whitespace.
|
|
case ' ', '\t', '\r', '\n':
|
|
n += consumeWhitespace(b[n:])
|
|
// Skip past comments.
|
|
case '/':
|
|
switch nc := consumeComment(b[n:]); {
|
|
case nc == 0:
|
|
return n, nil
|
|
case nc < 0:
|
|
return n, fmt.Errorf("parsing comment: %w", io.ErrUnexpectedEOF)
|
|
case !utf8.Valid(b[n : n+nc]):
|
|
return n, fmt.Errorf("invalid UTF-8 in comment")
|
|
default:
|
|
n += nc
|
|
}
|
|
default:
|
|
return n, nil
|
|
}
|
|
}
|
|
return n, nil
|
|
}
|
|
|
|
// consumeWhitespace returns the number of leading bytes in b that are
// a space, tab, carriage return, or newline.
func consumeWhitespace(b []byte) (n int) {
	for _, c := range b {
		switch c {
		case ' ', '\t', '\r', '\n':
			n++
		default:
			return n
		}
	}
	return n
}
|
|
|
|
// consumeComment consumes a line or block comment start in b.
|
|
// It returns the length of the comment if valid, otherwise
|
|
// it returns 0 if it is not a comment and -1 if it is invalid.
|
|
func consumeComment(b []byte) (n int) {
|
|
var start, end []byte
|
|
switch {
|
|
case bytes.HasPrefix(b, lineCommentStart):
|
|
start, end = lineCommentStart, lineCommentEnd
|
|
case bytes.HasPrefix(b, blockCommentStart):
|
|
start, end = blockCommentStart, blockCommentEnd
|
|
default:
|
|
return 0
|
|
}
|
|
i := bytes.Index(b[len(start):], end)
|
|
if i < 0 {
|
|
return -1
|
|
}
|
|
return len(start) + i + len(end)
|
|
}
|
|
|
|
// newInvalidCharacterError returns an error of the form
// "invalid character <char> <where>", where <char> describes the first
// rune of prefix: an invalid UTF-8 byte is shown as '\xNN', a printable
// rune is shown quoted, and any other rune is shown as a '\uNNNN' or
// '\UNNNNNNNN' escape.
func newInvalidCharacterError(prefix []byte, where string) error {
	r, size := utf8.DecodeRune(prefix)

	var quoted string
	if r == utf8.RuneError && size == 1 {
		// DecodeRune signals an invalid encoding with (RuneError, 1);
		// show the raw byte rather than U+FFFD.
		quoted = fmt.Sprintf(`'\x%02x'`, prefix[0])
	} else if unicode.IsPrint(r) {
		quoted = fmt.Sprintf(`%q`, r)
	} else if r <= '\uffff' {
		quoted = fmt.Sprintf(`'\u%04x'`, r)
	} else {
		quoted = fmt.Sprintf(`'\U%08x'`, r)
	}

	return errors.New("invalid character " + quoted + " " + where)
}
|