603 lines
18 KiB
Go
603 lines
18 KiB
Go
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package hujson
|
|
|
|
import (
|
|
"bytes"
|
|
"unicode"
|
|
)
|
|
|
|
// Standardize strips any features specific to HuJSON from b,
|
|
// making it compliant with standard JSON per RFC 8259.
|
|
// All comments and trailing commas are replaced with a space character
|
|
// in order to preserve the original line numbers and byte offsets.
|
|
// If an error is encountered, then b is returned as is along with the error.
|
|
func Standardize(b []byte) ([]byte, error) {
|
|
ast, err := Parse(b)
|
|
if err != nil {
|
|
return b, err
|
|
}
|
|
ast.Standardize()
|
|
return ast.Pack(), nil
|
|
}
|
|
|
|
// Minimize removes all whitespace, comments, and trailing commas from b,
|
|
// making it compliant with standard JSON per RFC 8259.
|
|
// If an error is encountered, then b is returned as is along with the error.
|
|
func Minimize(b []byte) ([]byte, error) {
|
|
ast, err := Parse(b)
|
|
if err != nil {
|
|
return b, err
|
|
}
|
|
ast.Minimize()
|
|
return ast.Pack(), nil
|
|
}
|
|
|
|
// Format formats b according to some opinionated heuristics for
|
|
// how HuJSON should look. The exact output may change over time.
|
|
// It is the equivalent of `go fmt` but for HuJSON.
|
|
//
|
|
// If the input is standard JSON, then the output will remain standard.
|
|
// Format is idempotent such that formatting already formatted HuJSON
|
|
// results in no changes.
|
|
// If an error is encountered, then b is returned as is along with the error.
|
|
func Format(b []byte) ([]byte, error) {
|
|
ast, err := Parse(b)
|
|
if err != nil {
|
|
return b, err
|
|
}
|
|
ast.Format()
|
|
return ast.Pack(), nil
|
|
}
|
|
|
|
// punchCardWidth is the approximate line length, in bytes, above which
// expandComposites marks an otherwise inlined object or array for expansion.
const punchCardWidth = 80
|
|
|
|
// Byte sequences used while formatting. They are only ever read.
//
// Note that endlineMacOSX holds a line feed followed by a carriage return
// ("\n\r"), and endlineWindows holds a carriage return followed by a
// line feed ("\r\n").
var (
	newline        = []byte{'\n'}
	twoNewlines    = []byte{'\n', '\n'}
	endlineWindows = []byte{'\r', '\n'}
	endlineMacOSX  = []byte{'\n', '\r'}
	carriageReturn = []byte{'\r'}
	space          = []byte{' '}
)
|
|
|
|
// Format formats the value according to some opinionated heuristics for
|
|
// how HuJSON should look. The exact output may change over time.
|
|
// It is the equivalent of `go fmt` but for HuJSON.
|
|
//
|
|
// If the input is standard JSON, then the output will remain standard.
|
|
// Format is idempotent such that formatting already formatted HuJSON
|
|
// results in no changes.
|
|
func (v *Value) Format() {
|
|
// Format leading extra.
|
|
v.BeforeExtra.format(0, formatOptions{})
|
|
v.BeforeExtra = v.BeforeExtra[consumeWhitespace(v.BeforeExtra):] // never has leading whitespace
|
|
// Format the value.
|
|
needExpand := make(map[composite]bool)
|
|
isStandard := v.IsStandard()
|
|
v.normalize()
|
|
v.expandComposites(needExpand)
|
|
v.formatWhitespace(0, needExpand, isStandard)
|
|
v.alignObjectValues()
|
|
// Format trailing extra.
|
|
v.AfterExtra.format(0, formatOptions{})
|
|
v.AfterExtra = append(bytes.TrimRightFunc(v.AfterExtra, unicode.IsSpace), '\n') // always has exactly one trailing newline
|
|
|
|
v.UpdateOffsets()
|
|
}
|
|
|
|
// normalize performs simple normalization changes. In particular, it:
|
|
// - normalizes strings,
|
|
// - normalizes empty objects and arrays as simply {} or [],
|
|
// - normalizes whitespace between names and colons,
|
|
// - normalizes whitespace between values and commas.
|
|
//
|
|
// It always returns true to be compatible with composite.rangeValues.
|
|
func (v *Value) normalize() bool {
|
|
switch v2 := v.Value.(type) {
|
|
case Literal:
|
|
// Normalize string if there are escape characters.
|
|
if v2.Kind() == '"' && bytes.IndexByte(v2, '\\') >= 0 {
|
|
v.Value = String(v2.String())
|
|
}
|
|
case composite:
|
|
// Cleanup for empty objects and arrays.
|
|
if v2.length() == 0 {
|
|
// If there is only whitespace, then remove the whitespace.
|
|
if !v2.afterExtra().hasComment() {
|
|
*v2.afterExtra() = nil
|
|
}
|
|
break
|
|
}
|
|
|
|
// If there is only whitespace between the name and colon,
|
|
// or between the value and comma, then remove the whitespace.
|
|
for v3 := range v2.allValues() {
|
|
if !v3.AfterExtra.hasComment() {
|
|
v3.AfterExtra = nil
|
|
}
|
|
}
|
|
|
|
// Normalize all sub-values.
|
|
for v3 := range v2.allValues() {
|
|
v3.normalize()
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// lineStats summarizes the lengths of a sequence of lines.
type lineStats struct {
	// firstLength and lastLength are the lengths of the first and last line.
	firstLength, lastLength int
	// multiline reports whether there is more than one line;
	// if false, then firstLength == lastLength.
	multiline bool
}
|
|
|
|
// expandComposites populates needExpand with the set of composite values
|
|
// that need to be expanded (i.e., print each member/element on a new line).
|
|
// This method is pure and does not mutate the AST.
|
|
func (v *Value) expandComposites(needExpand map[composite]bool) (stats lineStats) {
|
|
switch v2 := v.Value.(type) {
|
|
case Literal:
|
|
stats = lineStats{len(v2), len(v2), false}
|
|
case composite:
|
|
// Every object or array is either fully inlined or fully expanded.
|
|
// This simplifies machine-modification of HuJSON so that the mutation
|
|
// can easily determine which mode it is currently in.
|
|
//
|
|
// If any whitespace after a '{', '[', or ',' or before a '}' or ']'
|
|
// contains a newline, then we always expand the object or array.
|
|
var expand bool
|
|
|
|
// Keep track of line lengths.
|
|
var lineLength int
|
|
var lineLengths []int
|
|
updateStats := func(s lineStats) {
|
|
lineLength += s.firstLength
|
|
if s.multiline {
|
|
lineLengths = append(lineLengths, lineLength)
|
|
lineLength = s.lastLength
|
|
}
|
|
}
|
|
|
|
// Iterate through all members/elements in an object/array.
|
|
switch v2 := v2.(type) {
|
|
case *Object:
|
|
lineLength += len("{")
|
|
for i := range v2.Members {
|
|
name := &v2.Members[i].Name
|
|
value := &v2.Members[i].Value
|
|
expand = expand || name.BeforeExtra.hasNewline()
|
|
updateStats(name.BeforeExtra.lineStats())
|
|
updateStats(name.expandComposites(needExpand))
|
|
updateStats(name.AfterExtra.lineStats())
|
|
lineLength += len(": ")
|
|
updateStats(value.BeforeExtra.lineStats())
|
|
updateStats(value.expandComposites(needExpand))
|
|
updateStats(value.AfterExtra.lineStats())
|
|
lineLength += len(", ")
|
|
}
|
|
lineLength += len("}")
|
|
|
|
// Always expand multiline objects with more than 1 member.
|
|
expand = expand || v2.length() > 1 && stats.multiline
|
|
case *Array:
|
|
lineLength += len("[")
|
|
for i := range v2.Elements {
|
|
value := &v2.Elements[i]
|
|
expand = expand || value.BeforeExtra.hasNewline()
|
|
updateStats(value.BeforeExtra.lineStats())
|
|
updateStats(value.expandComposites(needExpand))
|
|
updateStats(value.AfterExtra.lineStats())
|
|
lineLength += len(", ")
|
|
}
|
|
lineLength += len("]")
|
|
}
|
|
if last := v2.lastValue(); last != nil {
|
|
expand = expand || last.AfterExtra.hasNewline()
|
|
}
|
|
expand = expand || v2.afterExtra().hasNewline()
|
|
|
|
// Update the block statistics.
|
|
lineLengths = append(lineLengths, lineLength)
|
|
stats = lineStats{
|
|
firstLength: lineLengths[0],
|
|
lastLength: lineLengths[len(lineLengths)-1],
|
|
multiline: len(lineLengths) > 1,
|
|
}
|
|
for i := 0; !expand && i < len(lineLengths); i++ {
|
|
expand = lineLengths[i] > punchCardWidth
|
|
}
|
|
|
|
if expand {
|
|
stats = lineStats{len("{"), len("}"), true}
|
|
stats.firstLength += v2.beforeExtraAt(0).lineStats().firstLength
|
|
needExpand[v2] = expand
|
|
}
|
|
}
|
|
return stats
|
|
}
|
|
|
|
func (b Extra) lineStats() (stats lineStats) {
|
|
// length is the approximate length of the comments.
|
|
length := func(b []byte) (n int) {
|
|
for {
|
|
b = b[consumeWhitespace(b):]
|
|
switch {
|
|
case bytes.HasPrefix(b, lineCommentStart):
|
|
return n + len(" ") + len(b) // line comment must go to the end
|
|
case bytes.HasPrefix(b, blockCommentStart):
|
|
nc := consumeComment(b)
|
|
if nc <= 0 {
|
|
return n + len(" ") + len(b) // truncated block comment must go to the end
|
|
}
|
|
n += len(" ") + nc
|
|
b = b[nc:]
|
|
continue
|
|
default:
|
|
if n > 0 {
|
|
n += len(" ") // account for padding space after block comment
|
|
}
|
|
return n
|
|
}
|
|
}
|
|
}
|
|
if !bytes.Contains(b, newline) {
|
|
n := length(b)
|
|
return lineStats{n, n, false}
|
|
} else {
|
|
first := b[:bytes.IndexByte(b, '\n')]
|
|
last := b[bytes.LastIndexByte(b, '\n')+len("\n"):]
|
|
return lineStats{length(first), length(last), true}
|
|
}
|
|
}
|
|
|
|
// formatWhitespace mutates the AST and formats whitespace to ensure
|
|
// consistent indentation and expansion of objects and arrays.
|
|
func (v *Value) formatWhitespace(depth int, needExpand map[composite]bool, standardize bool) {
|
|
if comp, ok := v.Value.(composite); ok {
|
|
expand := needExpand[comp]
|
|
|
|
// Format all members/elements in an object/array.
|
|
switch comp := comp.(type) {
|
|
case *Object:
|
|
for i := range comp.Members {
|
|
name := &comp.Members[i].Name
|
|
value := &comp.Members[i].Value
|
|
|
|
// Format extra before name.
|
|
name.BeforeExtra.format(depth+1, formatOptions{
|
|
ensureLeadingNewline: expand,
|
|
removeLeadingEmptyLines: i == 0,
|
|
appendSpaceIfEmpty: i != 0,
|
|
})
|
|
// Format the name.
|
|
name.formatWhitespace(depth+1, needExpand, standardize)
|
|
// Format extra after name and before colon.
|
|
name.AfterExtra.format(depth+2, formatOptions{
|
|
removeLeadingEmptyLines: true,
|
|
removeTrailingEmptyLines: true,
|
|
})
|
|
// Format extra after colon and before value.
|
|
value.BeforeExtra.format(depth+2, formatOptions{
|
|
removeLeadingEmptyLines: true,
|
|
removeTrailingEmptyLines: true,
|
|
appendSpaceIfEmpty: true,
|
|
})
|
|
// Format the value.
|
|
depthOffset := 0
|
|
if expand {
|
|
depthOffset++
|
|
}
|
|
if name.AfterExtra.hasNewline() || value.BeforeExtra.hasNewline() {
|
|
depthOffset++
|
|
}
|
|
value.formatWhitespace(depth+depthOffset, needExpand, standardize)
|
|
// Format extra after value and before comma.
|
|
value.AfterExtra.format(depth+2, formatOptions{
|
|
removeLeadingEmptyLines: true,
|
|
removeTrailingEmptyLines: true,
|
|
})
|
|
}
|
|
case *Array:
|
|
for i := range comp.Elements {
|
|
value := &comp.Elements[i]
|
|
|
|
// Format extra before value.
|
|
value.BeforeExtra.format(depth+1, formatOptions{
|
|
ensureLeadingNewline: expand,
|
|
removeLeadingEmptyLines: i == 0,
|
|
appendSpaceIfEmpty: i != 0,
|
|
})
|
|
// Format the value.
|
|
depthOffset := 0
|
|
if expand {
|
|
depthOffset++
|
|
}
|
|
value.formatWhitespace(depth+depthOffset, needExpand, standardize)
|
|
// Format extra after value and before comma.
|
|
value.AfterExtra.format(depth+2, formatOptions{
|
|
removeLeadingEmptyLines: true,
|
|
removeTrailingEmptyLines: true,
|
|
})
|
|
}
|
|
}
|
|
|
|
// Format the extra before the closing '}' or ']'.
|
|
comp.afterExtra().format(depth+1, formatOptions{
|
|
ensureTrailingNewline: expand,
|
|
removeLeadingEmptyLines: comp.length() == 0,
|
|
removeTrailingEmptyLines: true,
|
|
unindentLastLine: true,
|
|
})
|
|
|
|
// Normalize presence of trailing comma.
|
|
surroundedComma := comp.lastValue() != nil && len(comp.lastValue().AfterExtra) > 0 && len(*comp.afterExtra()) > 0
|
|
switch {
|
|
// Avoid a trailing comma for a non-expanded object or array.
|
|
case !expand && !surroundedComma:
|
|
setTrailingComma(comp, false)
|
|
// Otherwise, emit a trailing comma (unless this need to be standard).
|
|
case expand && !standardize:
|
|
setTrailingComma(comp, true)
|
|
}
|
|
}
|
|
}
|
|
|
|
// formatOptions selects the optional adjustments applied by Extra.format.
type formatOptions struct {
	// ensureLeadingNewline starts the output with a newline
	// if the input contains no newline at all.
	// ensureTrailingNewline appends a newline
	// if the output does not already end with one.
	ensureLeadingNewline, ensureTrailingNewline bool

	// removeLeadingEmptyLines and removeTrailingEmptyLines reduce a run of
	// newlines at the start or end of the output to a single newline.
	removeLeadingEmptyLines, removeTrailingEmptyLines bool

	// unindentLastLine indents the final line by one level less than depth
	// when the output ends with a newline.
	unindentLastLine bool

	// appendSpaceIfEmpty emits a single space if the output is empty.
	appendSpaceIfEmpty bool
}
|
|
|
|
func (b *Extra) format(depth int, opts formatOptions) {
|
|
// Remove carriage returns to normalize output across operating systems.
|
|
if bytes.IndexByte(*b, '\r') >= 0 {
|
|
*b = bytes.ReplaceAll(*b, endlineWindows, newline)
|
|
*b = bytes.ReplaceAll(*b, endlineMacOSX, newline)
|
|
*b = bytes.ReplaceAll(*b, carriageReturn, space)
|
|
}
|
|
|
|
in := *b
|
|
var out []byte // TODO(dsnet): Cache this in sync.Pool?
|
|
|
|
// Inject a leading newline if not present in the input.
|
|
if opts.ensureLeadingNewline && !in.hasNewline() {
|
|
out = append(out, '\n')
|
|
}
|
|
|
|
// Iterate over every paragraph in the comment.
|
|
for len(in) > 0 {
|
|
// Handle whitespace.
|
|
if n := consumeWhitespace(in); n > 0 {
|
|
nl := bytes.Count(in[:n], newline)
|
|
if nl > 2 {
|
|
nl = 2 // never allow more than one blank line
|
|
}
|
|
for i := 0; i < nl; i++ {
|
|
out = append(out, '\n')
|
|
}
|
|
in = in[n:]
|
|
continue
|
|
}
|
|
|
|
// Handle comments.
|
|
n := consumeComment(in)
|
|
if n <= 0 {
|
|
return // invalid comment
|
|
}
|
|
|
|
// Emit leading whitespace.
|
|
if bytes.HasSuffix(out, newline) {
|
|
out = appendIndent(out, depth)
|
|
} else {
|
|
out = append(out, ' ')
|
|
}
|
|
|
|
// Copy single-line comment to the output verbatim.
|
|
comment := in[:n]
|
|
if bytes.HasPrefix(comment, lineCommentStart) || !comment.hasNewline() {
|
|
comment = bytes.TrimRightFunc(comment, unicode.IsSpace) // trim trailing whitespace
|
|
if bytes.HasPrefix(comment, lineCommentStart) {
|
|
n-- // leave newline for next iteration of comment
|
|
}
|
|
out = append(out, comment...) // single-line comments preserved verbatim
|
|
in = in[n:]
|
|
continue
|
|
}
|
|
|
|
// Format multi-line block comments and copy to the output.
|
|
lines := bytes.Split(comment, newline) // len(lines) >= 2 since at least one '\n' exists
|
|
var firstLine []byte // first non-empty line after blockCommentStart
|
|
var hasEmptyLine bool
|
|
for i, line := range lines {
|
|
line = bytes.TrimRightFunc(line, unicode.IsSpace) // trim trailing whitespace
|
|
if len(firstLine) == 0 && len(line) > 0 && i > 0 {
|
|
firstLine = line
|
|
}
|
|
hasEmptyLine = hasEmptyLine || len(line) == 0
|
|
lines[i] = line
|
|
}
|
|
|
|
// Compute the longest common prefix
|
|
commonPrefix := firstLine
|
|
for i, line := range lines[1:] {
|
|
if len(line) == 0 {
|
|
continue // ignore empty lines
|
|
}
|
|
|
|
// If the last line is just "*/" with preceding whitespace, then
|
|
// ignore any whitespace as part of the common prefix.
|
|
// Instead, copy the whitespace from the common prefix.
|
|
isLast := bytes.HasSuffix(line, blockCommentEnd)
|
|
if isLast && consumeWhitespace(line)+len(blockCommentEnd) == len(line) {
|
|
prefixLen := consumeWhitespace(commonPrefix)
|
|
lines[i+1] = append(commonPrefix[:prefixLen:prefixLen], blockCommentEnd...)
|
|
break
|
|
}
|
|
|
|
// Check for longest common prefix.
|
|
for i := 0; i < len(line) && i < len(commonPrefix); i++ {
|
|
if line[i] != commonPrefix[i] {
|
|
commonPrefix = commonPrefix[:i]
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
|
|
// Indent every line and copy to output.
|
|
prefixLen := consumeWhitespace(commonPrefix)
|
|
starAligned := !hasEmptyLine && len(commonPrefix) > prefixLen && commonPrefix[prefixLen] == '*'
|
|
out = append(out, lines[0]...)
|
|
out = append(out, '\n')
|
|
for _, line := range lines[1:] {
|
|
if len(line) > 0 {
|
|
out = appendIndent(out, depth)
|
|
if starAligned {
|
|
out = append(out, ' ')
|
|
}
|
|
out = append(out, line[prefixLen:]...)
|
|
}
|
|
out = append(out, '\n')
|
|
}
|
|
out = bytes.TrimRight(out, "\n")
|
|
in = in[n:]
|
|
}
|
|
|
|
// Inject a trailing newline if not present in the input.
|
|
if opts.ensureTrailingNewline && !bytes.HasSuffix(out, newline) {
|
|
out = append(out, '\n')
|
|
}
|
|
// Remove all leading empty lines.
|
|
for opts.removeLeadingEmptyLines && bytes.HasPrefix(out, twoNewlines) {
|
|
out = out[1:]
|
|
}
|
|
// Remove all trailing empty lines.
|
|
for opts.removeTrailingEmptyLines && bytes.HasSuffix(out, twoNewlines) {
|
|
out = out[:len(out)-1]
|
|
}
|
|
// If the whitespace ends on a newline, append the necessary indentation.
|
|
// Otherwise, emit a space if we did not end on a new line.
|
|
if bytes.HasSuffix(out, newline) {
|
|
if opts.unindentLastLine {
|
|
depth--
|
|
}
|
|
out = appendIndent(out, depth)
|
|
} else if len(out) > 0 {
|
|
out = append(out, ' ')
|
|
}
|
|
// Emit a space if the output is empty.
|
|
if opts.appendSpaceIfEmpty && len(out) == 0 {
|
|
out = append(out, ' ')
|
|
}
|
|
|
|
// Copy intermediate output to the receiver.
|
|
if !bytes.Equal(*b, out) {
|
|
*b = append((*b)[:0], out...)
|
|
}
|
|
}
|
|
|
|
// alignObjectValues aligns object values by inserting spaces after the name
|
|
// so that the values are aligned to the same column.
|
|
//
|
|
// It always returns true to be compatible with composite.rangeValues.
|
|
func (v *Value) alignObjectValues() bool {
|
|
// TODO(dsnet): This is broken for non-monospace, non-narrow characters.
|
|
// This is hard to fix as even `go fmt` suffers from this problem.
|
|
// See https://golang.org/issue/8273.
|
|
if obj, ok := v.Value.(*Object); ok {
|
|
type row struct {
|
|
extra *Extra // pointer to extra after colon and before value
|
|
length int // length from start of name to end of extra
|
|
}
|
|
var rows []row
|
|
alignRows := func() {
|
|
// TODO(dsnet): Should we break apart rows if the number of spaces
|
|
// to insert exceeds some threshold?
|
|
|
|
// Compute the maximum width.
|
|
var max int
|
|
for _, row := range rows {
|
|
if max < row.length {
|
|
max = row.length
|
|
}
|
|
}
|
|
// Align every row up to that width.
|
|
for _, row := range rows {
|
|
for n := max - row.length; n > 0; n-- {
|
|
*row.extra = append(*row.extra, ' ')
|
|
}
|
|
}
|
|
// Reset the sequence of rows.
|
|
rows = rows[:0]
|
|
}
|
|
var indentSuffix []byte
|
|
for i := range obj.Members {
|
|
name := &obj.Members[i].Name
|
|
value := &obj.Members[i].Value
|
|
|
|
// Whitespace right before name must have a newline and
|
|
// everything after the name until the comma cannot have newlines.
|
|
if !name.BeforeExtra.hasNewline() ||
|
|
name.hasNewline(false) ||
|
|
name.AfterExtra.hasNewline() ||
|
|
value.BeforeExtra.hasNewline() ||
|
|
value.hasNewline(false) ||
|
|
value.AfterExtra.hasNewline() {
|
|
alignRows()
|
|
continue
|
|
}
|
|
|
|
// If there are multiple newlines or the indentSuffix mismatches,
|
|
// then this is the start of a new block or rows to align.
|
|
if bytes.Count(name.BeforeExtra, newline) > 1 || !bytes.HasSuffix(name.BeforeExtra, indentSuffix) {
|
|
alignRows() // flush the current block or rows
|
|
}
|
|
|
|
rows = append(rows, row{
|
|
extra: &value.BeforeExtra,
|
|
length: len(name.Value.(Literal)) + len(name.AfterExtra) + len(":") + len(value.BeforeExtra),
|
|
})
|
|
}
|
|
alignRows()
|
|
}
|
|
|
|
// Recursively align all sub-objects.
|
|
if comp, ok := v.Value.(composite); ok {
|
|
for v2 := range comp.allValues() {
|
|
v2.alignObjectValues()
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (v Value) hasNewline(checkTopLevelExtra bool) bool {
|
|
if checkTopLevelExtra && (v.BeforeExtra.hasNewline() || v.AfterExtra.hasNewline()) {
|
|
return true
|
|
}
|
|
if comp, ok := v.Value.(composite); ok {
|
|
for v := range comp.allValues() {
|
|
if v.hasNewline(true) {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (b Extra) hasNewline() bool {
|
|
return bytes.IndexByte(b, '\n') >= 0
|
|
}
|
|
|
|
// appendIndent appends n tab characters to b and returns the extended slice.
// If n is zero or negative, b is returned unchanged.
func appendIndent(b []byte, n int) []byte {
	for ; n > 0; n-- {
		b = append(b, '\t')
	}
	return b
}
|