Files
autogits/vendor/github.com/tailscale/hujson/format.go
Adam Majer 082db173f3 vendor: move vendored sources in-tree
This should make it easier to see changes instead of just a blob
2025-08-25 19:48:19 +02:00

603 lines
18 KiB
Go

// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hujson
import (
"bytes"
"unicode"
)
// Standardize strips any features specific to HuJSON from b,
// making it compliant with standard JSON per RFC 8259.
// All comments and trailing commas are replaced with a space character
// in order to preserve the original line numbers and byte offsets.
// If an error is encountered, then b is returned as is along with the error.
func Standardize(b []byte) ([]byte, error) {
ast, err := Parse(b)
if err != nil {
return b, err
}
ast.Standardize()
return ast.Pack(), nil
}
// Minimize removes all whitespace, comments, and trailing commas from b,
// making it compliant with standard JSON per RFC 8259.
// If an error is encountered, then b is returned as is along with the error.
func Minimize(b []byte) ([]byte, error) {
ast, err := Parse(b)
if err != nil {
return b, err
}
ast.Minimize()
return ast.Pack(), nil
}
// Format formats b according to some opinionated heuristics for
// how HuJSON should look. The exact output may change over time.
// It is the equivalent of `go fmt` but for HuJSON.
//
// If the input is standard JSON, then the output will remain standard.
// Format is idempotent such that formatting already formatted HuJSON
// results in no changes.
// If an error is encountered, then b is returned as is along with the error.
func Format(b []byte) ([]byte, error) {
ast, err := Parse(b)
if err != nil {
return b, err
}
ast.Format()
return ast.Pack(), nil
}
const punchCardWidth = 80
var (
newline = []byte("\n")
twoNewlines = []byte("\n\n")
endlineWindows = []byte("\r\n")
endlineMacOSX = []byte("\n\r")
carriageReturn = []byte("\r")
space = []byte(" ")
)
// Format formats the value according to some opinionated heuristics for
// how HuJSON should look. The exact output may change over time.
// It is the equivalent of `go fmt` but for HuJSON.
//
// If the input is standard JSON, then the output will remain standard.
// Format is idempotent such that formatting already formatted HuJSON
// results in no changes.
func (v *Value) Format() {
// Format leading extra.
v.BeforeExtra.format(0, formatOptions{})
v.BeforeExtra = v.BeforeExtra[consumeWhitespace(v.BeforeExtra):] // never has leading whitespace
// Format the value.
needExpand := make(map[composite]bool)
isStandard := v.IsStandard()
v.normalize()
v.expandComposites(needExpand)
v.formatWhitespace(0, needExpand, isStandard)
v.alignObjectValues()
// Format trailing extra.
v.AfterExtra.format(0, formatOptions{})
v.AfterExtra = append(bytes.TrimRightFunc(v.AfterExtra, unicode.IsSpace), '\n') // always has exactly one trailing newline
v.UpdateOffsets()
}
// normalize performs simple normalization changes. In particular, it:
// - normalizes strings,
// - normalizes empty objects and arrays as simply {} or [],
// - normalizes whitespace between names and colons,
// - normalizes whitespace between values and commas.
//
// It always returns true to be compatible with composite.rangeValues.
func (v *Value) normalize() bool {
switch v2 := v.Value.(type) {
case Literal:
// Normalize string if there are escape characters.
if v2.Kind() == '"' && bytes.IndexByte(v2, '\\') >= 0 {
v.Value = String(v2.String())
}
case composite:
// Cleanup for empty objects and arrays.
if v2.length() == 0 {
// If there is only whitespace, then remove the whitespace.
if !v2.afterExtra().hasComment() {
*v2.afterExtra() = nil
}
break
}
// If there is only whitespace between the name and colon,
// or between the value and comma, then remove the whitespace.
for v3 := range v2.allValues() {
if !v3.AfterExtra.hasComment() {
v3.AfterExtra = nil
}
}
// Normalize all sub-values.
for v3 := range v2.allValues() {
v3.normalize()
}
}
return true
}
// lineStats carries statistics about a sequence of lines.
type lineStats struct {
firstLength int
lastLength int
multiline bool // false implies firstLength == lastLength
}
// expandComposites populates needExpand with the set of composite values
// that need to be expanded (i.e., print each member/element on a new line).
// This method is pure and does not mutate the AST.
func (v *Value) expandComposites(needExpand map[composite]bool) (stats lineStats) {
switch v2 := v.Value.(type) {
case Literal:
stats = lineStats{len(v2), len(v2), false}
case composite:
// Every object or array is either fully inlined or fully expanded.
// This simplifies machine-modification of HuJSON so that the mutation
// can easily determine which mode it is currently in.
//
// If any whitespace after a '{', '[', or ',' or before a '}' or ']'
// contains a newline, then we always expand the object or array.
var expand bool
// Keep track of line lengths.
var lineLength int
var lineLengths []int
updateStats := func(s lineStats) {
lineLength += s.firstLength
if s.multiline {
lineLengths = append(lineLengths, lineLength)
lineLength = s.lastLength
}
}
// Iterate through all members/elements in an object/array.
switch v2 := v2.(type) {
case *Object:
lineLength += len("{")
for i := range v2.Members {
name := &v2.Members[i].Name
value := &v2.Members[i].Value
expand = expand || name.BeforeExtra.hasNewline()
updateStats(name.BeforeExtra.lineStats())
updateStats(name.expandComposites(needExpand))
updateStats(name.AfterExtra.lineStats())
lineLength += len(": ")
updateStats(value.BeforeExtra.lineStats())
updateStats(value.expandComposites(needExpand))
updateStats(value.AfterExtra.lineStats())
lineLength += len(", ")
}
lineLength += len("}")
// Always expand multiline objects with more than 1 member.
expand = expand || v2.length() > 1 && stats.multiline
case *Array:
lineLength += len("[")
for i := range v2.Elements {
value := &v2.Elements[i]
expand = expand || value.BeforeExtra.hasNewline()
updateStats(value.BeforeExtra.lineStats())
updateStats(value.expandComposites(needExpand))
updateStats(value.AfterExtra.lineStats())
lineLength += len(", ")
}
lineLength += len("]")
}
if last := v2.lastValue(); last != nil {
expand = expand || last.AfterExtra.hasNewline()
}
expand = expand || v2.afterExtra().hasNewline()
// Update the block statistics.
lineLengths = append(lineLengths, lineLength)
stats = lineStats{
firstLength: lineLengths[0],
lastLength: lineLengths[len(lineLengths)-1],
multiline: len(lineLengths) > 1,
}
for i := 0; !expand && i < len(lineLengths); i++ {
expand = lineLengths[i] > punchCardWidth
}
if expand {
stats = lineStats{len("{"), len("}"), true}
stats.firstLength += v2.beforeExtraAt(0).lineStats().firstLength
needExpand[v2] = expand
}
}
return stats
}
func (b Extra) lineStats() (stats lineStats) {
// length is the approximate length of the comments.
length := func(b []byte) (n int) {
for {
b = b[consumeWhitespace(b):]
switch {
case bytes.HasPrefix(b, lineCommentStart):
return n + len(" ") + len(b) // line comment must go to the end
case bytes.HasPrefix(b, blockCommentStart):
nc := consumeComment(b)
if nc <= 0 {
return n + len(" ") + len(b) // truncated block comment must go to the end
}
n += len(" ") + nc
b = b[nc:]
continue
default:
if n > 0 {
n += len(" ") // account for padding space after block comment
}
return n
}
}
}
if !bytes.Contains(b, newline) {
n := length(b)
return lineStats{n, n, false}
} else {
first := b[:bytes.IndexByte(b, '\n')]
last := b[bytes.LastIndexByte(b, '\n')+len("\n"):]
return lineStats{length(first), length(last), true}
}
}
// formatWhitespace mutates the AST and formats whitespace to ensure
// consistent indentation and expansion of objects and arrays.
func (v *Value) formatWhitespace(depth int, needExpand map[composite]bool, standardize bool) {
if comp, ok := v.Value.(composite); ok {
expand := needExpand[comp]
// Format all members/elements in an object/array.
switch comp := comp.(type) {
case *Object:
for i := range comp.Members {
name := &comp.Members[i].Name
value := &comp.Members[i].Value
// Format extra before name.
name.BeforeExtra.format(depth+1, formatOptions{
ensureLeadingNewline: expand,
removeLeadingEmptyLines: i == 0,
appendSpaceIfEmpty: i != 0,
})
// Format the name.
name.formatWhitespace(depth+1, needExpand, standardize)
// Format extra after name and before colon.
name.AfterExtra.format(depth+2, formatOptions{
removeLeadingEmptyLines: true,
removeTrailingEmptyLines: true,
})
// Format extra after colon and before value.
value.BeforeExtra.format(depth+2, formatOptions{
removeLeadingEmptyLines: true,
removeTrailingEmptyLines: true,
appendSpaceIfEmpty: true,
})
// Format the value.
depthOffset := 0
if expand {
depthOffset++
}
if name.AfterExtra.hasNewline() || value.BeforeExtra.hasNewline() {
depthOffset++
}
value.formatWhitespace(depth+depthOffset, needExpand, standardize)
// Format extra after value and before comma.
value.AfterExtra.format(depth+2, formatOptions{
removeLeadingEmptyLines: true,
removeTrailingEmptyLines: true,
})
}
case *Array:
for i := range comp.Elements {
value := &comp.Elements[i]
// Format extra before value.
value.BeforeExtra.format(depth+1, formatOptions{
ensureLeadingNewline: expand,
removeLeadingEmptyLines: i == 0,
appendSpaceIfEmpty: i != 0,
})
// Format the value.
depthOffset := 0
if expand {
depthOffset++
}
value.formatWhitespace(depth+depthOffset, needExpand, standardize)
// Format extra after value and before comma.
value.AfterExtra.format(depth+2, formatOptions{
removeLeadingEmptyLines: true,
removeTrailingEmptyLines: true,
})
}
}
// Format the extra before the closing '}' or ']'.
comp.afterExtra().format(depth+1, formatOptions{
ensureTrailingNewline: expand,
removeLeadingEmptyLines: comp.length() == 0,
removeTrailingEmptyLines: true,
unindentLastLine: true,
})
// Normalize presence of trailing comma.
surroundedComma := comp.lastValue() != nil && len(comp.lastValue().AfterExtra) > 0 && len(*comp.afterExtra()) > 0
switch {
// Avoid a trailing comma for a non-expanded object or array.
case !expand && !surroundedComma:
setTrailingComma(comp, false)
// Otherwise, emit a trailing comma (unless this need to be standard).
case expand && !standardize:
setTrailingComma(comp, true)
}
}
}
type formatOptions struct {
ensureLeadingNewline bool
ensureTrailingNewline bool
removeLeadingEmptyLines bool
removeTrailingEmptyLines bool
unindentLastLine bool
appendSpaceIfEmpty bool
}
func (b *Extra) format(depth int, opts formatOptions) {
// Remove carriage returns to normalize output across operating systems.
if bytes.IndexByte(*b, '\r') >= 0 {
*b = bytes.ReplaceAll(*b, endlineWindows, newline)
*b = bytes.ReplaceAll(*b, endlineMacOSX, newline)
*b = bytes.ReplaceAll(*b, carriageReturn, space)
}
in := *b
var out []byte // TODO(dsnet): Cache this in sync.Pool?
// Inject a leading newline if not present in the input.
if opts.ensureLeadingNewline && !in.hasNewline() {
out = append(out, '\n')
}
// Iterate over every paragraph in the comment.
for len(in) > 0 {
// Handle whitespace.
if n := consumeWhitespace(in); n > 0 {
nl := bytes.Count(in[:n], newline)
if nl > 2 {
nl = 2 // never allow more than one blank line
}
for i := 0; i < nl; i++ {
out = append(out, '\n')
}
in = in[n:]
continue
}
// Handle comments.
n := consumeComment(in)
if n <= 0 {
return // invalid comment
}
// Emit leading whitespace.
if bytes.HasSuffix(out, newline) {
out = appendIndent(out, depth)
} else {
out = append(out, ' ')
}
// Copy single-line comment to the output verbatim.
comment := in[:n]
if bytes.HasPrefix(comment, lineCommentStart) || !comment.hasNewline() {
comment = bytes.TrimRightFunc(comment, unicode.IsSpace) // trim trailing whitespace
if bytes.HasPrefix(comment, lineCommentStart) {
n-- // leave newline for next iteration of comment
}
out = append(out, comment...) // single-line comments preserved verbatim
in = in[n:]
continue
}
// Format multi-line block comments and copy to the output.
lines := bytes.Split(comment, newline) // len(lines) >= 2 since at least one '\n' exists
var firstLine []byte // first non-empty line after blockCommentStart
var hasEmptyLine bool
for i, line := range lines {
line = bytes.TrimRightFunc(line, unicode.IsSpace) // trim trailing whitespace
if len(firstLine) == 0 && len(line) > 0 && i > 0 {
firstLine = line
}
hasEmptyLine = hasEmptyLine || len(line) == 0
lines[i] = line
}
// Compute the longest common prefix
commonPrefix := firstLine
for i, line := range lines[1:] {
if len(line) == 0 {
continue // ignore empty lines
}
// If the last line is just "*/" with preceding whitespace, then
// ignore any whitespace as part of the common prefix.
// Instead, copy the whitespace from the common prefix.
isLast := bytes.HasSuffix(line, blockCommentEnd)
if isLast && consumeWhitespace(line)+len(blockCommentEnd) == len(line) {
prefixLen := consumeWhitespace(commonPrefix)
lines[i+1] = append(commonPrefix[:prefixLen:prefixLen], blockCommentEnd...)
break
}
// Check for longest common prefix.
for i := 0; i < len(line) && i < len(commonPrefix); i++ {
if line[i] != commonPrefix[i] {
commonPrefix = commonPrefix[:i]
continue
}
}
}
// Indent every line and copy to output.
prefixLen := consumeWhitespace(commonPrefix)
starAligned := !hasEmptyLine && len(commonPrefix) > prefixLen && commonPrefix[prefixLen] == '*'
out = append(out, lines[0]...)
out = append(out, '\n')
for _, line := range lines[1:] {
if len(line) > 0 {
out = appendIndent(out, depth)
if starAligned {
out = append(out, ' ')
}
out = append(out, line[prefixLen:]...)
}
out = append(out, '\n')
}
out = bytes.TrimRight(out, "\n")
in = in[n:]
}
// Inject a trailing newline if not present in the input.
if opts.ensureTrailingNewline && !bytes.HasSuffix(out, newline) {
out = append(out, '\n')
}
// Remove all leading empty lines.
for opts.removeLeadingEmptyLines && bytes.HasPrefix(out, twoNewlines) {
out = out[1:]
}
// Remove all trailing empty lines.
for opts.removeTrailingEmptyLines && bytes.HasSuffix(out, twoNewlines) {
out = out[:len(out)-1]
}
// If the whitespace ends on a newline, append the necessary indentation.
// Otherwise, emit a space if we did not end on a new line.
if bytes.HasSuffix(out, newline) {
if opts.unindentLastLine {
depth--
}
out = appendIndent(out, depth)
} else if len(out) > 0 {
out = append(out, ' ')
}
// Emit a space if the output is empty.
if opts.appendSpaceIfEmpty && len(out) == 0 {
out = append(out, ' ')
}
// Copy intermediate output to the receiver.
if !bytes.Equal(*b, out) {
*b = append((*b)[:0], out...)
}
}
// alignObjectValues aligns object values by inserting spaces after the name
// so that the values are aligned to the same column.
//
// It always returns true to be compatible with composite.rangeValues.
func (v *Value) alignObjectValues() bool {
// TODO(dsnet): This is broken for non-monospace, non-narrow characters.
// This is hard to fix as even `go fmt` suffers from this problem.
// See https://golang.org/issue/8273.
if obj, ok := v.Value.(*Object); ok {
type row struct {
extra *Extra // pointer to extra after colon and before value
length int // length from start of name to end of extra
}
var rows []row
alignRows := func() {
// TODO(dsnet): Should we break apart rows if the number of spaces
// to insert exceeds some threshold?
// Compute the maximum width.
var max int
for _, row := range rows {
if max < row.length {
max = row.length
}
}
// Align every row up to that width.
for _, row := range rows {
for n := max - row.length; n > 0; n-- {
*row.extra = append(*row.extra, ' ')
}
}
// Reset the sequence of rows.
rows = rows[:0]
}
var indentSuffix []byte
for i := range obj.Members {
name := &obj.Members[i].Name
value := &obj.Members[i].Value
// Whitespace right before name must have a newline and
// everything after the name until the comma cannot have newlines.
if !name.BeforeExtra.hasNewline() ||
name.hasNewline(false) ||
name.AfterExtra.hasNewline() ||
value.BeforeExtra.hasNewline() ||
value.hasNewline(false) ||
value.AfterExtra.hasNewline() {
alignRows()
continue
}
// If there are multiple newlines or the indentSuffix mismatches,
// then this is the start of a new block or rows to align.
if bytes.Count(name.BeforeExtra, newline) > 1 || !bytes.HasSuffix(name.BeforeExtra, indentSuffix) {
alignRows() // flush the current block or rows
}
rows = append(rows, row{
extra: &value.BeforeExtra,
length: len(name.Value.(Literal)) + len(name.AfterExtra) + len(":") + len(value.BeforeExtra),
})
}
alignRows()
}
// Recursively align all sub-objects.
if comp, ok := v.Value.(composite); ok {
for v2 := range comp.allValues() {
v2.alignObjectValues()
}
}
return true
}
func (v Value) hasNewline(checkTopLevelExtra bool) bool {
if checkTopLevelExtra && (v.BeforeExtra.hasNewline() || v.AfterExtra.hasNewline()) {
return true
}
if comp, ok := v.Value.(composite); ok {
for v := range comp.allValues() {
if v.hasNewline(true) {
return true
}
}
}
return false
}
func (b Extra) hasNewline() bool {
return bytes.IndexByte(b, '\n') >= 0
}
func appendIndent(b []byte, n int) []byte {
for i := 0; i < n; i++ {
b = append(b, '\t')
}
return b
}