// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package hujson import ( "bytes" "unicode" ) // Standardize strips any features specific to HuJSON from b, // making it compliant with standard JSON per RFC 8259. // All comments and trailing commas are replaced with a space character // in order to preserve the original line numbers and byte offsets. // If an error is encountered, then b is returned as is along with the error. func Standardize(b []byte) ([]byte, error) { ast, err := Parse(b) if err != nil { return b, err } ast.Standardize() return ast.Pack(), nil } // Minimize removes all whitespace, comments, and trailing commas from b, // making it compliant with standard JSON per RFC 8259. // If an error is encountered, then b is returned as is along with the error. func Minimize(b []byte) ([]byte, error) { ast, err := Parse(b) if err != nil { return b, err } ast.Minimize() return ast.Pack(), nil } // Format formats b according to some opinionated heuristics for // how HuJSON should look. The exact output may change over time. // It is the equivalent of `go fmt` but for HuJSON. // // If the input is standard JSON, then the output will remain standard. // Format is idempotent such that formatting already formatted HuJSON // results in no changes. // If an error is encountered, then b is returned as is along with the error. func Format(b []byte) ([]byte, error) { ast, err := Parse(b) if err != nil { return b, err } ast.Format() return ast.Pack(), nil } const punchCardWidth = 80 var ( newline = []byte("\n") twoNewlines = []byte("\n\n") endlineWindows = []byte("\r\n") endlineMacOSX = []byte("\n\r") carriageReturn = []byte("\r") space = []byte(" ") ) // Format formats the value according to some opinionated heuristics for // how HuJSON should look. The exact output may change over time. // It is the equivalent of `go fmt` but for HuJSON. // // If the input is standard JSON, then the output will remain standard. // Format is idempotent such that formatting already formatted HuJSON // results in no changes. func (v *Value) Format() { // Format leading extra. v.BeforeExtra.format(0, formatOptions{}) v.BeforeExtra = v.BeforeExtra[consumeWhitespace(v.BeforeExtra):] // never has leading whitespace // Format the value. needExpand := make(map[composite]bool) isStandard := v.IsStandard() v.normalize() v.expandComposites(needExpand) v.formatWhitespace(0, needExpand, isStandard) v.alignObjectValues() // Format trailing extra. v.AfterExtra.format(0, formatOptions{}) v.AfterExtra = append(bytes.TrimRightFunc(v.AfterExtra, unicode.IsSpace), '\n') // always has exactly one trailing newline v.UpdateOffsets() } // normalize performs simple normalization changes. In particular, it: // - normalizes strings, // - normalizes empty objects and arrays as simply {} or [], // - normalizes whitespace between names and colons, // - normalizes whitespace between values and commas. // // It always returns true to be compatible with composite.rangeValues. func (v *Value) normalize() bool { switch v2 := v.Value.(type) { case Literal: // Normalize string if there are escape characters. if v2.Kind() == '"' && bytes.IndexByte(v2, '\\') >= 0 { v.Value = String(v2.String()) } case composite: // Cleanup for empty objects and arrays. if v2.length() == 0 { // If there is only whitespace, then remove the whitespace. if !v2.afterExtra().hasComment() { *v2.afterExtra() = nil } break } // If there is only whitespace between the name and colon, // or between the value and comma, then remove the whitespace. for v3 := range v2.allValues() { if !v3.AfterExtra.hasComment() { v3.AfterExtra = nil } } // Normalize all sub-values. for v3 := range v2.allValues() { v3.normalize() } } return true } // lineStats carries statistics about a sequence of lines. type lineStats struct { firstLength int lastLength int multiline bool // false implies firstLength == lastLength } // expandComposites populates needExpand with the set of composite values // that need to be expanded (i.e., print each member/element on a new line). // This method is pure and does not mutate the AST. func (v *Value) expandComposites(needExpand map[composite]bool) (stats lineStats) { switch v2 := v.Value.(type) { case Literal: stats = lineStats{len(v2), len(v2), false} case composite: // Every object or array is either fully inlined or fully expanded. // This simplifies machine-modification of HuJSON so that the mutation // can easily determine which mode it is currently in. // // If any whitespace after a '{', '[', or ',' or before a '}' or ']' // contains a newline, then we always expand the object or array. var expand bool // Keep track of line lengths. var lineLength int var lineLengths []int updateStats := func(s lineStats) { lineLength += s.firstLength if s.multiline { lineLengths = append(lineLengths, lineLength) lineLength = s.lastLength } } // Iterate through all members/elements in an object/array. switch v2 := v2.(type) { case *Object: lineLength += len("{") for i := range v2.Members { name := &v2.Members[i].Name value := &v2.Members[i].Value expand = expand || name.BeforeExtra.hasNewline() updateStats(name.BeforeExtra.lineStats()) updateStats(name.expandComposites(needExpand)) updateStats(name.AfterExtra.lineStats()) lineLength += len(": ") updateStats(value.BeforeExtra.lineStats()) updateStats(value.expandComposites(needExpand)) updateStats(value.AfterExtra.lineStats()) lineLength += len(", ") } lineLength += len("}") // Always expand multiline objects with more than 1 member. expand = expand || v2.length() > 1 && stats.multiline case *Array: lineLength += len("[") for i := range v2.Elements { value := &v2.Elements[i] expand = expand || value.BeforeExtra.hasNewline() updateStats(value.BeforeExtra.lineStats()) updateStats(value.expandComposites(needExpand)) updateStats(value.AfterExtra.lineStats()) lineLength += len(", ") } lineLength += len("]") } if last := v2.lastValue(); last != nil { expand = expand || last.AfterExtra.hasNewline() } expand = expand || v2.afterExtra().hasNewline() // Update the block statistics. lineLengths = append(lineLengths, lineLength) stats = lineStats{ firstLength: lineLengths[0], lastLength: lineLengths[len(lineLengths)-1], multiline: len(lineLengths) > 1, } for i := 0; !expand && i < len(lineLengths); i++ { expand = lineLengths[i] > punchCardWidth } if expand { stats = lineStats{len("{"), len("}"), true} stats.firstLength += v2.beforeExtraAt(0).lineStats().firstLength needExpand[v2] = expand } } return stats } func (b Extra) lineStats() (stats lineStats) { // length is the approximate length of the comments. length := func(b []byte) (n int) { for { b = b[consumeWhitespace(b):] switch { case bytes.HasPrefix(b, lineCommentStart): return n + len(" ") + len(b) // line comment must go to the end case bytes.HasPrefix(b, blockCommentStart): nc := consumeComment(b) if nc <= 0 { return n + len(" ") + len(b) // truncated block comment must go to the end } n += len(" ") + nc b = b[nc:] continue default: if n > 0 { n += len(" ") // account for padding space after block comment } return n } } } if !bytes.Contains(b, newline) { n := length(b) return lineStats{n, n, false} } else { first := b[:bytes.IndexByte(b, '\n')] last := b[bytes.LastIndexByte(b, '\n')+len("\n"):] return lineStats{length(first), length(last), true} } } // formatWhitespace mutates the AST and formats whitespace to ensure // consistent indentation and expansion of objects and arrays. func (v *Value) formatWhitespace(depth int, needExpand map[composite]bool, standardize bool) { if comp, ok := v.Value.(composite); ok { expand := needExpand[comp] // Format all members/elements in an object/array. switch comp := comp.(type) { case *Object: for i := range comp.Members { name := &comp.Members[i].Name value := &comp.Members[i].Value // Format extra before name. name.BeforeExtra.format(depth+1, formatOptions{ ensureLeadingNewline: expand, removeLeadingEmptyLines: i == 0, appendSpaceIfEmpty: i != 0, }) // Format the name. name.formatWhitespace(depth+1, needExpand, standardize) // Format extra after name and before colon. name.AfterExtra.format(depth+2, formatOptions{ removeLeadingEmptyLines: true, removeTrailingEmptyLines: true, }) // Format extra after colon and before value. value.BeforeExtra.format(depth+2, formatOptions{ removeLeadingEmptyLines: true, removeTrailingEmptyLines: true, appendSpaceIfEmpty: true, }) // Format the value. depthOffset := 0 if expand { depthOffset++ } if name.AfterExtra.hasNewline() || value.BeforeExtra.hasNewline() { depthOffset++ } value.formatWhitespace(depth+depthOffset, needExpand, standardize) // Format extra after value and before comma. value.AfterExtra.format(depth+2, formatOptions{ removeLeadingEmptyLines: true, removeTrailingEmptyLines: true, }) } case *Array: for i := range comp.Elements { value := &comp.Elements[i] // Format extra before value. value.BeforeExtra.format(depth+1, formatOptions{ ensureLeadingNewline: expand, removeLeadingEmptyLines: i == 0, appendSpaceIfEmpty: i != 0, }) // Format the value. depthOffset := 0 if expand { depthOffset++ } value.formatWhitespace(depth+depthOffset, needExpand, standardize) // Format extra after value and before comma. value.AfterExtra.format(depth+2, formatOptions{ removeLeadingEmptyLines: true, removeTrailingEmptyLines: true, }) } } // Format the extra before the closing '}' or ']'. comp.afterExtra().format(depth+1, formatOptions{ ensureTrailingNewline: expand, removeLeadingEmptyLines: comp.length() == 0, removeTrailingEmptyLines: true, unindentLastLine: true, }) // Normalize presence of trailing comma. surroundedComma := comp.lastValue() != nil && len(comp.lastValue().AfterExtra) > 0 && len(*comp.afterExtra()) > 0 switch { // Avoid a trailing comma for a non-expanded object or array. case !expand && !surroundedComma: setTrailingComma(comp, false) // Otherwise, emit a trailing comma (unless this need to be standard). case expand && !standardize: setTrailingComma(comp, true) } } } type formatOptions struct { ensureLeadingNewline bool ensureTrailingNewline bool removeLeadingEmptyLines bool removeTrailingEmptyLines bool unindentLastLine bool appendSpaceIfEmpty bool } func (b *Extra) format(depth int, opts formatOptions) { // Remove carriage returns to normalize output across operating systems. if bytes.IndexByte(*b, '\r') >= 0 { *b = bytes.ReplaceAll(*b, endlineWindows, newline) *b = bytes.ReplaceAll(*b, endlineMacOSX, newline) *b = bytes.ReplaceAll(*b, carriageReturn, space) } in := *b var out []byte // TODO(dsnet): Cache this in sync.Pool? // Inject a leading newline if not present in the input. if opts.ensureLeadingNewline && !in.hasNewline() { out = append(out, '\n') } // Iterate over every paragraph in the comment. for len(in) > 0 { // Handle whitespace. if n := consumeWhitespace(in); n > 0 { nl := bytes.Count(in[:n], newline) if nl > 2 { nl = 2 // never allow more than one blank line } for i := 0; i < nl; i++ { out = append(out, '\n') } in = in[n:] continue } // Handle comments. n := consumeComment(in) if n <= 0 { return // invalid comment } // Emit leading whitespace. if bytes.HasSuffix(out, newline) { out = appendIndent(out, depth) } else { out = append(out, ' ') } // Copy single-line comment to the output verbatim. comment := in[:n] if bytes.HasPrefix(comment, lineCommentStart) || !comment.hasNewline() { comment = bytes.TrimRightFunc(comment, unicode.IsSpace) // trim trailing whitespace if bytes.HasPrefix(comment, lineCommentStart) { n-- // leave newline for next iteration of comment } out = append(out, comment...) // single-line comments preserved verbatim in = in[n:] continue } // Format multi-line block comments and copy to the output. lines := bytes.Split(comment, newline) // len(lines) >= 2 since at least one '\n' exists var firstLine []byte // first non-empty line after blockCommentStart var hasEmptyLine bool for i, line := range lines { line = bytes.TrimRightFunc(line, unicode.IsSpace) // trim trailing whitespace if len(firstLine) == 0 && len(line) > 0 && i > 0 { firstLine = line } hasEmptyLine = hasEmptyLine || len(line) == 0 lines[i] = line } // Compute the longest common prefix commonPrefix := firstLine for i, line := range lines[1:] { if len(line) == 0 { continue // ignore empty lines } // If the last line is just "*/" with preceding whitespace, then // ignore any whitespace as part of the common prefix. // Instead, copy the whitespace from the common prefix. isLast := bytes.HasSuffix(line, blockCommentEnd) if isLast && consumeWhitespace(line)+len(blockCommentEnd) == len(line) { prefixLen := consumeWhitespace(commonPrefix) lines[i+1] = append(commonPrefix[:prefixLen:prefixLen], blockCommentEnd...) break } // Check for longest common prefix. for i := 0; i < len(line) && i < len(commonPrefix); i++ { if line[i] != commonPrefix[i] { commonPrefix = commonPrefix[:i] continue } } } // Indent every line and copy to output. prefixLen := consumeWhitespace(commonPrefix) starAligned := !hasEmptyLine && len(commonPrefix) > prefixLen && commonPrefix[prefixLen] == '*' out = append(out, lines[0]...) out = append(out, '\n') for _, line := range lines[1:] { if len(line) > 0 { out = appendIndent(out, depth) if starAligned { out = append(out, ' ') } out = append(out, line[prefixLen:]...) } out = append(out, '\n') } out = bytes.TrimRight(out, "\n") in = in[n:] } // Inject a trailing newline if not present in the input. if opts.ensureTrailingNewline && !bytes.HasSuffix(out, newline) { out = append(out, '\n') } // Remove all leading empty lines. for opts.removeLeadingEmptyLines && bytes.HasPrefix(out, twoNewlines) { out = out[1:] } // Remove all trailing empty lines. for opts.removeTrailingEmptyLines && bytes.HasSuffix(out, twoNewlines) { out = out[:len(out)-1] } // If the whitespace ends on a newline, append the necessary indentation. // Otherwise, emit a space if we did not end on a new line. if bytes.HasSuffix(out, newline) { if opts.unindentLastLine { depth-- } out = appendIndent(out, depth) } else if len(out) > 0 { out = append(out, ' ') } // Emit a space if the output is empty. if opts.appendSpaceIfEmpty && len(out) == 0 { out = append(out, ' ') } // Copy intermediate output to the receiver. if !bytes.Equal(*b, out) { *b = append((*b)[:0], out...) } } // alignObjectValues aligns object values by inserting spaces after the name // so that the values are aligned to the same column. // // It always returns true to be compatible with composite.rangeValues. func (v *Value) alignObjectValues() bool { // TODO(dsnet): This is broken for non-monospace, non-narrow characters. // This is hard to fix as even `go fmt` suffers from this problem. // See https://golang.org/issue/8273. if obj, ok := v.Value.(*Object); ok { type row struct { extra *Extra // pointer to extra after colon and before value length int // length from start of name to end of extra } var rows []row alignRows := func() { // TODO(dsnet): Should we break apart rows if the number of spaces // to insert exceeds some threshold? // Compute the maximum width. var max int for _, row := range rows { if max < row.length { max = row.length } } // Align every row up to that width. for _, row := range rows { for n := max - row.length; n > 0; n-- { *row.extra = append(*row.extra, ' ') } } // Reset the sequence of rows. rows = rows[:0] } var indentSuffix []byte for i := range obj.Members { name := &obj.Members[i].Name value := &obj.Members[i].Value // Whitespace right before name must have a newline and // everything after the name until the comma cannot have newlines. if !name.BeforeExtra.hasNewline() || name.hasNewline(false) || name.AfterExtra.hasNewline() || value.BeforeExtra.hasNewline() || value.hasNewline(false) || value.AfterExtra.hasNewline() { alignRows() continue } // If there are multiple newlines or the indentSuffix mismatches, // then this is the start of a new block or rows to align. if bytes.Count(name.BeforeExtra, newline) > 1 || !bytes.HasSuffix(name.BeforeExtra, indentSuffix) { alignRows() // flush the current block or rows } rows = append(rows, row{ extra: &value.BeforeExtra, length: len(name.Value.(Literal)) + len(name.AfterExtra) + len(":") + len(value.BeforeExtra), }) } alignRows() } // Recursively align all sub-objects. if comp, ok := v.Value.(composite); ok { for v2 := range comp.allValues() { v2.alignObjectValues() } } return true } func (v Value) hasNewline(checkTopLevelExtra bool) bool { if checkTopLevelExtra && (v.BeforeExtra.hasNewline() || v.AfterExtra.hasNewline()) { return true } if comp, ok := v.Value.(composite); ok { for v := range comp.allValues() { if v.hasNewline(true) { return true } } } return false } func (b Extra) hasNewline() bool { return bytes.IndexByte(b, '\n') >= 0 } func appendIndent(b []byte, n int) []byte { for i := 0; i < n; i++ { b = append(b, '\t') } return b }