168 lines
5.1 KiB
Diff
From 383b2e75a7a4198c42f8f87833eefb772868a56f Mon Sep 17 00:00:00 2001
|
|
From: Russ Cox <rsc@golang.org>
|
|
Date: Mon, 9 Aug 2021 15:09:12 -0400
|
|
Subject: [PATCH] language: turn parsing panics into ErrSyntax
|
|
|
|
We keep finding new panics in the language parser.
|
|
Limit the damage by reporting those inputs as syntax errors.
|
|
|
|
Change-Id: I786fe127c3df7e4c8e042d15095d3acf3c4e4a50
|
|
Reviewed-on: https://go-review.googlesource.com/c/text/+/340830
|
|
Trust: Russ Cox <rsc@golang.org>
|
|
Run-TryBot: Russ Cox <rsc@golang.org>
|
|
TryBot-Result: Go Bot <gobot@golang.org>
|
|
Reviewed-by: Roland Shoemaker <roland@golang.org>
|
|
---
|
|
internal/language/language.go | 43 +++++++++++++++++++++++++++++++----
|
|
internal/language/parse.go | 7 ++++++
|
|
language/parse.go | 22 ++++++++++++++++++
|
|
3 files changed, 68 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/internal/language/language.go b/internal/language/language.go
|
|
index f41aedcfc..6105bc7fa 100644
|
|
--- a/internal/language/language.go
|
|
+++ b/internal/language/language.go
|
|
@@ -251,6 +251,13 @@ func (t Tag) Parent() Tag {
|
|
|
|
// ParseExtension parses s as an extension and returns it on success.
|
|
func ParseExtension(s string) (ext string, err error) {
|
|
+ defer func() {
|
|
+ if recover() != nil {
|
|
+ ext = ""
|
|
+ err = ErrSyntax
|
|
+ }
|
|
+ }()
|
|
+
|
|
scan := makeScannerString(s)
|
|
var end int
|
|
if n := len(scan.token); n != 1 {
|
|
@@ -461,7 +468,14 @@ func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) {
|
|
// ParseBase parses a 2- or 3-letter ISO 639 code.
|
|
// It returns a ValueError if s is a well-formed but unknown language identifier
|
|
// or another error if another error occurred.
|
|
-func ParseBase(s string) (Language, error) {
|
|
+func ParseBase(s string) (l Language, err error) {
|
|
+ defer func() {
|
|
+ if recover() != nil {
|
|
+ l = 0
|
|
+ err = ErrSyntax
|
|
+ }
|
|
+ }()
|
|
+
|
|
if n := len(s); n < 2 || 3 < n {
|
|
return 0, ErrSyntax
|
|
}
|
|
@@ -472,7 +486,14 @@ func ParseBase(s string) (Language, error) {
|
|
// ParseScript parses a 4-letter ISO 15924 code.
|
|
// It returns a ValueError if s is a well-formed but unknown script identifier
|
|
// or another error if another error occurred.
|
|
-func ParseScript(s string) (Script, error) {
|
|
+func ParseScript(s string) (scr Script, err error) {
|
|
+ defer func() {
|
|
+ if recover() != nil {
|
|
+ scr = 0
|
|
+ err = ErrSyntax
|
|
+ }
|
|
+ }()
|
|
+
|
|
if len(s) != 4 {
|
|
return 0, ErrSyntax
|
|
}
|
|
@@ -489,7 +510,14 @@ func EncodeM49(r int) (Region, error) {
|
|
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
|
|
// It returns a ValueError if s is a well-formed but unknown region identifier
|
|
// or another error if another error occurred.
|
|
-func ParseRegion(s string) (Region, error) {
|
|
+func ParseRegion(s string) (r Region, err error) {
|
|
+ defer func() {
|
|
+ if recover() != nil {
|
|
+ r = 0
|
|
+ err = ErrSyntax
|
|
+ }
|
|
+ }()
|
|
+
|
|
if n := len(s); n < 2 || 3 < n {
|
|
return 0, ErrSyntax
|
|
}
|
|
@@ -578,7 +606,14 @@ type Variant struct {
|
|
|
|
// ParseVariant parses and returns a Variant. An error is returned if s is not
|
|
// a valid variant.
|
|
-func ParseVariant(s string) (Variant, error) {
|
|
+func ParseVariant(s string) (v Variant, err error) {
|
|
+ defer func() {
|
|
+ if recover() != nil {
|
|
+ v = Variant{}
|
|
+ err = ErrSyntax
|
|
+ }
|
|
+ }()
|
|
+
|
|
s = strings.ToLower(s)
|
|
if id, ok := variantIndex[s]; ok {
|
|
return Variant{id, s}, nil
|
|
diff --git a/internal/language/parse.go b/internal/language/parse.go
|
|
index c696fd0bd..47ee0fed1 100644
|
|
--- a/internal/language/parse.go
|
|
+++ b/internal/language/parse.go
|
|
@@ -232,6 +232,13 @@ func Parse(s string) (t Tag, err error) {
|
|
if s == "" {
|
|
return Und, ErrSyntax
|
|
}
|
|
+ defer func() {
|
|
+ if recover() != nil {
|
|
+ t = Und
|
|
+ err = ErrSyntax
|
|
+ return
|
|
+ }
|
|
+ }()
|
|
if len(s) <= maxAltTaglen {
|
|
b := [maxAltTaglen]byte{}
|
|
for i, c := range s {
|
|
diff --git a/language/parse.go b/language/parse.go
|
|
index 11acfd885..59b041008 100644
|
|
--- a/language/parse.go
|
|
+++ b/language/parse.go
|
|
@@ -43,6 +43,13 @@ func Parse(s string) (t Tag, err error) {
|
|
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
|
// The resulting tag is canonicalized using the canonicalization type c.
|
|
func (c CanonType) Parse(s string) (t Tag, err error) {
|
|
+ defer func() {
|
|
+ if recover() != nil {
|
|
+ t = Tag{}
|
|
+ err = language.ErrSyntax
|
|
+ }
|
|
+ }()
|
|
+
|
|
tt, err := language.Parse(s)
|
|
if err != nil {
|
|
return makeTag(tt), err
|
|
@@ -79,6 +86,13 @@ func Compose(part ...interface{}) (t Tag, err error) {
|
|
// tag is returned after canonicalizing using CanonType c. If one or more errors
|
|
// are encountered, one of the errors is returned.
|
|
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
|
|
+ defer func() {
|
|
+ if recover() != nil {
|
|
+ t = Tag{}
|
|
+ err = language.ErrSyntax
|
|
+ }
|
|
+ }()
|
|
+
|
|
var b language.Builder
|
|
if err = update(&b, part...); err != nil {
|
|
return und, err
|
|
@@ -142,6 +156,14 @@ var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
|
|
// Tags with a weight of zero will be dropped. An error will be returned if the
|
|
// input could not be parsed.
|
|
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
|
|
+ defer func() {
|
|
+ if recover() != nil {
|
|
+ tag = nil
|
|
+ q = nil
|
|
+ err = language.ErrSyntax
|
|
+ }
|
|
+ }()
|
|
+
|
|
var entry string
|
|
for s != "" {
|
|
if entry, s = split(s, ','); entry == "" {
|