SHA256
1
0
forked from pool/seamonkey
seamonkey/1817900-13-112a1.patch

1023 lines
61 KiB
Diff
Raw Normal View History

# HG changeset patch
# User Mike Hommey <mh+mozilla@glandium.org>
# Date 1677114768 0
# Thu Feb 23 01:12:48 2023 +0000
# Node ID a9dbdd8183fe6ee0c9439feb8af9164fe2580024
# Parent b834f206aede6bd2190e0e80e3ab517665d74ae8
Bug 1817900 - Update encoding_rs to 0.8.32. r=emilio,supply-chain-reviewers
Differential Revision: https://phabricator.services.mozilla.com/D170433
diff --git a/Cargo.lock b/Cargo.lock
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -527,19 +527,19 @@ version = "0.1.0"
dependencies = [
"encoding_rs",
"nserror",
"nsstring",
]
[[package]]
name = "encoding_rs"
-version = "0.8.31"
+version = "0.8.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b"
+checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394"
dependencies = [
"cfg-if 1.0.0",
"packed_simd_2",
]
[[package]]
name = "env_logger"
version = "0.4.3"
diff --git a/third_party/rust/encoding_rs/.cargo-checksum.json b/third_party/rust/encoding_rs/.cargo-checksum.json
--- a/third_party/rust/encoding_rs/.cargo-checksum.json
+++ b/third_party/rust/encoding_rs/.cargo-checksum.json
@@ -1,1 +1,1 @@
-{"files":{"CONTRIBUTING.md":"ca1901f3e8532fb4cec894fd3664f0eaa898c0c4b961d1b992d1ed54eacf362a","COPYRIGHT":"11789f45bb180841cd362a5eee6789c68ddb573a11105e30768c308a6add0190","Cargo.toml":"abf2c7d17500cfa1148b76b9a8a8574873a6f6de90d6110d0d8f6b519c8c99f6","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"3fa4ca83dcc9237839b1bdeb2e6d16bdfb5ec0c5ce42b24694d8bbf0dcbef72c","LICENSE-WHATWG":"838118388fe5c2e7f1dbbaeed13e1c7f3ebf88be91319c7c1d77c18e987d1a50","README.md":"bcb4b59cfc5f48fbaba954b8ae4daa9eaecf9044afc89208a78a7e995c321b81","build.rs":"9276ee24ef71433d46323c15296b3fbbb29c0b37c4b1ca45416587f14ba8e777","ci/miri.sh":"43cb8d82f49e3bfe2d2274b6ccd6f0714a4188ccef0cecc040829883cfdbee25","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b
\ No newline at end of file
+{"files":{"CONTRIBUTING.md":"ca1901f3e8532fb4cec894fd3664f0eaa898c0c4b961d1b992d1ed54eacf362a","COPYRIGHT":"11789f45bb180841cd362a5eee6789c68ddb573a11105e30768c308a6add0190","Cargo.toml":"ea1bdb0b73a66e4a6b25d8fdda6b64cadea8e99ac89f9739eeada6801d5e9010","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"3fa4ca83dcc9237839b1bdeb2e6d16bdfb5ec0c5ce42b24694d8bbf0dcbef72c","LICENSE-WHATWG":"838118388fe5c2e7f1dbbaeed13e1c7f3ebf88be91319c7c1d77c18e987d1a50","README.md":"8781ee38bba8ab4e752b2d63d7674d8ce4a557af896221434dd057a1198a9ed4","ci/miri.sh":"43cb8d82f49e3bfe2d2274b6ccd6f0714a4188ccef0cecc040829883cfdbee25","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.txt":"58b
\ No newline at end of file
diff --git a/third_party/rust/encoding_rs/Cargo.toml b/third_party/rust/encoding_rs/Cargo.toml
--- a/third_party/rust/encoding_rs/Cargo.toml
+++ b/third_party/rust/encoding_rs/Cargo.toml
@@ -7,17 +7,17 @@
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2018"
name = "encoding_rs"
-version = "0.8.31"
+version = "0.8.32"
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
description = "A Gecko-oriented implementation of the Encoding Standard"
homepage = "https://docs.rs/encoding_rs/"
documentation = "https://docs.rs/encoding_rs/"
readme = "README.md"
keywords = [
"encoding",
"web",
diff --git a/third_party/rust/encoding_rs/README.md b/third_party/rust/encoding_rs/README.md
--- a/third_party/rust/encoding_rs/README.md
+++ b/third_party/rust/encoding_rs/README.md
@@ -440,21 +440,27 @@ To regenerate the generated code:
- [x] Replace uconv with encoding_rs in Gecko.
- [x] Implement the rust-encoding API in terms of encoding_rs.
- [x] Add SIMD acceleration for Aarch64.
- [x] Investigate the use of NEON on 32-bit ARM.
- [ ] ~Investigate Björn Höhrmann's lookup table acceleration for UTF-8 as
adapted to Rust in rust-encoding.~
- [x] Add actually fast CJK encode options.
- [ ] ~Investigate [Bob Steagall's lookup table acceleration for UTF-8](https://github.com/BobSteagall/CppNow2018/blob/master/FastConversionFromUTF-8/Fast%20Conversion%20From%20UTF-8%20with%20C%2B%2B%2C%20DFAs%2C%20and%20SSE%20Intrinsics%20-%20Bob%20Steagall%20-%20C%2B%2BNow%202018.pdf).~
-- [ ] Provide a build mode that works without `alloc` (with lesser API surface).
+- [x] Provide a build mode that works without `alloc` (with lesser API surface).
- [ ] Migrate to `std::simd` once it is stable and declare 1.0.
## Release Notes
+### 0.8.32
+
+* Removed `build.rs`. (This removal should resolve false positives reported by some antivirus products. This may break some build configurations that have opted out of Rust's guarantees against future build breakage.)
+* Internal change to what API is used for reinterpreting the lane configuration of SIMD vectors.
+* Documentation improvements.
+
### 0.8.31
* Use SPDX with parentheses now that crates.io supports parentheses.
### 0.8.30
* Update the licensing information to take into account the WHATWG data license change.
diff --git a/third_party/rust/encoding_rs/build.rs b/third_party/rust/encoding_rs/build.rs
deleted file mode 100644
--- a/third_party/rust/encoding_rs/build.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-fn main() {
- // This does not enable `RUSTC_BOOTSTRAP=1` for `packed_simd`.
- // You still need to knowingly have a setup that makes
- // `packed_simd` compile. Therefore, having this file on
- // crates.io is harmless in terms of users of `encoding_rs`
- // accidentally depending on nightly features. Having this
- // here means that if you knowingly want this, you only
- // need to maintain a fork of `packed_simd` without _also_
- // having to maintain a fork of `encoding_rs`.
- #[cfg(feature = "simd-accel")]
- println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1");
-}
diff --git a/third_party/rust/encoding_rs/src/ascii.rs b/third_party/rust/encoding_rs/src/ascii.rs
--- a/third_party/rust/encoding_rs/src/ascii.rs
+++ b/third_party/rust/encoding_rs/src/ascii.rs
@@ -35,24 +35,22 @@ cfg_if! {
if #[cfg(feature = "simd-accel")] {
#[allow(unused_imports)]
use ::core::intrinsics::unlikely;
#[allow(unused_imports)]
use ::core::intrinsics::likely;
} else {
#[allow(dead_code)]
#[inline(always)]
- // Unsafe to match the intrinsic, which is needlessly unsafe.
- unsafe fn unlikely(b: bool) -> bool {
+ fn unlikely(b: bool) -> bool {
b
}
#[allow(dead_code)]
#[inline(always)]
- // Unsafe to match the intrinsic, which is needlessly unsafe.
- unsafe fn likely(b: bool) -> bool {
+ fn likely(b: bool) -> bool {
b
}
}
}
// `as` truncates, so works on 32-bit, too.
#[allow(dead_code)]
pub const ASCII_MASK: usize = 0x8080_8080_8080_8080u64 as usize;
diff --git a/third_party/rust/encoding_rs/src/lib.rs b/third_party/rust/encoding_rs/src/lib.rs
--- a/third_party/rust/encoding_rs/src/lib.rs
+++ b/third_party/rust/encoding_rs/src/lib.rs
@@ -164,18 +164,20 @@
//! assert_eq!(&output[..], expectation);
//! assert!(!total_had_errors);
//! ```
//!
//! ## UTF-16LE, UTF-16BE and Unicode Encoding Schemes
//!
//! The Encoding Standard doesn't specify encoders for UTF-16LE and UTF-16BE,
//! __so this crate does not provide encoders for those encodings__!
-//! Along with the replacement encoding, their _output encoding_ is UTF-8,
-//! so you get an UTF-8 encoder if you request an encoder for them.
+//! Along with the replacement encoding, their _output encoding_ (i.e. the
+//! encoding used for form submission and error handling in the query string
+//! of URLs) is UTF-8, so you get an UTF-8 encoder if you request an encoder
+//! for them.
//!
//! Additionally, the Encoding Standard factors BOM handling into wrapper
//! algorithms so that BOM handling isn't part of the definition of the
//! encodings themselves. The Unicode _encoding schemes_ in the Unicode
//! Standard define BOM handling or lack thereof as part of the encoding
//! scheme.
//!
//! When used with the `_without_bom_handling` entry points, the UTF-16LE
@@ -193,16 +195,20 @@
//! not part of the behavior of the UTF-16 _encoding scheme_ per the
//! Unicode Standard.
//!
//! The UTF-32 family of Unicode encoding schemes is not supported
//! by this crate. The Encoding Standard doesn't define any UTF-32
//! family encodings, since they aren't necessary for consuming Web
//! content.
//!
+//! While gb18030 is capable of representing U+FEFF, the Encoding
+//! Standard does not treat the gb18030 byte representation of U+FEFF
+//! as a BOM, so neither does this crate.
+//!
//! ## ISO-8859-1
//!
//! ISO-8859-1 does not exist as a distinct encoding from windows-1252 in
//! the Encoding Standard. Therefore, an encoding that maps the unsigned
//! byte value to the same Unicode scalar value is not available via
//! `Encoding` in this crate.
//!
//! However, the functions whose name starts with `convert` and contains
@@ -252,17 +258,18 @@
//! For single-byte DOS encodings beyond the ones supported by the Encoding
//! Standard, there is the [`oem_cp`](https://crates.io/crates/oem_cp) crate.
//!
//! # Preparing Text for the Encoders
//!
//! Normalizing text into Unicode Normalization Form C prior to encoding text
//! into a legacy encoding minimizes unmappable characters. Text can be
//! normalized to Unicode Normalization Form C using the
-//! [`unic-normal`](https://crates.io/crates/unic-normal) crate.
+//! [`icu_normalizer`](https://crates.io/crates/icu_normalizer) crate, which
+//! is part of [ICU4X](https://icu4x.unicode.org/).
//!
//! The exception is windows-1258, which after normalizing to Unicode
//! Normalization Form C requires tone marks to be decomposed in order to
//! minimize unmappable characters. Vietnamese tone marks can be decomposed
//! using the [`detone`](https://crates.io/crates/detone) crate.
//!
//! # Streaming & Non-Streaming; Rust & C/C++
//!
@@ -277,32 +284,32 @@
//!
//! There is no analogous C API exposed via FFI, mainly because C doesn't have
//! standard types for growable byte buffers and Unicode strings that know
//! their length.
//!
//! The C API (header file generated at `target/include/encoding_rs.h` when
//! building encoding_rs) can, in turn, be wrapped for use from C++. Such a
//! C++ wrapper can re-create the non-streaming API in C++ for C++ callers.
-//! The C binding comes with a [C++14 wrapper][2] that uses standard library +
+//! The C binding comes with a [C++17 wrapper][2] that uses standard library +
//! [GSL][3] types and that recreates the non-streaming API in C++ on top of
-//! the streaming API. A C++ wrapper with XPCOM/MFBT types is being developed
-//! as part of Mozilla [bug 1261841][4].
+//! the streaming API. A C++ wrapper with XPCOM/MFBT types is available as
+//! [`mozilla::Encoding`][4].
//!
//! The `Encoding` type is common to both the streaming and non-streaming
//! modes. In the streaming mode, decoding operations are performed with a
//! `Decoder` and encoding operations with an `Encoder` object obtained via
//! `Encoding`. In the non-streaming mode, decoding and encoding operations are
//! performed using methods on `Encoding` objects themselves, so the `Decoder`
//! and `Encoder` objects are not used at all.
//!
//! [1]: https://github.com/hsivonen/encoding_c
//! [2]: https://github.com/hsivonen/encoding_c/blob/master/include/encoding_rs_cpp.h
//! [3]: https://github.com/Microsoft/GSL/
-//! [4]: https://bugzilla.mozilla.org/show_bug.cgi?id=encoding_rs
+//! [4]: https://searchfox.org/mozilla-central/source/intl/Encoding.h
//!
//! # Memory management
//!
//! The non-streaming mode never performs heap allocations (even the methods
//! that write into a `Vec<u8>` or a `String` by taking them as arguments do
//! not reallocate the backing buffer of the `Vec<u8>` or the `String`). That
//! is, the non-streaming mode uses caller-allocated buffers exclusively.
//!
@@ -677,17 +684,17 @@
//! <tr><td>TIS-620</td><td>windows-874</td></tr>
//! </tbody>
//! </table>
//!
//! See the section [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes)
//! for discussion about the UTF-16 family.
#![no_std]
-#![cfg_attr(feature = "simd-accel", feature(stdsimd, core_intrinsics))]
+#![cfg_attr(feature = "simd-accel", feature(core_intrinsics))]
#[cfg(feature = "alloc")]
#[cfg_attr(test, macro_use)]
extern crate alloc;
extern crate core;
#[macro_use]
extern crate cfg_if;
@@ -2917,33 +2924,38 @@ impl Encoding {
/// U+0000...U+007F and vice versa.
#[cfg(feature = "alloc")]
#[inline]
fn is_potentially_borrowable(&'static self) -> bool {
!(self == REPLACEMENT || self == UTF_16BE || self == UTF_16LE)
}
/// Returns the _output encoding_ of this encoding. This is UTF-8 for
- /// UTF-16BE, UTF-16LE and replacement and the encoding itself otherwise.
+ /// UTF-16BE, UTF-16LE, and replacement and the encoding itself otherwise.
+ ///
+ /// _Note:_ The _output encoding_ concept is needed for form submission and
+ /// error handling in the query strings of URLs in the Web Platform.
///
/// Available via the C wrapper.
#[inline]
pub fn output_encoding(&'static self) -> &'static Encoding {
if self == REPLACEMENT || self == UTF_16BE || self == UTF_16LE {
UTF_8
} else {
self
}
}
/// Decode complete input to `Cow<'a, str>` _with BOM sniffing_ and with
/// malformed sequences replaced with the REPLACEMENT CHARACTER when the
/// entire input is available as a single buffer (i.e. the end of the
/// buffer marks the end of the stream).
///
+ /// The BOM, if any, does not appear in the output.
+ ///
/// This method implements the (non-streaming version of) the
/// [_decode_](https://encoding.spec.whatwg.org/#decode) spec concept.
///
/// The second item in the returned tuple is the encoding that was actually
/// used (which may differ from this encoding thanks to BOM sniffing).
///
/// The third item in the returned tuple indicates whether there were
/// malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
@@ -2980,16 +2992,18 @@ impl Encoding {
(cow, encoding, had_errors)
}
/// Decode complete input to `Cow<'a, str>` _with BOM removal_ and with
/// malformed sequences replaced with the REPLACEMENT CHARACTER when the
/// entire input is available as a single buffer (i.e. the end of the
/// buffer marks the end of the stream).
///
+ /// Only an initial byte sequence that is a BOM for this encoding is removed.
+ ///
/// When invoked on `UTF_8`, this method implements the (non-streaming
/// version of) the
/// [_UTF-8 decode_](https://encoding.spec.whatwg.org/#utf-8-decode) spec
/// concept.
///
/// The second item in the returned pair indicates whether there were
/// malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
///
@@ -3212,31 +3226,32 @@ impl Encoding {
debug_assert_eq!(read, input.len());
Some(Cow::Owned(string))
}
DecoderResult::Malformed(_, _) => None,
DecoderResult::OutputFull => unreachable!(),
}
}
- /// Encode complete input to `Cow<'a, [u8]>` with unmappable characters
- /// replaced with decimal numeric character references when the entire input
- /// is available as a single buffer (i.e. the end of the buffer marks the
- /// end of the stream).
+ /// Encode complete input to `Cow<'a, [u8]>` using the
+ /// [_output encoding_](Encoding::output_encoding) of this encoding with
+ /// unmappable characters replaced with decimal numeric character references
+ /// when the entire input is available as a single buffer (i.e. the end of
+ /// the buffer marks the end of the stream).
///
/// This method implements the (non-streaming version of) the
/// [_encode_](https://encoding.spec.whatwg.org/#encode) spec concept. For
/// the [_UTF-8 encode_](https://encoding.spec.whatwg.org/#utf-8-encode)
/// spec concept, it is slightly more efficient to use
/// <code><var>string</var>.as_bytes()</code> instead of invoking this
/// method on `UTF_8`.
///
/// The second item in the returned tuple is the encoding that was actually
- /// used (which may differ from this encoding thanks to some encodings
- /// having UTF-8 as their output encoding).
+ /// used (*which may differ from this encoding thanks to some encodings
+ /// having UTF-8 as their output encoding*).
///
/// The third item in the returned tuple indicates whether there were
/// unmappable characters (that were replaced with HTML numeric character
/// references).
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use `new_encoder()`
/// when encoding segmented output.
@@ -3315,17 +3330,18 @@ impl Encoding {
fn new_variant_decoder(&'static self) -> VariantDecoder {
self.variant.new_variant_decoder()
}
/// Instantiates a new decoder for this encoding with BOM sniffing enabled.
///
/// BOM sniffing may cause the returned decoder to morph into a decoder
- /// for UTF-8, UTF-16LE or UTF-16BE instead of this encoding.
+ /// for UTF-8, UTF-16LE or UTF-16BE instead of this encoding. The BOM
+ /// does not appear in the output.
///
/// Available via the C wrapper.
#[inline]
pub fn new_decoder(&'static self) -> Decoder {
Decoder::new(self, self.new_variant_decoder(), BomHandling::Sniff)
}
/// Instantiates a new decoder for this encoding with BOM removal.
@@ -3353,17 +3369,21 @@ impl Encoding {
/// instead of this method to cause the BOM to be removed.
///
/// Available via the C wrapper.
#[inline]
pub fn new_decoder_without_bom_handling(&'static self) -> Decoder {
Decoder::new(self, self.new_variant_decoder(), BomHandling::Off)
}
- /// Instantiates a new encoder for the output encoding of this encoding.
+ /// Instantiates a new encoder for the [_output encoding_](Encoding::output_encoding)
+ /// of this encoding.
+ ///
+ /// _Note:_ The output encoding of UTF-16BE, UTF-16LE, and replacement is UTF-8. There
+ /// is no encoder for UTF-16BE, UTF-16LE, and replacement themselves.
///
/// Available via the C wrapper.
#[inline]
pub fn new_encoder(&'static self) -> Encoder {
let enc = self.output_encoding();
enc.variant.new_encoder(enc)
}
diff --git a/third_party/rust/encoding_rs/src/mem.rs b/third_party/rust/encoding_rs/src/mem.rs
--- a/third_party/rust/encoding_rs/src/mem.rs
+++ b/third_party/rust/encoding_rs/src/mem.rs
@@ -45,23 +45,21 @@ macro_rules! non_fuzz_debug_assert {
}
cfg_if! {
if #[cfg(feature = "simd-accel")] {
use ::core::intrinsics::likely;
use ::core::intrinsics::unlikely;
} else {
#[inline(always)]
- // Unsafe to match the intrinsic, which is needlessly unsafe.
- unsafe fn likely(b: bool) -> bool {
+ fn likely(b: bool) -> bool {
b
}
#[inline(always)]
- // Unsafe to match the intrinsic, which is needlessly unsafe.
- unsafe fn unlikely(b: bool) -> bool {
+ fn unlikely(b: bool) -> bool {
b
}
}
}
/// Classification of text as Latin1 (all code points are below U+0100),
/// left-to-right with some non-Latin1 characters or as containing at least
/// some right-to-left characters.
@@ -910,17 +908,17 @@ pub fn is_utf8_bidi(buffer: &[u8]) -> bo
*(UTF8_DATA.table.get_unchecked(byte as usize + 0x80))
},
) | u16::from(third >> 6)
| (u16::from(fourth & 0xC0) << 2))
!= 0x202
{
return true;
}
- if unsafe { unlikely(second == 0x90 || second == 0x9E) } {
+ if unlikely(second == 0x90 || second == 0x9E) {
let third = src[read + 2];
if third >= 0xA0 {
return true;
}
}
read += 4;
}
_ => {
@@ -1168,17 +1166,17 @@ pub fn is_str_bidi(buffer: &str) -> bool
if let Some((mut byte, mut read)) = validate_ascii(bytes) {
'inner: loop {
// At this point, `byte` is not included in `read`.
if byte < 0xE0 {
if byte >= 0x80 {
// Two-byte
// Adding `unlikely` here improved throughput on
// Russian plain text by 33%!
- if unsafe { unlikely(byte >= 0xD6) } {
+ if unlikely(byte >= 0xD6) {
if byte == 0xD6 {
let second = bytes[read + 1];
if second > 0x8F {
return true;
}
} else {
return true;
}
@@ -1192,17 +1190,17 @@ pub fn is_str_bidi(buffer: &str) -> bool
// ASCII space, comma and period in non-Latin context.
// However, the extra branch seems to cost more than it's
// worth.
bytes = &bytes[read..];
continue 'outer;
}
} else if byte < 0xF0 {
// Three-byte
- if unsafe { unlikely(!in_inclusive_range8(byte, 0xE3, 0xEE) && byte != 0xE1) } {
+ if unlikely(!in_inclusive_range8(byte, 0xE3, 0xEE) && byte != 0xE1) {
let second = bytes[read + 1];
if byte == 0xE0 {
if second < 0xA4 {
return true;
}
} else if byte == 0xE2 {
let third = bytes[read + 2];
if second == 0x80 {
@@ -1241,17 +1239,17 @@ pub fn is_str_bidi(buffer: &str) -> bool
}
}
}
}
read += 3;
} else {
// Four-byte
let second = bytes[read + 1];
- if unsafe { unlikely(byte == 0xF0 && (second == 0x90 || second == 0x9E)) } {
+ if unlikely(byte == 0xF0 && (second == 0x90 || second == 0x9E)) {
let third = bytes[read + 2];
if third >= 0xA0 {
return true;
}
}
read += 4;
}
// The comparison is always < or == and never >, but including
@@ -1655,17 +1653,17 @@ pub fn convert_utf16_to_utf8_partial(src
// The two functions called below are marked `inline(never)` to make
// transitions from the hot part (first function) into the cold part
// (second function) go through a return and another call to discouge
// the CPU from speculating from the hot code into the cold code.
// Letting the transitions be mere intra-function jumps, even to
// basic blocks out-of-lined to the end of the function would wipe
// away a quarter of Arabic encode performance on Haswell!
let (read, written) = convert_utf16_to_utf8_partial_inner(src, dst);
- if unsafe { likely(read == src.len()) } {
+ if likely(read == src.len()) {
return (read, written);
}
let (tail_read, tail_written) =
convert_utf16_to_utf8_partial_tail(&src[read..], &mut dst[written..]);
(read + tail_read, written + tail_written)
}
/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
diff --git a/third_party/rust/encoding_rs/src/simd_funcs.rs b/third_party/rust/encoding_rs/src/simd_funcs.rs
--- a/third_party/rust/encoding_rs/src/simd_funcs.rs
+++ b/third_party/rust/encoding_rs/src/simd_funcs.rs
@@ -4,17 +4,17 @@
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use packed_simd::u16x8;
use packed_simd::u8x16;
-use packed_simd::FromBits;
+use packed_simd::IntoBits;
// TODO: Migrate unaligned access to stdlib code if/when the RFC
// https://github.com/rust-lang/rfcs/pull/1725 is implemented.
#[inline(always)]
pub unsafe fn load16_unaligned(ptr: *const u8) -> u8x16 {
let mut simd = ::core::mem::uninitialized();
::core::ptr::copy_nonoverlapping(ptr, &mut simd as *mut u8x16 as *mut u8, 16);
@@ -67,18 +67,16 @@ cfg_if! {
use core::arch::x86_64::__m128i;
use core::arch::x86_64::_mm_movemask_epi8;
use core::arch::x86_64::_mm_packus_epi16;
} else if #[cfg(all(target_feature = "sse2", target_arch = "x86"))] {
use core::arch::x86::__m128i;
use core::arch::x86::_mm_movemask_epi8;
use core::arch::x86::_mm_packus_epi16;
} else if #[cfg(target_arch = "aarch64")]{
- use core::arch::aarch64::uint8x16_t;
- use core::arch::aarch64::uint16x8_t;
use core::arch::aarch64::vmaxvq_u8;
use core::arch::aarch64::vmaxvq_u16;
} else {
}
}
// #[inline(always)]
@@ -97,49 +95,49 @@ cfg_if! {
pub fn simd_byte_swap(s: u16x8) -> u16x8 {
let left = s << 8;
let right = s >> 8;
left | right
}
#[inline(always)]
pub fn to_u16_lanes(s: u8x16) -> u16x8 {
- u16x8::from_bits(s)
+ s.into_bits()
}
cfg_if! {
if #[cfg(target_feature = "sse2")] {
// Expose low-level mask instead of higher-level conclusion,
// because the non-ASCII case would perform less well otherwise.
#[inline(always)]
pub fn mask_ascii(s: u8x16) -> i32 {
unsafe {
- _mm_movemask_epi8(__m128i::from_bits(s))
+ _mm_movemask_epi8(s.into_bits())
}
}
} else {
}
}
cfg_if! {
if #[cfg(target_feature = "sse2")] {
#[inline(always)]
pub fn simd_is_ascii(s: u8x16) -> bool {
unsafe {
- _mm_movemask_epi8(__m128i::from_bits(s)) == 0
+ _mm_movemask_epi8(s.into_bits()) == 0
}
}
} else if #[cfg(target_arch = "aarch64")]{
#[inline(always)]
pub fn simd_is_ascii(s: u8x16) -> bool {
unsafe {
- vmaxvq_u8(uint8x16_t::from_bits(s)) < 0x80
+ vmaxvq_u8(s.into_bits()) < 0x80
}
}
} else {
#[inline(always)]
pub fn simd_is_ascii(s: u8x16) -> bool {
// This optimizes better on ARM than
// the lt formulation.
let highest_ascii = u8x16::splat(0x7F);
@@ -157,41 +155,41 @@ cfg_if! {
}
let above_str_latin1 = u8x16::splat(0xC4);
s.lt(above_str_latin1).all()
}
} else if #[cfg(target_arch = "aarch64")]{
#[inline(always)]
pub fn simd_is_str_latin1(s: u8x16) -> bool {
unsafe {
- vmaxvq_u8(uint8x16_t::from_bits(s)) < 0xC4
+ vmaxvq_u8(s.into_bits()) < 0xC4
}
}
} else {
#[inline(always)]
pub fn simd_is_str_latin1(s: u8x16) -> bool {
let above_str_latin1 = u8x16::splat(0xC4);
s.lt(above_str_latin1).all()
}
}
}
cfg_if! {
if #[cfg(target_arch = "aarch64")]{
#[inline(always)]
pub fn simd_is_basic_latin(s: u16x8) -> bool {
unsafe {
- vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x80
+ vmaxvq_u16(s.into_bits()) < 0x80
}
}
#[inline(always)]
pub fn simd_is_latin1(s: u16x8) -> bool {
unsafe {
- vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x100
+ vmaxvq_u16(s.into_bits()) < 0x100
}
}
} else {
#[inline(always)]
pub fn simd_is_basic_latin(s: u16x8) -> bool {
let above_ascii = u16x8::splat(0x80);
s.lt(above_ascii).all()
}
@@ -214,17 +212,17 @@ pub fn contains_surrogates(s: u16x8) ->
(s & mask).eq(surrogate_bits).any()
}
cfg_if! {
if #[cfg(target_arch = "aarch64")]{
macro_rules! aarch64_return_false_if_below_hebrew {
($s:ident) => ({
unsafe {
- if vmaxvq_u16(uint16x8_t::from_bits($s)) < 0x0590 {
+ if vmaxvq_u16($s.into_bits()) < 0x0590 {
return false;
}
}
})
}
macro_rules! non_aarch64_return_false_if_all {
($s:ident) => ()
@@ -291,34 +289,34 @@ pub fn simd_unpack(s: u8x16) -> (u16x8,
u8x16::splat(0),
[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
);
let second: u8x16 = shuffle!(
s,
u8x16::splat(0),
[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
);
- (u16x8::from_bits(first), u16x8::from_bits(second))
+ (first.into_bits(), second.into_bits())
}
}
cfg_if! {
if #[cfg(target_feature = "sse2")] {
#[inline(always)]
pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
unsafe {
- u8x16::from_bits(_mm_packus_epi16(__m128i::from_bits(a), __m128i::from_bits(b)))
+ _mm_packus_epi16(a.into_bits(), b.into_bits()).into_bits()
}
}
} else {
#[inline(always)]
pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
unsafe {
- let first = u8x16::from_bits(a);
- let second = u8x16::from_bits(b);
+ let first: u8x16 = a.into_bits();
+ let second: u8x16 = b.into_bits();
shuffle!(
first,
second,
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
)
}
}
}
diff --git a/third_party/rust/encoding_rs/src/utf_8.rs b/third_party/rust/encoding_rs/src/utf_8.rs
--- a/third_party/rust/encoding_rs/src/utf_8.rs
+++ b/third_party/rust/encoding_rs/src/utf_8.rs
@@ -16,23 +16,21 @@ use crate::mem::convert_utf16_to_utf8_pa
use crate::variant::*;
cfg_if! {
if #[cfg(feature = "simd-accel")] {
use ::core::intrinsics::unlikely;
use ::core::intrinsics::likely;
} else {
#[inline(always)]
- // Unsafe to match the intrinsic, which is needlessly unsafe.
- unsafe fn unlikely(b: bool) -> bool {
+ fn unlikely(b: bool) -> bool {
b
}
#[inline(always)]
- // Unsafe to match the intrinsic, which is needlessly unsafe.
- unsafe fn likely(b: bool) -> bool {
+ fn likely(b: bool) -> bool {
b
}
}
}
#[repr(align(64))] // Align to cache lines
pub struct Utf8Data {
pub table: [u8; 384],
@@ -83,63 +81,63 @@ pub fn utf8_valid_up_to(src: &[u8]) -> u
}
};
// Check for the longest sequence to avoid checking twice for the
// multi-byte sequences. This can't overflow with 64-bit address space,
// because full 64 bits aren't in use. In the 32-bit PAE case, for this
// to overflow would mean that the source slice would be so large that
// the address space of the process would not have space for any code.
// Therefore, the slice cannot be so long that this would overflow.
- if unsafe { likely(read + 4 <= src.len()) } {
+ if likely(read + 4 <= src.len()) {
'inner: loop {
// At this point, `byte` is not included in `read`, because we
// don't yet know that a) the UTF-8 sequence is valid and b) that there
// is output space if it is an astral sequence.
// Inspecting the lead byte directly is faster than what the
// std lib does!
- if unsafe { likely(in_inclusive_range8(byte, 0xC2, 0xDF)) } {
+ if likely(in_inclusive_range8(byte, 0xC2, 0xDF)) {
// Two-byte
let second = unsafe { *(src.get_unchecked(read + 1)) };
if !in_inclusive_range8(second, 0x80, 0xBF) {
break 'outer;
}
read += 2;
// Next lead (manually inlined)
- if unsafe { likely(read + 4 <= src.len()) } {
+ if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if byte < 0x80 {
read += 1;
continue 'outer;
}
continue 'inner;
}
break 'inner;
}
- if unsafe { likely(byte < 0xF0) } {
+ if likely(byte < 0xF0) {
'three: loop {
// Three-byte
let second = unsafe { *(src.get_unchecked(read + 1)) };
let third = unsafe { *(src.get_unchecked(read + 2)) };
if ((UTF8_DATA.table[usize::from(second)]
& unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
| (third >> 6))
!= 2
{
break 'outer;
}
read += 3;
// Next lead (manually inlined)
- if unsafe { likely(read + 4 <= src.len()) } {
+ if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if in_inclusive_range8(byte, 0xE0, 0xEF) {
continue 'three;
}
- if unsafe { likely(byte < 0x80) } {
+ if likely(byte < 0x80) {
read += 1;
continue 'outer;
}
continue 'inner;
}
break 'inner;
}
}
@@ -154,17 +152,17 @@ pub fn utf8_valid_up_to(src: &[u8]) -> u
| (u16::from(fourth & 0xC0) << 2))
!= 0x202
{
break 'outer;
}
read += 4;
// Next lead
- if unsafe { likely(read + 4 <= src.len()) } {
+ if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if byte < 0x80 {
read += 1;
continue 'outer;
}
continue 'inner;
}
break 'inner;
@@ -253,56 +251,56 @@ pub fn convert_utf8_to_utf16_up_to_inval
}
};
// Check for the longest sequence to avoid checking twice for the
// multi-byte sequences. This can't overflow with 64-bit address space,
// because full 64 bits aren't in use. In the 32-bit PAE case, for this
// to overflow would mean that the source slice would be so large that
// the address space of the process would not have space for any code.
// Therefore, the slice cannot be so long that this would overflow.
- if unsafe { likely(read + 4 <= src.len()) } {
+ if likely(read + 4 <= src.len()) {
'inner: loop {
// At this point, `byte` is not included in `read`, because we
// don't yet know that a) the UTF-8 sequence is valid and b) that there
// is output space if it is an astral sequence.
// We know, thanks to `ascii_to_basic_latin` that there is output
// space for at least one UTF-16 code unit, so no need to check
// for output space in the BMP cases.
// Inspecting the lead byte directly is faster than what the
// std lib does!
- if unsafe { likely(in_inclusive_range8(byte, 0xC2, 0xDF)) } {
+ if likely(in_inclusive_range8(byte, 0xC2, 0xDF)) {
// Two-byte
let second = unsafe { *(src.get_unchecked(read + 1)) };
if !in_inclusive_range8(second, 0x80, 0xBF) {
break 'outer;
}
unsafe {
*(dst.get_unchecked_mut(written)) =
((u16::from(byte) & 0x1F) << 6) | (u16::from(second) & 0x3F)
};
read += 2;
written += 1;
// Next lead (manually inlined)
if written == dst.len() {
break 'outer;
}
- if unsafe { likely(read + 4 <= src.len()) } {
+ if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if byte < 0x80 {
unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
read += 1;
written += 1;
continue 'outer;
}
continue 'inner;
}
break 'inner;
}
- if unsafe { likely(byte < 0xF0) } {
+ if likely(byte < 0xF0) {
'three: loop {
// Three-byte
let second = unsafe { *(src.get_unchecked(read + 1)) };
let third = unsafe { *(src.get_unchecked(read + 2)) };
if ((UTF8_DATA.table[usize::from(second)]
& unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
| (third >> 6))
!= 2
@@ -315,22 +313,22 @@ pub fn convert_utf8_to_utf16_up_to_inval
unsafe { *(dst.get_unchecked_mut(written)) = point };
read += 3;
written += 1;
// Next lead (manually inlined)
if written == dst.len() {
break 'outer;
}
- if unsafe { likely(read + 4 <= src.len()) } {
+ if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if in_inclusive_range8(byte, 0xE0, 0xEF) {
continue 'three;
}
- if unsafe { likely(byte < 0x80) } {
+ if likely(byte < 0x80) {
unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
read += 1;
written += 1;
continue 'outer;
}
continue 'inner;
}
break 'inner;
@@ -362,17 +360,17 @@ pub fn convert_utf8_to_utf16_up_to_inval
};
read += 4;
written += 2;
// Next lead
if written == dst.len() {
break 'outer;
}
- if unsafe { likely(read + 4 <= src.len()) } {
+ if likely(read + 4 <= src.len()) {
byte = unsafe { *(src.get_unchecked(read)) };
if byte < 0x80 {
unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
read += 1;
written += 1;
continue 'outer;
}
continue 'inner;
@@ -649,28 +647,28 @@ pub fn convert_utf16_to_utf8_partial_inn
*(dst.get_unchecked_mut(written)) = (unit >> 6) as u8 | 0xC0u8;
written += 1;
*(dst.get_unchecked_mut(written)) = (unit & 0x3F) as u8 | 0x80u8;
written += 1;
}
break;
}
let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
- if unsafe { likely(unit_minus_surrogate_start > (0xDFFF - 0xD800)) } {
+ if likely(unit_minus_surrogate_start > (0xDFFF - 0xD800)) {
unsafe {
*(dst.get_unchecked_mut(written)) = (unit >> 12) as u8 | 0xE0u8;
written += 1;
*(dst.get_unchecked_mut(written)) = ((unit & 0xFC0) >> 6) as u8 | 0x80u8;
written += 1;
*(dst.get_unchecked_mut(written)) = (unit & 0x3F) as u8 | 0x80u8;
written += 1;
}
break;
}
- if unsafe { likely(unit_minus_surrogate_start <= (0xDBFF - 0xD800)) } {
+ if likely(unit_minus_surrogate_start <= (0xDBFF - 0xD800)) {
// high surrogate
// read > src.len() is impossible, but using
// >= instead of == allows the compiler to elide a bound check.
if read >= src.len() {
debug_assert_eq!(read, src.len());
// Unpaired surrogate at the end of the buffer.
unsafe {
*(dst.get_unchecked_mut(written)) = 0xEFu8;
@@ -679,17 +677,17 @@ pub fn convert_utf16_to_utf8_partial_inn
written += 1;
*(dst.get_unchecked_mut(written)) = 0xBDu8;
written += 1;
}
return (read, written);
}
let second = src[read];
let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
- if unsafe { likely(second_minus_low_surrogate_start <= (0xDFFF - 0xDC00)) } {
+ if likely(second_minus_low_surrogate_start <= (0xDFFF - 0xDC00)) {
// The next code unit is a low surrogate. Advance position.
read += 1;
let astral = (u32::from(unit) << 10) + u32::from(second)
- (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);
unsafe {
*(dst.get_unchecked_mut(written)) = (astral >> 18) as u8 | 0xF0u8;
written += 1;
*(dst.get_unchecked_mut(written)) =
@@ -721,17 +719,17 @@ pub fn convert_utf16_to_utf8_partial_inn
// Now see if the next unit is Basic Latin
// read > src.len() is impossible, but using
// >= instead of == allows the compiler to elide a bound check.
if read >= src.len() {
debug_assert_eq!(read, src.len());
return (read, written);
}
unit = src[read];
- if unsafe { unlikely(unit < 0x80) } {
+ if unlikely(unit < 0x80) {
// written > dst.len() is impossible, but using
// >= instead of == allows the compiler to elide a bound check.
if written >= dst.len() {
debug_assert_eq!(written, dst.len());
return (read, written);
}
dst[written] = unit as u8;
read += 1;