+ Patch: 0001-Display-real-line-numbers-on-pull-request-s-diff-vie.patch + Patch: 0002-Show-the-assignee-s-avatar-on-the-board.patch + Patch: 0003-Allow-setting-a-status-as-closing-even-if-the-projec.patch + Patch: 0004-Include-the-assignee-in-the-list-of-people-notified-.patch + Patch: 0005-Introduce-the-collaborator_project_groups-mapping.patch + Patch: 0006-When-a-file-a-detected-as-a-binary-file-return-the-r.patch + Patch: 0007-Remove-fenced-code-block-when-checking-mention.patch + Patch: 0008-Add-support-for-using-cchardet-to-detect-files-encod.patch + Patch: 0009-Add-support-for-disabling-user-registration.patch - Remove mandatory dependency on systemd to ease containerization OBS-URL: https://build.opensuse.org/package/show/devel:tools:scm/pagure?expand=0&rev=46
285 lines
10 KiB
Diff
285 lines
10 KiB
Diff
From a8b2e001d6f4d2e4683aa65d6331b971f8e2ce33 Mon Sep 17 00:00:00 2001
|
|
From: Pierre-Yves Chibon <pingou@pingoured.fr>
|
|
Date: Mon, 14 Sep 2020 16:03:31 +0200
|
|
Subject: [PATCH 8/9] Add support for using cchardet to detect files' encoding
|
|
|
|
cchardet is a much faster version of the chardet library that can
|
|
be used to automatically detect the encoding of a file.
|
|
|
|
Since this library is only available on python3, we're making it
|
|
an optional dependency for now.
|
|
|
|
Fixes https://pagure.io/pagure/issue/4977
|
|
|
|
Signed-off-by: Pierre-Yves Chibon <pingou@pingoured.fr>
|
|
---
|
|
pagure/lib/encoding_utils.py | 12 ++++-
|
|
tests/test_pagure_flask_ui_repo.py | 58 ++++++++++++++++++++-----
|
|
tests/test_pagure_lib_encoding_utils.py | 40 ++++++++++++-----
|
|
tests/test_pagure_lib_mimetype.py | 28 ++++++++++--
|
|
4 files changed, 111 insertions(+), 27 deletions(-)
|
|
|
|
diff --git a/pagure/lib/encoding_utils.py b/pagure/lib/encoding_utils.py
|
|
index 66f7dced..304e7d84 100644
|
|
--- a/pagure/lib/encoding_utils.py
|
|
+++ b/pagure/lib/encoding_utils.py
|
|
@@ -15,7 +15,12 @@ from __future__ import unicode_literals, division, absolute_import
|
|
from collections import namedtuple
|
|
import logging
|
|
|
|
-from chardet import universaldetector, __version__ as ch_version
|
|
+try:
|
|
+ import cchardet
|
|
+ from cchardet import __version__ as ch_version
|
|
+except ImportError:
|
|
+ cchardet = None
|
|
+ from chardet import universaldetector, __version__ as ch_version
|
|
|
|
from pagure.exceptions import PagureEncodingException
|
|
|
|
@@ -44,7 +49,10 @@ def detect_encodings(data):
|
|
|
|
# We can't use ``chardet.detect`` because we want to dig in the internals
|
|
# of the detector to bias the utf-8 result.
|
|
- detector = universaldetector.UniversalDetector()
|
|
+ if cchardet is not None:
|
|
+ detector = cchardet.UniversalDetector()
|
|
+ else:
|
|
+ detector = universaldetector.UniversalDetector()
|
|
detector.reset()
|
|
detector.feed(data)
|
|
result = detector.close()
|
|
diff --git a/tests/test_pagure_flask_ui_repo.py b/tests/test_pagure_flask_ui_repo.py
|
|
index b4322e7d..e816e7f7 100644
|
|
--- a/tests/test_pagure_flask_ui_repo.py
|
|
+++ b/tests/test_pagure_flask_ui_repo.py
|
|
@@ -20,6 +20,12 @@ import tempfile
|
|
import time
|
|
import os
|
|
|
|
+cchardet = None
|
|
+try:
|
|
+ import cchardet
|
|
+except ImportError:
|
|
+ pass
|
|
+
|
|
import pygit2
|
|
import six
|
|
from mock import ANY, patch, MagicMock
|
|
@@ -2763,9 +2769,16 @@ class PagureFlaskRepotests(tests.Modeltests):
|
|
output = self.app.get("/test/raw/master")
|
|
self.assertEqual(output.status_code, 200)
|
|
output_text = output.get_data(as_text=True)
|
|
- self.assertEqual(
|
|
- output.headers["Content-Type"].lower(), "text/plain; charset=ascii"
|
|
- )
|
|
+ if cchardet is not None:
|
|
+ self.assertEqual(
|
|
+ output.headers["Content-Type"].lower(),
|
|
+ "text/plain; charset=utf-8",
|
|
+ )
|
|
+ else:
|
|
+ self.assertEqual(
|
|
+ output.headers["Content-Type"].lower(),
|
|
+ "text/plain; charset=ascii",
|
|
+ )
|
|
self.assertIn(":Author: Pierre-Yves Chibon", output_text)
|
|
|
|
# Add some more content to the repo
|
|
@@ -2784,9 +2797,16 @@ class PagureFlaskRepotests(tests.Modeltests):
|
|
|
|
# View in a branch
|
|
output = self.app.get("/test/raw/master/f/sources")
|
|
- self.assertEqual(
|
|
- output.headers["Content-Type"].lower(), "text/plain; charset=ascii"
|
|
- )
|
|
+ if cchardet is not None:
|
|
+ self.assertEqual(
|
|
+ output.headers["Content-Type"].lower(),
|
|
+ "text/plain; charset=utf-8",
|
|
+ )
|
|
+ else:
|
|
+ self.assertEqual(
|
|
+ output.headers["Content-Type"].lower(),
|
|
+ "text/plain; charset=ascii",
|
|
+ )
|
|
self.assertEqual(output.status_code, 200)
|
|
output_text = output.get_data(as_text=True)
|
|
self.assertIn("foo\n bar", output_text)
|
|
@@ -2837,9 +2857,16 @@ class PagureFlaskRepotests(tests.Modeltests):
|
|
output = self.app.get("/test/raw/master")
|
|
self.assertEqual(output.status_code, 200)
|
|
output_text = output.get_data(as_text=True)
|
|
- self.assertEqual(
|
|
- output.headers["Content-Type"].lower(), "text/plain; charset=ascii"
|
|
- )
|
|
+ if cchardet is not None:
|
|
+ self.assertEqual(
|
|
+ output.headers["Content-Type"].lower(),
|
|
+ "text/plain; charset=utf-8",
|
|
+ )
|
|
+ else:
|
|
+ self.assertEqual(
|
|
+ output.headers["Content-Type"].lower(),
|
|
+ "text/plain; charset=ascii",
|
|
+ )
|
|
self.assertTrue(
|
|
output_text.startswith("diff --git a/test_binary b/test_binary\n")
|
|
)
|
|
@@ -2877,9 +2904,16 @@ class PagureFlaskRepotests(tests.Modeltests):
|
|
output = self.app.get("/fork/pingou/test3/raw/master/f/sources")
|
|
self.assertEqual(output.status_code, 200)
|
|
output_text = output.get_data(as_text=True)
|
|
- self.assertEqual(
|
|
- output.headers["Content-Type"].lower(), "text/plain; charset=ascii"
|
|
- )
|
|
+ if cchardet is not None:
|
|
+ self.assertEqual(
|
|
+ output.headers["Content-Type"].lower(),
|
|
+ "text/plain; charset=utf-8",
|
|
+ )
|
|
+ else:
|
|
+ self.assertEqual(
|
|
+ output.headers["Content-Type"].lower(),
|
|
+ "text/plain; charset=ascii",
|
|
+ )
|
|
self.assertIn("foo\n bar", output_text)
|
|
|
|
def test_view_commit(self):
|
|
diff --git a/tests/test_pagure_lib_encoding_utils.py b/tests/test_pagure_lib_encoding_utils.py
|
|
index ccc8825f..aff7d8ba 100644
|
|
--- a/tests/test_pagure_lib_encoding_utils.py
|
|
+++ b/tests/test_pagure_lib_encoding_utils.py
|
|
@@ -5,11 +5,18 @@ Tests for :module:`pagure.lib.encoding_utils`.
|
|
|
|
from __future__ import unicode_literals, absolute_import
|
|
|
|
-import chardet
|
|
import os
|
|
import unittest
|
|
import sys
|
|
|
|
+cchardet = None
|
|
+try:
|
|
+ import cchardet
|
|
+except ImportError:
|
|
+ pass
|
|
+
|
|
+import chardet
|
|
+
|
|
sys.path.insert(
|
|
0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
|
|
)
|
|
@@ -24,7 +31,10 @@ class TestGuessEncoding(unittest.TestCase):
|
|
"""
|
|
data = "Twas bryllyg, and the slythy toves did gyre and gymble"
|
|
result = encoding_utils.guess_encoding(data.encode("ascii"))
|
|
- self.assertEqual(result, "ascii")
|
|
+ if cchardet is not None:
|
|
+ self.assertEqual(result, "utf-8")
|
|
+ else:
|
|
+ self.assertEqual(result, "ascii")
|
|
|
|
def test_guess_encoding_favor_utf_8(self):
|
|
"""
|
|
@@ -56,17 +66,24 @@ class TestGuessEncodings(unittest.TestCase):
|
|
chardet_result = chardet.detect(data)
|
|
if chardet.__version__[0] == "3":
|
|
# The first three have different confidence values
|
|
+ if cchardet is not None:
|
|
+ expexted_list = ["utf-8"]
|
|
+ # With cchardet only "utf-8" is expected, so the last entry is utf-8 too
|
|
+ self.assertEqual(result[-1].encoding, "utf-8")
|
|
+ else:
|
|
+ expexted_list = ["utf-8", "ISO-8859-9", "ISO-8859-1"]
|
|
+ # This is the one with the least confidence
|
|
+ self.assertEqual(result[-1].encoding, "windows-1255")
|
|
self.assertListEqual(
|
|
- [encoding.encoding for encoding in result][:3],
|
|
- ["utf-8", "ISO-8859-9", "ISO-8859-1"],
|
|
+ [encoding.encoding for encoding in result][:3], expexted_list
|
|
)
|
|
- # This is the one with the least confidence
|
|
- self.assertEqual(result[-1].encoding, "windows-1255")
|
|
+
|
|
# The values in the middle of the list all have the same confidence
|
|
# value and can't be sorted reliably: use sets.
|
|
- self.assertEqual(
|
|
- set([encoding.encoding for encoding in result]),
|
|
- set(
|
|
+ if cchardet is not None:
|
|
+ expected_list = sorted(["utf-8"])
|
|
+ else:
|
|
+ expected_list = sorted(
|
|
[
|
|
"utf-8",
|
|
"ISO-8859-9",
|
|
@@ -89,7 +106,10 @@ class TestGuessEncodings(unittest.TestCase):
|
|
"windows-1251",
|
|
"windows-1255",
|
|
]
|
|
- ),
|
|
+ )
|
|
+ self.assertListEqual(
|
|
+ sorted(set([encoding.encoding for encoding in result])),
|
|
+ expected_list,
|
|
)
|
|
self.assertEqual(chardet_result["encoding"], "ISO-8859-9")
|
|
else:
|
|
diff --git a/tests/test_pagure_lib_mimetype.py b/tests/test_pagure_lib_mimetype.py
|
|
index d5947bee..8c2f4a31 100644
|
|
--- a/tests/test_pagure_lib_mimetype.py
|
|
+++ b/tests/test_pagure_lib_mimetype.py
|
|
@@ -9,6 +9,12 @@ import os
|
|
import unittest
|
|
import sys
|
|
|
|
+cchardet = None
|
|
+try:
|
|
+ import cchardet
|
|
+except ImportError:
|
|
+ pass
|
|
+
|
|
from pagure.lib import mimetype
|
|
|
|
sys.path.insert(
|
|
@@ -20,8 +26,18 @@ class TestMIMEType(unittest.TestCase):
|
|
def test_guess_type(self):
|
|
dataset = [
|
|
("hello.html", None, "text/html", None),
|
|
- ("hello.html", b"#!", "text/html", "ascii"),
|
|
- ("hello", b"#!", "text/plain", "ascii"),
|
|
+ (
|
|
+ "hello.html",
|
|
+ b"#!",
|
|
+ "text/html",
|
|
+ "ascii" if cchardet is None else "utf-8",
|
|
+ ),
|
|
+ (
|
|
+ "hello",
|
|
+ b"#!",
|
|
+ "text/plain",
|
|
+ "ascii" if cchardet is None else "utf-8",
|
|
+ ),
|
|
("hello.jpg", None, "image/jpeg", None),
|
|
("hello.jpg", b"#!", "image/jpeg", None),
|
|
("hello.jpg", b"\0", "image/jpeg", None),
|
|
@@ -49,7 +65,13 @@ class TestMIMEType(unittest.TestCase):
|
|
|
|
def test_get_normal_headers(self):
|
|
dataset = [
|
|
- ("hello", b"#!", "text/plain; charset=ascii"),
|
|
+ (
|
|
+ "hello",
|
|
+ b"#!",
|
|
+ "text/plain; charset=ascii"
|
|
+ if cchardet is None
|
|
+ else "text/plain; charset=utf-8",
|
|
+ ),
|
|
("hello.jpg", None, "image/jpeg"),
|
|
("hello.jpg", b"#!", "image/jpeg"),
|
|
("hello.jpg", b"\0", "image/jpeg"),
|
|
--
|
|
2.26.2
|
|
|