From aaf52539681c8e4a3d2e38c688c8dfc25c8e9bd8d97308a4a0892251b61082ce Mon Sep 17 00:00:00 2001
From: Matej Cepl
Date: Tue, 12 Jan 2021 16:12:22 +0000
Subject: [PATCH] - Add no-cloudscraper.patch to avoid the need to use
 cloudscraper

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-fanficfare?expand=0&rev=62
---
 no-cloudscraper.patch     | 216 ++++++++++++++++++++++++++++++++++++++
 python-fanficfare.changes |   5 +
 python-fanficfare.spec    |   3 +
 3 files changed, 224 insertions(+)
 create mode 100644 no-cloudscraper.patch

diff --git a/no-cloudscraper.patch b/no-cloudscraper.patch
new file mode 100644
index 0000000..57b541f
--- /dev/null
+++ b/no-cloudscraper.patch
@@ -0,0 +1,216 @@
+--- a/fanficfare/configurable.py
++++ b/fanficfare/configurable.py
+@@ -44,7 +44,6 @@ import pickle
+ 
+ ## isn't found in plugin when only imported down below inside
+ ## get_scraper()
+-import cloudscraper
+ 
+ from . import exceptions
+ 
+@@ -210,7 +209,6 @@ def get_valid_set_options():
+                'titlepage_use_table':(None,None,boollist),
+ 
+                'use_ssl_unverified_context':(None,None,boollist),
+-               'use_cloudscraper':(None,None,boollist),
+                'continue_on_chapter_error':(None,None,boollist),
+                'conditionals_use_lists':(None,None,boollist),
+                'dedup_chapter_list':(None,None,boollist),
+@@ -483,7 +481,6 @@ def get_valid_keywords():
+                  'tweak_fg_sleep',
+                  'universe_as_series',
+                  'use_ssl_unverified_context',
+-                 'use_cloudscraper',
+                  'user_agent',
+                  'username',
+                  'website_encodings',
+@@ -598,16 +595,11 @@ class Configuration(ConfigParser):
+         self.override_sleep = None
+         self.cookiejar = self.get_empty_cookiejar()
+         self.opener = build_opener(HTTPCookieProcessor(self.cookiejar),GZipProcessor())
+-        self.scraper = None
+ 
+         self.pagecache = self.get_empty_pagecache()
+         self.save_cache_file = None
+         self.save_cookiejar_file = None
+ 
+-    def __del__(self):
+-        if self.scraper is not None:
+-            self.scraper.close()
+-
+     def section_url_names(self,domain,section_url_f):
+         ## domain is passed as a method to limit the damage if/when an
+         ## adapter screws up _section_url
+@@ -1073,24 +1065,6 @@ class Configuration(ConfigParser):
+                 logger.warning("reduce_zalgo failed(%s), continuing."%e)
+         return data
+ 
+-    def get_scraper(self):
+-        if not self.scraper:
+-            ## ffnet adapter can't parse mobile output, so we only
+-            ## want desktop browser. But cloudscraper then insists on
+-            ## a browser and platform, too.
+-            self.scraper = cloudscraper.CloudScraper(browser={
+-                'browser': 'chrome',
+-                'platform': 'windows',
+-                'mobile': False,
+-                'desktop': True,
+-            })
+-            ## CloudScraper is subclass of requests.Session.
+-            ## probably need import higher up if ever used.
+-            # import requests
+-            # self.scraper = requests.Session()
+-            self.scraper.cookies = self.cookiejar
+-        return self.scraper
+-
+     # Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
+     def _postUrl(self, url,
+                  parameters={},
+@@ -1132,24 +1106,15 @@ class Configuration(ConfigParser):
+             # headers['Authorization']=b"Basic %s" % base64string
+             # logger.debug("http login for SB xf2test")
+ 
+-        if self.getConfig('use_cloudscraper',False):
+-            logger.debug("Using cloudscraper for POST")
+-            resp = self.get_scraper().post(url,
+-                                           headers=dict(headers),
+-                                           data=parameters)
+-            logger.debug("response code:%s"%resp.status_code)
+-            resp.raise_for_status() # raises HTTPError if error code.
+-            data = resp.content
+-        else:
+-            req = Request(url,
+-                          data=ensure_binary(urlencode(parameters)),
+-                          headers=headers)
+-
+-            ## Specific UA because too many sites are blocking the default python UA.
+-            self.opener.addheaders = [('User-Agent', self.getConfig('user_agent')),
+-                                      ('X-Clacks-Overhead','GNU Terry Pratchett')]
++        req = Request(url,
++                      data=ensure_binary(urlencode(parameters)),
++                      headers=headers)
++
++        ## Specific UA because too many sites are blocking the default python UA.
++        self.opener.addheaders = [('User-Agent', self.getConfig('user_agent')),
++                                  ('X-Clacks-Overhead','GNU Terry Pratchett')]
+ 
+-            data = self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read()
++        data = self.opener.open(req,None,float(self.getConfig('connect_timeout',30.0))).read()
+         data = self._do_reduce_zalgo(self._decode(data))
+         self._progressbar()
+         ## postURL saves data to the pagecache *after* _decode() while
+@@ -1227,37 +1192,16 @@ class Configuration(ConfigParser):
+ 
+         self.opener.addheaders = headers
+ 
+-        if self.getConfig('use_cloudscraper',False):
+-            ## requests / cloudscraper wants a dict() for headers, not
+-            ## list of tuples.
+-            headers = dict(headers)
+-            ## let cloudscraper do its thing with UA.
+-            if 'User-Agent' in headers:
+-                del headers['User-Agent']
+-            if parameters != None:
+-                logger.debug("Using cloudscraper for fetch POST")
+-                resp = self.get_scraper().post(url,
+-                                               headers=headers,
+-                                               data=parameters)
+-            else:
+-                logger.debug("Using cloudscraper for GET")
+-                resp = self.get_scraper().get(url,
+-                                              headers=headers)
+-            logger.debug("response code:%s"%resp.status_code)
+-            resp.raise_for_status() # raises HTTPError if error code.
+-            data = resp.content
+-            opened = FakeOpened(data,resp.url)
++        ## opener.open() will do POST with params(data) and GET without.
++        if parameters != None:
++            opened = self.opener.open(url,
++                                      ensure_binary(urlencode(parameters)),
++                                      float(self.getConfig('connect_timeout',30.0)))
+         else:
+-            ## opener.open() will to POST with params(data) and GET without.
+-            if parameters != None:
+-                opened = self.opener.open(url,
+-                                          ensure_binary(urlencode(parameters)),
+-                                          float(self.getConfig('connect_timeout',30.0)))
+-            else:
+-                opened = self.opener.open(url,
+-                                          None,
+-                                          float(self.getConfig('connect_timeout',30.0)))
+-            data = opened.read()
++            opened = self.opener.open(url,
++                                      None,
++                                      float(self.getConfig('connect_timeout',30.0)))
++        data = opened.read()
+         self._progressbar()
+         ## postURL saves data to the pagecache *after* _decode() while
+         ## fetchRaw saves it *before* _decode()--because raw.
+--- a/fanficfare/defaults.ini
++++ b/fanficfare/defaults.ini
+@@ -2793,15 +2793,6 @@ type_label:Type of Couple
+ website_encodings:Windows-1252,utf8
+ 
+ [www.fanfiction.net]
+-## Using cloudscraper can satisfy the first couple levels of
+-## Cloudflare bot-proofing, but not all levels. Older versions of
+-## OpenSSL will also raise problems, so versions of Calibre older than
+-## v5 will probably fail. Only fanfiction.net and fictionpress.com
+-## are configured with use_cloudscraper:true by default, but it can be
+-## applied in other sites' ini sections. user_agent setting is
+-## ignored when use_cloudscraper:true
+-use_cloudscraper:true
+-
+ ## fanfiction.net's 'cover' images are really just tiny thumbnails.
+ ## Set this to true to never use them.
+ #never_make_cover: false
+@@ -2888,15 +2879,6 @@ website_encodings:Windows-1252,utf8
+ slow_down_sleep_time:10
+ 
+ [www.fictionpress.com]
+-## Using cloudscraper can satisfy the first couple levels of
+-## Cloudflare bot-proofing, but not all levels. Older versions of
+-## OpenSSL will also raise problems, so versions of Calibre older than
+-## v5 will probably fail. Only fanfiction.net and fictionpress.com
+-## are configured with use_cloudscraper:true by default, but it can be
+-## applied in other sites' ini sections. user_agent setting is
+-## ignored when use_cloudscraper:true
+-use_cloudscraper:true
+-
+ ## Clear FanFiction from defaults, fictionpress.com is original fiction.
+ extratags:
+ 
+--- a/makeplugin.py
++++ b/makeplugin.py
+@@ -23,7 +23,7 @@ from makezip import createZipFile
+ if __name__=="__main__":
+     filename="FanFicFare.zip"
+     exclude=['*.pyc','*~','*.xcf','*[0-9].png','*.po','*.pot','*default.mo','*Thumbs.db']
+-    
++
+     os.chdir('calibre-plugin')
+     files=['plugin-defaults.ini','plugin-example.ini','about.html',
+            'images','translations']
+@@ -35,8 +35,8 @@ if __name__=="__main__":
+                   exclude=exclude)
+ 
+     os.chdir('../included_dependencies')
+-    files=['bs4','chardet','html2text','soupsieve','backports',
+-           'cloudscraper','requests','requests_toolbelt','urllib3',
++    files=['bs4', 'chardet', 'html2text', 'soupsieve', 'backports',
++           'requests', 'requests_toolbelt', 'urllib3',
+            'certifi','idna']
+     ## Kept only for v2.85.1 support now.
+     createZipFile("../"+filename,"a",
+--- a/setup.py
++++ b/setup.py
+@@ -84,8 +84,7 @@ setup(
+     install_requires=['beautifulsoup4',
+                       'chardet',
+                       'html5lib',
+-                      'html2text',
+-                      'cloudscraper'],
++                      'html2text'],
+     # html5lib requires 'six', FFF includes it's own copy as fanficfare.six
+ 
+     # List additional groups of dependencies here (e.g. development
diff --git a/python-fanficfare.changes b/python-fanficfare.changes
index 666a4ea..9721c49 100644
--- a/python-fanficfare.changes
+++ b/python-fanficfare.changes
@@ -1,3 +1,8 @@
+-------------------------------------------------------------------
+Tue Jan 12 16:12:03 UTC 2021 - Matej Cepl
+
+- Add no-cloudscraper.patch to avoid the need to use cloudscraper
+
 -------------------------------------------------------------------
 Mon Jan 11 17:28:41 CET 2021 - Matej Cepl
 
diff --git a/python-fanficfare.spec b/python-fanficfare.spec
index 2c4b144..44daf89 100644
--- a/python-fanficfare.spec
+++ b/python-fanficfare.spec
@@ -29,6 +29,9 @@ Group:          Development/Languages/Python
 URL:            https://github.com/JimmXinu/FanFicFare
 Source:         https://github.com/JimmXinu/%{modname}/archive/v%{version}/%{modname}-%{version}.tar.gz
 # Source:       %%{modname}-%%{version}.tar.gz
+# PATCH-FEATURE-OPENSUSE no-cloudscraper.patch mcepl@suse.com
+# Don't use cloudscraper
+Patch0:         no-cloudscraper.patch
 BuildRequires:  %{python_module beautifulsoup4}
 BuildRequires:  %{python_module chardet}
 BuildRequires:  %{python_module html2text}
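
For reviewers: after this patch, every fetch goes through FanFicFare's plain
urllib opener (cookie jar plus a custom User-Agent, no Cloudflare handshake).
The snippet below is a minimal, self-contained sketch of that remaining path,
assuming Python 3; fetch_url() and the example User-Agent string are
illustrative stand-ins, not FanFicFare's real API (the actual logic lives in
Configuration._postUrl and the fetch code patched above).

    # Minimal sketch of the urllib-only path the patch falls back to.
    # fetch_url() is a hypothetical helper, not part of FanFicFare.
    from http.cookiejar import CookieJar
    from urllib.parse import urlencode
    from urllib.request import HTTPCookieProcessor, Request, build_opener

    cookiejar = CookieJar()
    opener = build_opener(HTTPCookieProcessor(cookiejar))
    # Specific UA because many sites block the default python UA.
    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (example)'),
                         ('X-Clacks-Overhead', 'GNU Terry Pratchett')]

    def fetch_url(url, parameters=None, timeout=30.0):
        """GET when parameters is None, otherwise POST the form-encoded dict."""
        data = urlencode(parameters).encode('utf-8') if parameters is not None else None
        return opener.open(Request(url, data=data), None, timeout).read()

As the removed defaults.ini comments note, cloudscraper only satisfied the
first levels of Cloudflare's bot-proofing anyway; with this patch, plain
requests like the above may be refused (HTTP 403/503) by fanfiction.net and
fictionpress.com, the trade-off this package makes for dropping the dependency.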