From eb863efcb74dc354b33f5e340fa4e6e1ca6ecf2b65aa5f5fbdf7fbac489e7744 Mon Sep 17 00:00:00 2001
From: Matej Cepl
Date: Fri, 10 Mar 2023 12:11:40 +0000
Subject: [PATCH] =?UTF-8?q?-=20Remove=20adapter=5Fdwiggiecom.patch=20?=
 =?UTF-8?q?=E2=80=A6=20it=20really=20doesn't=20work.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-fanficfare?expand=0&rev=101
---
 adapter_dwiggiecom.patch  | 413 --------------------------------------
 python-fanficfare.changes |   5 +
 python-fanficfare.spec    |   3 -
 3 files changed, 5 insertions(+), 416 deletions(-)
 delete mode 100644 adapter_dwiggiecom.patch

diff --git a/adapter_dwiggiecom.patch b/adapter_dwiggiecom.patch
deleted file mode 100644
index b71fbab..0000000
--- a/adapter_dwiggiecom.patch
+++ /dev/null
@@ -1,413 +0,0 @@
-From 45c6d71f57aefc3b63f2a4253eea3f730b76c6fb Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?=
-Date: Wed, 15 Feb 2023 07:38:13 +0100
-Subject: [PATCH] Add adapter_dwiggiecom, which however will not be ever pushed
- upstream.
-
----
- fanficfare/adapters/__init__.py           |   1
- fanficfare/adapters/adapter_dwiggiecom.py | 384 ++++++++++++++++++++++++++++++
- 2 files changed, 385 insertions(+)
- create mode 100644 fanficfare/adapters/adapter_dwiggiecom.py
-
-Index: FanFicFare-4.20.0/fanficfare/adapters/__init__.py
-===================================================================
---- FanFicFare-4.20.0.orig/fanficfare/adapters/__init__.py
-+++ FanFicFare-4.20.0/fanficfare/adapters/__init__.py
-@@ -160,6 +160,7 @@ from . import adapter_psychficcom
- from . import adapter_deviantartcom
- from . import adapter_merengohu
- from . import adapter_readonlymindcom
-+from . import adapter_dwiggiecom
-
- ## This bit of complexity allows adapters to be added by just adding
- ## importing. It eliminates the long if/else clauses we used to need
-Index: FanFicFare-4.20.0/fanficfare/adapters/adapter_dwiggiecom.py
-===================================================================
---- /dev/null
-+++ FanFicFare-4.20.0/fanficfare/adapters/adapter_dwiggiecom.py
-@@ -0,0 +1,384 @@
-+# -*- coding: utf-8 -*-
-+
-+# DO NOT PROPOSE TO MERGE! THERE ARE MANY GOOD REASONS WHY DWIGGIE IS
-+# AMONG
-+# https://github.com/JimmXinu/FanFicFare/wiki/Supportedsites#sites-not-supported
-+# See also https://github.com/JimmXinu/FanFicFare/issues/903
-+
-+# Copyright 2011 Fanficdownloader team
-+#
-+# Licensed under the Apache License, Version 2.0 (the "License");
-+# you may not use this file except in compliance with the License.
-+# You may obtain a copy of the License at
-+#
-+# http://www.apache.org/licenses/LICENSE-2.0
-+#
-+# Unless required by applicable law or agreed to in writing, software
-+# distributed under the License is distributed on an "AS IS" BASIS,
-+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-+# See the License for the specific language governing permissions and
-+# limitations under the License.
-+#
-+
-+import logging
-+import re
-+
-+from ..htmlcleanup import stripHTML
-+from .. import exceptions as exceptions
-+from ..six.moves.urllib.error import HTTPError
-+
-+from .base_adapter import BaseSiteAdapter, makeDate
-+
-+logger = logging.getLogger(__name__)
-+
-+
-+def getClass():
-+    return DwiggieComAdapter
-+
-+# Class name has to be unique. Our convention is camel case the
-+# sitename with Adapter at the end. www is skipped.
-+
-+
-+class DwiggieComAdapter(BaseSiteAdapter):
-+
-+    def __init__(self, config, url):
-+        BaseSiteAdapter.__init__(self, config, url)
-+
-+# 1252 is a superset of iso-8859-1. Most sites that claim to be
-+# iso-8859-1 (and some that claim to be utf8) are really windows-1252.
-+        self.decode = ["Windows-1252", "utf8"]
-+
-+# if left empty, site doesn't return any message at all.
-+        self.username = "NoneGiven"
-+        self.password = ""
-+        self.is_adult = False
-+        self.sectionUrl = ""
-+        self.section = []
-+        self.chapters = dict()
-+
-+
-+# # get storyId from url--url validation guarantees query is only
-+# # sid=1234
-+# self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
-+# logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
-+
-+# get storyId from url--url validation guarantees query correct
-+        m = re.match(self.getSiteURLPattern(), url)
-+        if m:
-+            self.story.setMetadata('storyId', m.group('id'))
-+            logger.debug("storyId: (%s)" % self.story.getMetadata('storyId'))
-+            # normalized story URL.
-+            self._setURL('https://www.' + self.getSiteDomain() +
-+                         '/derby/'+self.story.getMetadata('storyId')+'.htm')
-+        else:
-+            raise exceptions.InvalidStoryURL(url,
-+                                             self.getSiteDomain(),
-+                                             self.getSiteExampleURLs())
-+
-+# Each adapter needs to have a unique site abbreviation.
-+        self.story.setMetadata('siteabbrev', 'dwg')
-+
-+# The date format will vary from site to site.
-+# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
-+        self.dateformat = "%m/%d/%y"
-+
-+    @staticmethod # must be @staticmethod, don't remove it.
-+    def getSiteDomain():
-+        # The site domain. Does have www here, if it uses it.
-+        return 'dwiggie.com'
-+
-+    @classmethod
-+    def getAcceptDomains(cls):
-+        return ['www.dwiggie.com', 'dwiggie.com', 'thedwg.com', 'TheDWG.com']
-+
-+    def getSiteExampleURLs(self):
-+        return "https://"+self.getSiteDomain()+"/derby/name1b.htm"
-+
-+    def getSiteURLPattern(self):
-+        # https://www.dwiggie.com/derby/mari17b.htm
-+        return r"https?://(www.)?(thedwg|TheDWG|dwiggie)\.com/derby/(?P<id>(old_\d{4}\/|old[a-z]\/)?[a-z]+\d+)(?P<section>[a-z]*)\.htm$"
-+
-+    def tryArchivePage(self, url):
-+        try:
-+            data = self.get_request(url)
-+
-+        except HTTPError as e:
-+            if e.code == 404:
-+                # need to change the exception returned
-+                raise exceptions.StoryDoesNotExist(self.meta)
-+            else:
-+                raise e
-+
-+        archivesoup = self.make_soup(data)
-+        m = re.compile(r"/derby/" +
-+                       self.story.getMetadata('storyId')+"[a-z]?.htm$")
-+# print(m.pattern)
-+# print(archivesoup)
-+        a = archivesoup.find('a', href=m)
-+
-+        return a
-+
-+    def getGenre(self, url):
-+        if re.search('id=E', url):
-+            genre = 'Epilogue Abbey'
-+        else:
-+            genre = 'Fantasia Gallery'
-+        self.story.addToList('genre', genre)
-+
-+    def getItemFromArchivePage(self):
-+
-+        urls = ["https://www.dwiggie.com/toc/index.php?id=E&page=all&comp=n",
-+                "https://www.dwiggie.com/toc/index.php?id=F&page=all&comp=n"]
-+        for url in urls:
-+            a = self.tryArchivePage(url)
-+            if a is not None:
-+                self.getGenre(url)
-+                return a.parent
-+        else:
-+            return None
-+
-+    def getMetaFromSearch(self):
-+
-+        params = {}
-+        params['title_name'] = self.story.getMetadata('title')
-+
-+        searchUrl = "https://" + self.getSiteDomain() + "/toc/search.php"
-+
-+        d = self._postUrl(searchUrl, params)
-+# print(d)
-+
-+        searchsoup = self.make_soup(d)
-+        m = re.compile(r"/derby/" + self.story.getMetadata('storyId') +
-+                       "[a-z]?.htm$")
-+# print(m.pattern)
-+# print(self.story.getMetadata('storyId'))
-+        a = searchsoup.find('a', href=m)
-+
-+        return a
-+
-+    def getChaptersFromPage(self, url):
-+        try:
-+            data = self.get_request(url)
-+        except HTTPError as e:
-+            if e.code == 404:
-+                return []
-+            else:
-+                raise e
-+
-+        s = self.story.getMetadata('storyId').split('/')
-+        s.reverse()
-+        storyId_trimmed = s[0]
-+
-+        m = re.match('.*?<body[^>]*>(\s*)?(?P<content>.*?)(</body>|$)',
-+                     data, re.DOTALL)
-+        newdata = m.group('content')
-+        regex = re.compile(r'(Continued\ [Ii]n\ |Continue\ [Oo]n\ [Tt]o\ )?(the\ )?([Nn]ext\ [Ss]ection|[Ss]ection\ [0-9IVXCL]+)')
-+        newdata = re.sub(regex, '', newdata)
-+
-+
-+# pagesections = filter(lambda x: x!=None, re.split('(?m)<hr>|<p>\s*<hr>\s*<\/p>', newdata, re.MULTILINE))
-+# pagesections = filter(lambda x: x!=None, re.split('(?m)(<hr>\s*)*(<p>\s*<\/p>)?', newdata, re.MULTILINE))
-+        pagesections = filter(lambda x: x != None, re.split('<hr ?\/?>', newdata))
-+        pagesections = filter(lambda x: x.strip() != '/', pagesections)
-+# regex = re.compile(r'(href\="'+storyId_trimmed+'[a-z]?.htm$"')
-+# pagesections = filter(lambda x: re.search(re.compile(storyId_trimmed + "[a-z]?.htm$"),x)==None, pagesections)
-+        pagesections.pop(0)  # always remove header
-+
-+        regex = re.compile(r'(?m)(href\="' + storyId_trimmed +
-+                           '[a-z]?.htm\"|Copyright\ held\ by\ the\ author|<p>\s*(Section\ I|Beginning),\s*)', re.MULTILINE)
-+        s = filter(lambda x: regex.search(x), pagesections)
-+# print(s)
-+        pagesections = filter(lambda x: not regex.search(x), pagesections)
-+# print(pagesections[0])
-+        return pagesections
-+
-+    # Getting the chapter list and the meta data, plus 'is adult' checking.
-+    def extractChapterUrlsAndMetadata(self):
-+
-+        url = self.url
-+        meta = self.getItemFromArchivePage()
-+# print(meta)
-+
-+# Title
-+        t = meta.a
-+        self.story.setMetadata('title', t.string.strip())
-+
-+# Author
-+        author = meta.find('a', 'author_link')
-+        if author is not None:
-+            self.story.setMetadata('author', author.string.strip())
-+            self.story.setMetadata('authorId', author['href'].split('=')[1])
-+            self.story.setMetadata('authorUrl', author['href'])
-+            author = author.parent
-+        else:
-+            author = meta.i
-+            self.story.setMetadata('author',
-+                                   author.string.replace('Written by', '')
-+                                   .strip())
-+            self.story.setMetadata('authorId', 'unknown')
-+            self.story.setMetadata('authorUrl', 'unknown')
-+
-+
-+# DateUpdated
-+        dUpdate = meta.find('i', text=re.compile('Last update'))
-+        du = dUpdate.replace('Last update', '').replace('.', '').strip()
-+        try:
-+            self.story.setMetadata('dateUpdated',
-+                                   makeDate(du, self.dateformat))
-+        except ValueError:
-+            self.story.setMetadata('dateUpdated', makeDate(du, "%m/%d/%Y"))
-+        compImg = meta.find('img', alt="Dot")
-+        if compImg is not None:
-+            self.story.setMetadata('status', 'Completed')
-+        else:
-+            self.story.setMetadata('status', 'In-Progress')
-+
-+
-+# Summary & Category
-+# Get the summary components from the meta listing
-+        metalist = meta.contents
-+        s = []
-+        for x in range(0, len(metalist)-1):
-+            item = metalist[x]
-+            if item == author or item == compImg:
-+                s = []
-+                continue
-+            if item == dUpdate or item == dUpdate.parent:
-+                break
-+            s.append(item)
-+
-+# create a soup object from the summary components
-+        soup = self.make_soup("<p></p>")
-+        d = soup.p
-+        for x in s:
-+            d.append(x)
-+# print(d)
-+
-+# extract category from summary text
-+        desc = stripHTML(d)
-+        books = re.compile(r'(?P<book>\~P&P;?\~|\~Em;?\~|\~MP;?\~|\~S\&S;?\~|\~Per;?\~|\~NA;?\~|\~Juv;?\~|\~Misc;?\~)')
-+        booklist = dict({'~P&P~': 'Pride and Prejudice', '~Em~': 'Emma',
-+                         '~MP~': 'Mansfield Park', '~S&S~':
-+                         'Sense and Sensibility', '~Per~': 'Persuasion',
-+                         '~NA~': 'Northanger Abbey', '~Juv~': 'Juvenilia',
-+                         '~Misc~': 'Miscellaneous'})
-+        m = re.search(books, desc)
-+        print(m.group('book'))
-+        book = booklist.get(m.group('book').replace(';', ''))
-+        print(book)
-+        self.story.addToList('category', book)
-+
-+
-+# assign summary info
-+        desc = stripHTML(desc).replace(book, '').strip()
-+        desc = re.sub('^.\s*', '', desc)
-+        if desc is not None:
-+            self.setDescription(url, desc)
-+
-+# # Chapters (Sections in this case-don't know if we can subdivide them)
-+
-+# get the last Section from the archive page link
-+# chapters = ["https://www.dwiggie.com"+t['href']]
-+
-+# get the section letter from the last page
-+        tempUrl = t['href']
-+        if "http://thedwg.com/" in tempUrl:
-+            tempUrl = tempUrl.replace("http://thedwg.com/", "/")
-+        elif "http://TheDWG.com/" in tempUrl:
-+            tempUrl = tempUrl.replace("http://TheDWG.com/", "/")
-+        elif "https://thedwg.com/" in tempUrl:
-+            tempUrl = tempUrl.replace("https://thedwg.com/", "/")
-+        elif "https://TheDWG.com/" in tempUrl:
-+            tempUrl = tempUrl.replace("https://TheDWG.com/", "/")
-+        m = re.match("/derby/" + self.story.getMetadata('storyId') +
-+                     "(?P<section>[a-z]?).htm$", tempUrl)
-+        inc = m.group('section')
-+        if inc == '':
-+            inc = 'a'
-+
-+# get the presumed list of section urls with 'lower' section letters
-+        sections = []
-+        baseurl = "https://www.dwiggie.com/derby/"+self.story.getMetadata('storyId')
-+        extension = ".htm"
-+        ordend = ord(inc)
-+        ordbegin = ord('a')
-+        for numinc in range(ordbegin, ordend+1):
-+            inc = chr(numinc)
-+            if inc == 'a':
-+                sections.append(baseurl+extension)
-+            else:
-+                sections.append(baseurl+inc+extension)
-+
-+        # Process List of Chapters
-+        # create 'dummy' urls for individual chapters in the form
-+        # 'pageurl#pageindex' where page index is an index starting with 0 per
-+        # page
-+        c = 0
-+        postdate = None
-+        chapters = []
-+        for x in range(0, len(sections)):
-+            section = sections[x]
-+            i = 0
-+            for chapter in self.getChaptersFromPage(section):
-+                c += 1
-+                chaptersoup = self.make_soup(chapter)
-+# self.chapterUrls.append(('Chapter '+str(c),section+'#'+str(i)))
-+                cUrl = section+'#'+str(i)
-+                t = chaptersoup.find('font', size="+1", color="#336666")
-+                ctitle = ''
-+                if t is not None:
-+                    ctitle = stripHTML(t)
-+# self.chapterUrls.append(('Chapter '+str(c),cUrl))
-+                self.chapterUrls.append((ctitle, cUrl))
-+                chapters.append((cUrl, chaptersoup))
-+                if postdate is None:
-+                    regex = re.compile(r'Posted\ on\:?\ (?P<date>\d{4}\-\d{2}\-\d{2}|\w+,\ \d+\ \w+\ \d{4})')
-+                    # Sunday, 21 March 2004, at 6:00 a.m.
-+                    m = re.search(regex, chapter)
-+                    if m is not None:
-+                        postdate = m.group('date')
-+                i += 1
-+        self.chapters = dict(chapters)
-+# print(postdate)
-+        pubdate = None
-+        if postdate is not None:
-+            format1 = re.match(re.compile(r'\d{4}\-\d{2}\-\d{2}'), postdate)
-+            format2 = re.match(re.compile(r'\w+,\ \d+\ \w+\ \d{4}'), postdate)
-+            if format1 is not None:
-+                pubdate = makeDate(postdate, "%Y-%m-%d")
-+            if format2 is not None:
-+                pubdate = makeDate(postdate, "%A, %d %B %Y")
-+
-+        if pubdate is None:
-+            pubdate = makeDate(self.story.getMetadata('dateUpdated'),
-+                               "%Y-%m-%d")
-+# print(pubdate)
-+        self.story.setMetadata('datePublished', pubdate)
-+# print(self.story.getMetadata('dateUpdated'))
-+# print(self.story.getMetadata('datePublished'))
-+        self.story.setMetadata('numChapters', c)
-+        logger.debug("numChapters: (%s)" % self.story.getMetadata('numChapters'))
-+
-+    # grab the text for an individual chapter.
-+    def getChapterText(self, url):
-+        logger.debug('Getting chapter text from: %s' % url)
-+
-+        chapter = self.chapters.get(url)
-+# for c in self.chapters:
-+#     if c[0] == url:
-+#         chapter = c[1]
-+#         chapter = self.make_soup(c[1])
-+
-+# chapter = find(lambda c: c[0] == url, self.chapters)[1]
-+# page_url = url.split('#')[0]
-+# x = url.split('#')[1]
-+# if self.sectionUrl != page_url:
-+#     self.sectionUrl = page_url
-+#     self.section = self.getChaptersFromPage(page_url)
-+#
-+#     chapter = self.make_soup(self.section[int(x)])
-+
-+# chapter = self.make_soup(self.getChaptersFromPage(page_url)[int(x)])
-+
-+        return self.utf8FromSoup(url, chapter)
diff --git a/python-fanficfare.changes b/python-fanficfare.changes
index 3e4f826..f6a21a7 100644
--- a/python-fanficfare.changes
+++ b/python-fanficfare.changes
@@ -1,3 +1,8 @@
+-------------------------------------------------------------------
+Fri Mar 10 12:09:49 UTC 2023 - Matej Cepl
+
+- Remove adapter_dwiggiecom.patch … it really doesn't work.
+
 -------------------------------------------------------------------
 Fri Mar  3 09:15:19 UTC 2023 - Dirk Müller

diff --git a/python-fanficfare.spec b/python-fanficfare.spec
index 22747d5..f53f0be 100644
--- a/python-fanficfare.spec
+++ b/python-fanficfare.spec
@@ -27,9 +27,6 @@ License:        GPL-3.0-only
 Group:          Development/Languages/Python
 URL:            https://github.com/JimmXinu/FanFicFare
 Source:         https://github.com/JimmXinu/FanFicFare/archive/v%{version}/FanFicFare-%{version}.tar.gz
-# PATCH-FEATURE-OPENSUSE adapter_dwiggiecom.patch gh#JimmXinu/FanFicFare#903 mcepl@suse.com
-# adapter for dwiggie.com, which is probably not for upstream
-Patch0:         adapter_dwiggiecom.patch
 BuildRequires:  %{python_module beautifulsoup4}
 BuildRequires:  %{python_module chardet}
 BuildRequires:  %{python_module cloudscraper}