From 105eba57a72efcd437e9dfcf631c9ea571843cc28ce2df7dae27836ade23553d Mon Sep 17 00:00:00 2001
From: Matej Cepl <mcepl@suse.com>
Date: Wed, 15 Feb 2023 06:53:20 +0000
Subject: [PATCH] - Update to 4.20.0:

  - Fixes for adapter_fictionlive story URLs-normalize & skip
    unsub URL
  - adapter_deviantartcom date changes #910, thanks bugmaschine
  - Revamp retries for browser cache with open_pages_in_browser
  - Fix for & in chapter title.
  - Add r_anthmax/n_anthmax options for custom_columns_settings
  - Fixed the Deviantart adapter not detecting that a Deviation
    is Marked as needing a login, thanks bugmaschine
  - Skip day of week for localization in browsercache_firefox2
  - Move makeDate to dateutils to call from browsercache_firefox2
  - adapter_mediaminerorg: Updates for site changes
  - adapter_fastnovelsnet: Fixes for site changes -- tested with
    use_flaresolverr_proxy
  - Update language->langcode mapping for updated AO3 list
  - Browser Cache Refactor & open_pages_in_browser feature
  - See PR #905 and this MR post.
  - Fixes for config base_xenforo options, closes #902
  - Fix for adapter_quotevcom status
  - Equalize ok/cancel buttons on user/pass & email pass dialogs
  - adapter_ficbooknet: Site change for status + remove debug
  - Tweak for adapter_storiesonlinenet description parsing
- Add adapter_dwiggiecom.patch (gh#JimmXinu/FanFicFare#903)
  for openSUSE-only (reasons are in the patch) support for
  dwiggie.com.
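
With the patch applied, dwiggie.com story URLs are handled like any
other supported site; an illustrative invocation (any /derby/ story
URL, with or without a trailing section letter, is accepted):

    fanficfare https://www.dwiggie.com/derby/mari17b.htm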

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-fanficfare?expand=0&rev=97
---
 FanFicFare-4.19.0.tar.gz  |   3 -
 FanFicFare-4.20.0.tar.gz  |   3 +
 adapter_dwiggiecom.patch  | 413 ++++++++++++++++++++++++++++++++++++++
 python-fanficfare.changes |  29 +++
 python-fanficfare.spec    |   5 +-
 5 files changed, 449 insertions(+), 4 deletions(-)
 delete mode 100644 FanFicFare-4.19.0.tar.gz
 create mode 100644 FanFicFare-4.20.0.tar.gz
 create mode 100644 adapter_dwiggiecom.patch

diff --git a/FanFicFare-4.19.0.tar.gz b/FanFicFare-4.19.0.tar.gz
deleted file mode 100644
index bd0dd83..0000000
--- a/FanFicFare-4.19.0.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0bba6a27fbbd4a811ae30003888a7ba7cf7fdd3804b65f74625d8efafad04379
-size 1965733
diff --git a/FanFicFare-4.20.0.tar.gz b/FanFicFare-4.20.0.tar.gz
new file mode 100644
index 0000000..2d4d4af
--- /dev/null
+++ b/FanFicFare-4.20.0.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:719e86d23b0a724bb55926bf300dcf416bd6e1c4c1f64f7fe3dd3cedbd38cfa4
+size 1970872
diff --git a/adapter_dwiggiecom.patch b/adapter_dwiggiecom.patch
new file mode 100644
index 0000000..b71fbab
--- /dev/null
+++ b/adapter_dwiggiecom.patch
@@ -0,0 +1,413 @@
+From 45c6d71f57aefc3b63f2a4253eea3f730b76c6fb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= <mcepl@suse.com>
+Date: Wed, 15 Feb 2023 07:38:13 +0100
+Subject: [PATCH] Add adapter_dwiggiecom, which however will never be pushed
+ upstream.
+
+---
+ fanficfare/adapters/__init__.py           |   1 +
+ fanficfare/adapters/adapter_dwiggiecom.py | 384 ++++++++++++++++++++++++++++++
+ 2 files changed, 385 insertions(+)
+ create mode 100644 fanficfare/adapters/adapter_dwiggiecom.py
+
+Index: FanFicFare-4.20.0/fanficfare/adapters/__init__.py
+===================================================================
+--- FanFicFare-4.20.0.orig/fanficfare/adapters/__init__.py
++++ FanFicFare-4.20.0/fanficfare/adapters/__init__.py
+@@ -160,6 +160,7 @@ from . import adapter_psychficcom
+ from . import adapter_deviantartcom
+ from . import adapter_merengohu
+ from . import adapter_readonlymindcom
++from . import adapter_dwiggiecom
+ 
+ ## This bit of complexity allows adapters to be added by just adding
+ ## importing. It eliminates the long if/else clauses we used to need
+Index: FanFicFare-4.20.0/fanficfare/adapters/adapter_dwiggiecom.py
+===================================================================
+--- /dev/null
++++ FanFicFare-4.20.0/fanficfare/adapters/adapter_dwiggiecom.py
+@@ -0,0 +1,384 @@
++# -*- coding: utf-8 -*-
++
++# DO NOT PROPOSE TO MERGE! THERE ARE MANY GOOD REASONS WHY DWIGGIE IS
++# AMONG
++# https://github.com/JimmXinu/FanFicFare/wiki/Supportedsites#sites-not-supported
++# See also https://github.com/JimmXinu/FanFicFare/issues/903
++
++# Copyright 2011 Fanficdownloader team
++#
++# Licensed under the Apache License, Version 2.0 (the "License");
++# you may not use this file except in compliance with the License.
++# You may obtain a copy of the License at
++#
++#     http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++#
++
++import logging
++import re
++
++from ..htmlcleanup import stripHTML
++from .. import exceptions as exceptions
++from ..six.moves.urllib.error import HTTPError
++
++from .base_adapter import BaseSiteAdapter, makeDate
++
++logger = logging.getLogger(__name__)
++
++
++def getClass():
++    return DwiggieComAdapter
++
++# Class name has to be unique. Our convention is camel case the
++# sitename with Adapter at the end. www is skipped.
++
++
++class DwiggieComAdapter(BaseSiteAdapter):
++
++    def __init__(self, config, url):
++        BaseSiteAdapter.__init__(self, config, url)
++
++# 1252 is a superset of iso-8859-1. Most sites that claim to be
++# iso-8859-1 (and some that claim to be utf8) are really windows-1252.
++        self.decode = ["Windows-1252", "utf8"]
++
++# if left empty, site doesn't return any message at all.
++        self.username = "NoneGiven"
++        self.password = ""
++        self.is_adult = False
++        self.sectionUrl = ""
++        self.section = []
++        self.chapters = dict()
++
++
++# # get storyId from url--url validation guarantees query is only
++# # sid=1234
++# self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
++# logger.debug("storyId: (%s)"%self.story.getMetadata('storyId'))
++
++# get storyId from url--url validation guarantees query correct
++        m = re.match(self.getSiteURLPattern(), url)
++        if m:
++            self.story.setMetadata('storyId', m.group('id'))
++            logger.debug("storyId: (%s)" % self.story.getMetadata('storyId'))
++            # normalized story URL.
++            self._setURL('https://www.' + self.getSiteDomain() +
++                         '/derby/'+self.story.getMetadata('storyId')+'.htm')
++        else:
++            raise exceptions.InvalidStoryURL(url,
++                                             self.getSiteDomain(),
++                                             self.getSiteExampleURLs())
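++
++# Story pages on the site are plain static files: the first section of
++# story 'mari17' lives at /derby/mari17.htm and its continuations at
++# /derby/mari17b.htm, /derby/mari17c.htm, ...; older stories sit under
++# /derby/old_<year>/ or /derby/old<letter>/ prefixes (see
++# getSiteURLPattern() below).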
++
++# Each adapter needs to have a unique site abbreviation.
++        self.story.setMetadata('siteabbrev', 'dwg')
++
++# The date format will vary from site to site.
++# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
++        self.dateformat = "%m/%d/%y"
++
++    @staticmethod  # must be @staticmethod, don't remove it.
++    def getSiteDomain():
++        # The site domain. Does have www here, if it uses it.
++        return 'dwiggie.com'
++
++    @classmethod
++    def getAcceptDomains(cls):
++        return ['www.dwiggie.com', 'dwiggie.com', 'thedwg.com', 'TheDWG.com']
++
++    def getSiteExampleURLs(self):
++        return "https://"+self.getSiteDomain()+"/derby/name1b.htm"
++
++    def getSiteURLPattern(self):
++        # https://www.dwiggie.com/derby/mari17b.htm
++        return r"https?://(www.)?(thedwg|TheDWG|dwiggie)\.com/derby/(?P<id>(old_\d{4}\/|old[a-z]\/)?[a-z]+\d+)(?P<section>[a-z]*)\.htm$"
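++# e.g. 'https://www.dwiggie.com/derby/mari17b.htm' matches with
++# id='mari17' and section='b'; __init__() above then normalizes any
++# accepted URL back to the first-section form
++# 'https://www.dwiggie.com/derby/mari17.htm'.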
++
++    def tryArchivePage(self, url):
++        try:
++            data = self.get_request(url)
++
++        except HTTPError as e:
++            if e.code == 404:
++                # need to change the exception returned
++                raise exceptions.StoryDoesNotExist(self.meta)
++            else:
++                raise e
++
++        archivesoup = self.make_soup(data)
++        m = re.compile(r"/derby/" +
++                       self.story.getMetadata('storyId')+"[a-z]?.htm$")
++# print(m.pattern)
++# print(archivesoup)
++        a = archivesoup.find('a', href=m)
++
++        return a
++
++    def getGenre(self, url):
++        if re.search('id=E', url):
++            genre = 'Epilogue Abbey'
++        else:
++            genre = 'Fantasia Gallery'
++        self.story.addToList('genre', genre)
++
++    def getItemFromArchivePage(self):
++
++        urls = ["https://www.dwiggie.com/toc/index.php?id=E&page=all&comp=n",
++                "https://www.dwiggie.com/toc/index.php?id=F&page=all&comp=n"]
++        for url in urls:
++            a = self.tryArchivePage(url)
++            if a is not None:
++                self.getGenre(url)
++                return a.parent
++
++        return None
++
++    def getMetaFromSearch(self):
++
++        params = {}
++        params['title_name'] = self.story.getMetadata('title')
++
++        searchUrl = "https://" + self.getSiteDomain() + "/toc/search.php"
++
++        d = self._postUrl(searchUrl, params)
++# print(d)
++
++        searchsoup = self.make_soup(d)
++        m = re.compile(r"/derby/" + self.story.getMetadata('storyId') +
++                       "[a-z]?.htm$")
++# print(m.pattern)
++# print(self.story.getMetadata('storyId'))
++        a = searchsoup.find('a', href=m)
++
++        return a
++
++    def getChaptersFromPage(self, url):
++        try:
++            data = self.get_request(url)
++        except HTTPError as e:
++            if e.code == 404:
++                return []
++            else:
++                raise e
++
++        s = self.story.getMetadata('storyId').split('/')
++        s.reverse()
++        storyId_trimmed = s[0]
++
++        m = re.match('.*?<body[^>]*>(\s*<br>)?(?P<content>.*?)(</body>|$)',
++                     data, re.DOTALL)
++        newdata = m.group('content')
++        regex = re.compile(r'(Continued\ [Ii]n\ |Continue\ [Oo]n\ [Tt]o\ )?(the\ )?([Nn]ext\ [Ss]ection|[Ss]ection\ [0-9IVXCL]+)')
++        newdata = re.sub(regex, '', newdata)
++
++
++# pagesections = filter(lambda x: x!=None, re.split('(?m)<hr>|<p>\s*<hr>\s*<\/p>', newdata, re.MULTILINE))
++# pagesections = filter(lambda x: x!=None, re.split('(?m)(<p>\s*)*(\s*<\/p>)?', newdata, re.MULTILINE))
++        pagesections = list(filter(lambda x: x != None, re.split('<hr>', newdata)))
++        pagesections = list(filter(lambda x: x.strip() != '/', pagesections))
++# regex = re.compile(r'(href\="'+storyId_trimmed+'[a-z]?.htm$"')
++# pagesections = filter(lambda x: re.search(re.compile(storyId_trimmed + "[a-z]?.htm$"),x)==None, pagesections)
++        pagesections.pop(0)  # always remove header
++
++        regex = re.compile(r'(?m)(href\="' + storyId_trimmed +
++                           '[a-z]?.htm\"|Copyright\ held\ by\ the\ author|<p>\s*(Section\ I|Beginning),\s*)', re.MULTILINE)
++        s = filter(lambda x: regex.search(x), pagesections)
++# print(s)
++        pagesections = list(filter(lambda x: not regex.search(x), pagesections))
++# print(pagesections[0])
++        return pagesections
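++# getChaptersFromPage() returns the list of raw HTML fragments for the
++# individual posts on one derby page; fragments that are only
++# navigation or boilerplate (links back to the story's own .htm files,
++# the copyright notice, 'Section I'/'Beginning' nav lines) are filtered
++# out above instead of being parsed structurally.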
++
++    # Getting the chapter list and the meta data, plus 'is adult' checking.
++    def extractChapterUrlsAndMetadata(self):
++
++        url = self.url
++        meta = self.getItemFromArchivePage()
++# print(meta)
++
++# Title
++        t = meta.a
++        self.story.setMetadata('title', t.string.strip())
++
++# Author
++        author = meta.find('a', 'author_link')
++        if author is not None:
++            self.story.setMetadata('author', author.string.strip())
++            self.story.setMetadata('authorId', author['href'].split('=')[1])
++            self.story.setMetadata('authorUrl', author['href'])
++            author = author.parent
++        else:
++            author = meta.i
++            self.story.setMetadata('author',
++                                   author.string.replace('Written by', '')
++                                   .strip())
++            self.story.setMetadata('authorId', 'unknown')
++            self.story.setMetadata('authorUrl', 'unknown')
++
++
++# DateUpdated
++        dUpdate = meta.find('i', text=re.compile('Last update'))
++        du = dUpdate.replace('Last update', '').replace('.', '').strip()
++        try:
++            self.story.setMetadata('dateUpdated',
++                                   makeDate(du, self.dateformat))
++        except ValueError:
++            self.story.setMetadata('dateUpdated', makeDate(du, "%m/%d/%Y"))
++        compImg = meta.find('img', alt="Dot")
++        if compImg is not None:
++            self.story.setMetadata('status', 'Completed')
++        else:
++            self.story.setMetadata('status', 'In-Progress')
++
++
++# Summary & Category
++# Get the summary components from the meta listing
++        metalist = meta.contents
++        s = []
++        for x in range(0, len(metalist)-1):
++            item = metalist[x]
++            if item == author or item == compImg:
++                s = []
++                continue
++            if item == dUpdate or item == dUpdate.parent:
++                break
++            s.append(item)
++
++# create a soup object from the summary components
++        soup = self.make_soup("<p></p>")
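++# the empty <p> acts as a detached root: the summary fragments
++# collected above are appended under it so they can be flattened
++# with stripHTML() in one pass below.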
    ") ++ d = soup.p ++ for x in s: ++ d.append(x) ++# print(d) ++ ++# extract category from summary text ++ desc = stripHTML(d) ++ books = re.compile(r'(?P\~P&P;?\~|\~Em;?\~|\~MP;?\~|\~S\&S;?\~|\~Per;?\~|\~NA;?\~|\~Juv;?\~|\~Misc;?\~)') ++ booklist = dict({'~P&P~': 'Pride and Prejudice', '~Em~': 'Emma', ++ '~MP~': 'Mansfield Park', '~S&S~': ++ 'Sense and Sensibility', '~Per~': 'Persuasion', ++ '~NA~': 'Northanger Abbey', '~Juv~': 'Juvenilia', ++ '~Misc~': 'Miscellaneous'}) ++ m = re.search(books, desc) ++ print(m.group('book')) ++ book = booklist.get(m.group('book').replace(';', '')) ++ print(book) ++ self.story.addToList('category', book) ++ ++ ++# assign summary info ++ desc = stripHTML(desc).replace(book, '').strip() ++ desc = re.sub('^.\s*', '', desc) ++ if desc is not None: ++ self.setDescription(url, desc) ++ ++# # Chapters (Sections in this case-don't know if we can subdivide them) ++ ++# get the last Section from the archive page link ++# chapters = ["https://www.dwiggie.com"+t['href']] ++ ++# get the section letter from the last page ++ tempUrl = t['href'] ++ if "http://thedwg.com/" in tempUrl: ++ tempUrl = tempUrl.replace("http://thedwg.com/", "/") ++ elif "http://TheDWG.com/" in tempUrl: ++ tempUrl = tempUrl.replace("http://TheDWG.com/", "/") ++ elif "https://thedwg.com/" in tempUrl: ++ tempUrl = tempUrl.replace("https://thedwg.com/", "/") ++ elif "https://TheDWG.com/" in tempUrl: ++ tempUrl = tempUrl.replace("https://TheDWG.com/", "/") ++ m = re.match("/derby/" + self.story.getMetadata('storyId') + ++ "(?P
    [a-z]?).htm$", tempUrl) ++ inc = m.group('section') ++ if inc == '': ++ inc = 'a' ++ ++# get the presumed list of section urls with 'lower' section letters ++ sections = [] ++ baseurl = "https://www.dwiggie.com/derby/"+self.story.getMetadata('storyId') ++ extension = ".htm" ++ ordend = ord(inc) ++ ordbegin = ord('a') ++ for numinc in range(ordbegin, ordend+1): ++ inc = chr(numinc) ++ if inc == 'a': ++ sections.append(baseurl+extension) ++ else: ++ sections.append(baseurl+inc+extension) ++ ++ # Process List of Chapters ++ # create 'dummy' urls for individual chapters in the form ++ # 'pageurl#pageindex' where page index is an index starting with 0 per ++ # page ++ c = 0 ++ postdate = None ++ chapters = [] ++ for x in range(0, len(sections)): ++ section = sections[x] ++ i = 0 ++ for chapter in self.getChaptersFromPage(section): ++ c += 1 ++ chaptersoup = self.make_soup(chapter) ++# self.chapterUrls.append(('Chapter '+str(c),section+'#'+str(i))) ++ cUrl = section+'#'+str(i) ++ t = chaptersoup.find('font', size="+1", color="#336666") ++ ctitle = '' ++ if t is not None: ++ ctitle = stripHTML(t) ++# self.chapterUrls.append(('Chapter '+str(c),cUrl)) ++ self.chapterUrls.append((ctitle, cUrl)) ++ chapters.append((cUrl, chaptersoup)) ++ if postdate is None: ++ regex = re.compile(r'Posted\ on\:?\ (?P\d{4}\-\d{2}\-\d{2}|\w+,\ \d+\ \w+\ \d{4})') ++ # Sunday, 21 March 2004, at 6:00 a.m. ++ m = re.search(regex, chapter) ++ if m is not None: ++ postdate = m.group('date') ++ i += 1 ++ self.chapters = dict(chapters) ++# print(postdate) ++ pubdate = None ++ if postdate is not None: ++ format1 = re.match(re.compile(r'\d{4}\-\d{2}\-\d{2}'), postdate) ++ format2 = re.match(re.compile(r'\w+,\ \d+\ \w+\ \d{4}'), postdate) ++ if format1 is not None: ++ pubdate = makeDate(postdate, "%Y-%m-%d") ++ if format2 is not None: ++ pubdate = makeDate(postdate, "%A, %d %B %Y") ++ ++ if pubdate is None: ++ pubdate = makeDate(self.story.getMetadata('dateUpdated'), ++ "%Y-%m-%d") ++# print(pubdate) ++ self.story.setMetadata('datePublished', pubdate) ++# print(self.story.getMetadata('dateUpdated')) ++# print(self.story.getMetadata('datePublished')) ++ self.story.setMetadata('numChapters', c) ++ logger.debug("numChapters: (%s)" % self.story.getMetadata('numChapters')) ++ ++ # grab the text for an individual chapter. ++ def getChapterText(self, url): ++ logger.debug('Getting chapter text from: %s' % url) ++ ++ chapter = self.chapters.get(url) ++# for c in self.chapters: ++# if c[0] == url: ++# chapter = c[1] ++# chapter = self.make_soup(c[1]) ++ ++# chapter = find(lambda c: c[0] == url, self.chapters)[1] ++# page_url = url.split('#')[0] ++# x = url.split('#')[1] ++# if self.sectionUrl != page_url: ++# self.sectionUrl = page_url ++# self.section = self.getChaptersFromPage(page_url) ++# ++# chapter = self.make_soup(self.section[int(x)]) ++ ++# chapter = self.make_soup(self.getChaptersFromPage(page_url)[int(x)]) ++ ++ return self.utf8FromSoup(url, chapter) diff --git a/python-fanficfare.changes b/python-fanficfare.changes index 4f75b26..85c8a93 100644 --- a/python-fanficfare.changes +++ b/python-fanficfare.changes @@ -1,3 +1,32 @@ +------------------------------------------------------------------- +Wed Feb 15 06:46:36 UTC 2023 - Matej Cepl + +- Update to 4.20.0: + - Fixes for adapter_fictionlive story URLs-normalize & skip + unsub URL + - adapter_deviantartcom date changes #910, thanks bugmaschine + - Revamp retries for browser cache with open_pages_in_browser + - Fix for & in chapter title. 
+  - Add r_anthmax/n_anthmax options for custom_columns_settings
+  - Fixed the Deviantart adapter not detecting that a Deviation
+    is Marked as needing a login, thanks bugmaschine
+  - Skip day of week for localization in browsercache_firefox2
+  - Move makeDate to dateutils to call from browsercache_firefox2
+  - adapter_mediaminerorg: Updates for site changes
+  - adapter_fastnovelsnet: Fixes for site changes -- tested with
+    use_flaresolverr_proxy
+  - Update language->langcode mapping for updated AO3 list
+  - Browser Cache Refactor & open_pages_in_browser feature
+  - See PR #905 and this MR post.
+  - Fixes for config base_xenforo options, closes #902
+  - Fix for adapter_quotevcom status
+  - Equalize ok/cancel buttons on user/pass & email pass dialogs
+  - adapter_ficbooknet: Site change for status + remove debug
+  - Tweak for adapter_storiesonlinenet description parsing
+- Add adapter_dwiggiecom.patch (gh#JimmXinu/FanFicFare#903)
+  for openSUSE-only (reasons are in the patch) support for
+  dwiggie.com.
+
 -------------------------------------------------------------------
 Mon Jan  2 07:40:26 UTC 2023 - Matej Cepl <mcepl@suse.com>
 
diff --git a/python-fanficfare.spec b/python-fanficfare.spec
index 6dcec7f..f342e2f 100644
--- a/python-fanficfare.spec
+++ b/python-fanficfare.spec
@@ -20,13 +20,16 @@
 %define modnamedown fanficfare
 %define skip_python2 1
 Name:           python-fanficfare
-Version:        4.19.0
+Version:        4.20.0
 Release:        0
 Summary:        Tool for making eBooks from stories on fanfiction and other web sites
 License:        GPL-3.0-only
 Group:          Development/Languages/Python
 URL:            https://github.com/JimmXinu/FanFicFare
 Source:         https://github.com/JimmXinu/%{modname}/archive/v%{version}/%{modname}-%{version}.tar.gz
+# PATCH-FEATURE-OPENSUSE adapter_dwiggiecom.patch gh#JimmXinu/FanFicFare#903 mcepl@suse.com
+# adapter for dwiggie.com, which is probably not for upstream
+Patch0:         adapter_dwiggiecom.patch
 BuildRequires:  %{python_module beautifulsoup4}
 BuildRequires:  %{python_module chardet}
 BuildRequires:  %{python_module cloudscraper}