diff options
author | David Luevano <55825613+luevano@users.noreply.github.com> | 2019-10-11 22:34:27 -0600 |
---|---|---|
committer | David Luevano <55825613+luevano@users.noreply.github.com> | 2019-10-11 22:34:27 -0600 |
commit | 82f3f961b9342a3d5eeeb578cef2aae89e61b074 (patch) | |
tree | 69622026f4948edbfddd3f880180e177bed5c7e1 | |
parent | 08ae7b8221fa322bc21dee1494174f56b67517ad (diff) |
Add version.py, update __init__.py
-rw-r--r-- | tirante/__init__.py | 32 | ||||
-rw-r--r-- | tirante/gcl.py | 87 | ||||
-rw-r--r-- | tirante/tirante.py | 65 | ||||
-rw-r--r-- | tirante/version.py | 23 |
4 files changed, 119 insertions, 88 deletions
diff --git a/tirante/__init__.py b/tirante/__init__.py index e69de29..e42515c 100644 --- a/tirante/__init__.py +++ b/tirante/__init__.py @@ -0,0 +1,32 @@ +"""MIT License + +Copyright (c) 2019 David Luevano + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +from .version import __version__ +from .tirante import create_database +from .tirante import update_database +from .tirante import download_manga + +# If somebody does "from package import *", this is what they will +# be able to access: +__all__ = [create_database, + update_database, + download_manga] diff --git a/tirante/gcl.py b/tirante/gcl.py deleted file mode 100644 index 6956e7c..0000000 --- a/tirante/gcl.py +++ /dev/null @@ -1,87 +0,0 @@ -"""MIT License - -Copyright (c) 2019 David Luevano - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import urllib3 -from bs4 import BeautifulSoup - - -def get_chapters_list(main_url, - manga_name_url, - manga_name, - reverse_sorted=True): - """ - Retrieves chapter urls and names. Returns a list of lists - containing the url and the title of the chapter. - main_url: Main webpage name (source). - manga_name_url: Name of the manga in the url format - that's used by the webpage. - manga_name: Actual name of the manga, as it appears in the webpage. - reverse_sorted: Sorting of the final array. - """ - - manga_url = ''.join([main_url, manga_name_url]) - - # Not actually a file, but the content of the html. - html = urllib3.PoolManager().request('GET', manga_url) - - # Get the data from the html and parse it. - soup = BeautifulSoup(html.data, 'html.parser') - - # Get the "rows" class, this contains the url - # and title data for each chapter. - # Deletes the first tag, since it's not useful. - soup_rows = soup.find_all('div', {'class': 'row'}) - del soup_rows[0] - - # Creates a list to store date for each url and chapter name. - chapter_list = [] - - for row in soup_rows: - - # Gets the url name from the a tag. - href = row.a['href'] - # Same, for the title. Deletes every ocurrance of the manga name, - # unwanted characters and then gets everyword. - title_words = row.a['title'].replace(manga_name, '').replace('?', '') - title_words = title_words.replace(':', '').replace('-', '') - title_words = title_words.replace('...', '').replace(',', '').split() - - # Doing all the work in oneliner doesn't work for some chapters, - # for some reason. - # title = '_'.join(row.a['title'].replace(manga_name, '') - # .replace(':', '').replace('-', '').lower().split()) - - # Lowers every word and appends it to a new list, - # then it gets joined with '_' as a sep. - title_words_lower = [] - for word in title_words: - title_words_lower.append(word.lower()) - - title = '_'.join(title_words_lower) - - # print(href, title) - chapter_list.append([href, title]) - - if reverse_sorted: - return chapter_list[::-1] - else: - return chapter_list diff --git a/tirante/tirante.py b/tirante/tirante.py index c179475..3800b18 100644 --- a/tirante/tirante.py +++ b/tirante/tirante.py @@ -26,7 +26,70 @@ from bs4 import BeautifulSoup import requests # Project specific imports. -from gcl import get_chapters_list +# from .gcl import get_chapters_list + + +def get_chapters_list(main_url, + manga_name_url, + manga_name, + reverse_sorted=True): + """ + Retrieves chapter urls and names. Returns a list of lists + containing the url and the title of the chapter. + main_url: Main webpage name (source). + manga_name_url: Name of the manga in the url format + that's used by the webpage. + manga_name: Actual name of the manga, as it appears in the webpage. + reverse_sorted: Sorting of the final array. + """ + + manga_url = ''.join([main_url, manga_name_url]) + + # Not actually a file, but the content of the html. + html = urllib3.PoolManager().request('GET', manga_url) + + # Get the data from the html and parse it. + soup = BeautifulSoup(html.data, 'html.parser') + + # Get the "rows" class, this contains the url + # and title data for each chapter. + # Deletes the first tag, since it's not useful. + soup_rows = soup.find_all('div', {'class': 'row'}) + del soup_rows[0] + + # Creates a list to store date for each url and chapter name. + chapter_list = [] + + for row in soup_rows: + + # Gets the url name from the a tag. + href = row.a['href'] + # Same, for the title. Deletes every ocurrance of the manga name, + # unwanted characters and then gets everyword. + title_words = row.a['title'].replace(manga_name, '').replace('?', '') + title_words = title_words.replace(':', '').replace('-', '') + title_words = title_words.replace('...', '').replace(',', '').split() + + # Doing all the work in oneliner doesn't work for some chapters, + # for some reason. + # title = '_'.join(row.a['title'].replace(manga_name, '') + # .replace(':', '').replace('-', '').lower().split()) + + # Lowers every word and appends it to a new list, + # then it gets joined with '_' as a sep. + title_words_lower = [] + for word in title_words: + title_words_lower.append(word.lower()) + + title = '_'.join(title_words_lower) + + # print(href, title) + chapter_list.append([href, title]) + + if reverse_sorted: + return chapter_list[::-1] + else: + return chapter_list def chapters_list_to_csv(chapters_list, diff --git a/tirante/version.py b/tirante/version.py new file mode 100644 index 0000000..59c6a2f --- /dev/null +++ b/tirante/version.py @@ -0,0 +1,23 @@ +"""MIT License + +Copyright (c) 2019 David Luevano + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +__version__ = '0.0.1' |