From 08ae7b8221fa322bc21dee1494174f56b67517ad Mon Sep 17 00:00:00 2001
From: David Luevano <55825613+luevano@users.noreply.github.com>
Date: Fri, 11 Oct 2019 22:04:03 -0600
Subject: File restructure and bug fix.

---
 tests/basic_tests.py   |  32 ---------
 tirante/__init__.py    |   0
 tirante/__main__.py    |  24 +++++++++++
 tirante/basic_tests.py |  71 +++++++++++++++++++++++++++++++++
 tirante/gcl.py         |  87 ++++++++++++++++++++++++++++++++++++++++
 tirante/tirante.py     | 105 ++++++++-----------------------------------
 6 files changed, 198 insertions(+), 121 deletions(-)
 delete mode 100644 tests/basic_tests.py
 create mode 100644 tirante/__init__.py
 create mode 100644 tirante/__main__.py
 create mode 100644 tirante/basic_tests.py
 create mode 100644 tirante/gcl.py

diff --git a/tests/basic_tests.py b/tests/basic_tests.py
deleted file mode 100644
index d7bf557..0000000
--- a/tests/basic_tests.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# create_database()
-# update_database()
-# download_manga()
-
-# print(os.listdir())
-
-# chapter_csv_to_df(chapter_csv='kimetsu_no_yaiba.csv')
-# download_manga()
-
-# chapter_list = get_chapters_list()
-# for chapter in chapter_list:
-#     print(chapter)
-
-# chapters_list_to_csv(chapters_list=chapter_list)
-
-# os.chdir('data/kimetsu_no_yaiba')
-# for image in chapter_image_csv_to_list('chapter_1_cruelty.csv'):
-#     print(image)
-
-# chapters_list = chapters_csv_to_list(chapter_csv='kimetsu_no_yaiba.csv')
-
-# chapter_image_list_to_csv(chapters_list[0])
-
-
-# first_chapter_img_url_list = get_chapter_image_list(chapter_list[0])
-
-# download_image(first_chapter_img_url_list[0])
-
-
-# os.chdir(MANGA_DIR)
-
-# download_chapter(first_chapter_img_url_list)
diff --git a/tirante/__init__.py b/tirante/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tirante/__main__.py b/tirante/__main__.py
new file mode 100644
index 0000000..be5f1b0
--- /dev/null
+++ b/tirante/__main__.py
@@ -0,0 +1,24 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+if __name__ == "__main__":
+    pass
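__main__.py is only a stub for now, so running the package does nothing yet. One possible shape for the entry point, sketched under the same assumption the rest of this commit makes (sibling modules imported by bare name, so it is run from inside tirante/); the argument values are placeholders in the spirit of basic_tests.py and not part of this commit:

    # Sketch only: run from inside tirante/ so `import tirante` picks up
    # tirante.py directly, the same trick basic_tests.py relies on.
    import tirante

    if __name__ == '__main__':
        tirante.create_database(main_url='https://manganelo.com/manga/',
                                manga_name_url='kimetsu_no_yaiba/',
                                manga_name='Kimetsu no Yaiba',
                                manga_dir='E:\\Mangas\\',
                                manga_data_dir='./test_data')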
+""" +if __name__ == "__main__": + pass diff --git a/tirante/basic_tests.py b/tirante/basic_tests.py new file mode 100644 index 0000000..3259575 --- /dev/null +++ b/tirante/basic_tests.py @@ -0,0 +1,71 @@ +"""MIT License + +Copyright (c) 2019 David Luevano + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" +import tirante + +# Main manga source. +MAIN_URL = 'https://manganelo.com/manga/' +# Manga name. +MANGA_NAME = 'Kimetsu no Yaiba' +# Manga name in the form of how appears in the url. +MANGA_NAME_URL = 'kimetsu_no_yaiba/' + +# PC main file location. +MANGA_DIR = 'E:\\Mangas\\' +# PC main manga data location. +MANGA_DATA_DIR = ''.join(['C:\\Users\\Lorentzeus\\Google Drive\\', + 'Personal\\Python\\tirante\\test_data']) + +tirante.create_database(main_url=MAIN_URL, + manga_name_url=MANGA_NAME_URL, + manga_name=MANGA_NAME) +# update_database() +# download_manga() + +# print(os.listdir()) + +# chapter_csv_to_df(chapter_csv='kimetsu_no_yaiba.csv') +# download_manga() + +# chapter_list = get_chapters_list() +# for chapter in chapter_list: +# print(chapter) + +# chapters_list_to_csv(chapters_list=chapter_list) + +# os.chdir('data/kimetsu_no_yaiba') +# for image in chapter_image_csv_to_list('chapter_1_cruelty.csv'): +# print(image) + +# chapters_list = chapters_csv_to_list(chapter_csv='kimetsu_no_yaiba.csv') + +# chapter_image_list_to_csv(chapters_list[0]) + + +# first_chapter_img_url_list = get_chapter_image_list(chapter_list[0]) + +# download_image(first_chapter_img_url_list[0]) + + +# os.chdir(MANGA_DIR) + +# download_chapter(first_chapter_img_url_list) diff --git a/tirante/gcl.py b/tirante/gcl.py new file mode 100644 index 0000000..6956e7c --- /dev/null +++ b/tirante/gcl.py @@ -0,0 +1,87 @@ +"""MIT License + +Copyright (c) 2019 David Luevano + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
diff --git a/tirante/gcl.py b/tirante/gcl.py
new file mode 100644
index 0000000..6956e7c
--- /dev/null
+++ b/tirante/gcl.py
@@ -0,0 +1,87 @@
+"""MIT License
+
+Copyright (c) 2019 David Luevano
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+import urllib3
+from bs4 import BeautifulSoup
+
+
+def get_chapters_list(main_url,
+                      manga_name_url,
+                      manga_name,
+                      reverse_sorted=True):
+    """
+    Retrieves chapter urls and names. Returns a list of lists
+    containing the url and the title of the chapter.
+    main_url: Main webpage name (source).
+    manga_name_url: Name of the manga in the url format
+        that's used by the webpage.
+    manga_name: Actual name of the manga, as it appears in the webpage.
+    reverse_sorted: If True, return the list in reverse order.
+    """
+
+    manga_url = ''.join([main_url, manga_name_url])
+
+    # Not a file; the raw HTML response of the manga's main page.
+    html = urllib3.PoolManager().request('GET', manga_url)
+
+    # Get the data from the html and parse it.
+    soup = BeautifulSoup(html.data, 'html.parser')
+
+    # Get the 'row' class divs; these contain the url
+    # and title data for each chapter.
+    # Delete the first tag, since it's not useful.
+    soup_rows = soup.find_all('div', {'class': 'row'})
+    del soup_rows[0]
+
+    # Create a list to store the url and chapter name of each chapter.
+    chapter_list = []
+
+    for row in soup_rows:
+
+        # Get the url from the a tag.
+        href = row.a['href']
+        # Same for the title: delete every occurrence of the manga name
+        # and unwanted characters, then split into words.
+        title_words = row.a['title'].replace(manga_name, '').replace('?', '')
+        title_words = title_words.replace(':', '').replace('-', '')
+        title_words = title_words.replace('...', '').replace(',', '').split()
+
+        # Doing all the work in a one-liner doesn't work for some chapters,
+        # for some reason.
+        # title = '_'.join(row.a['title'].replace(manga_name, '')
+        #                  .replace(':', '').replace('-', '').lower().split())
+
+        # Lowercase every word and append it to a new list,
+        # then join with '_' as a separator.
+        title_words_lower = []
+        for word in title_words:
+            title_words_lower.append(word.lower())
+
+        title = '_'.join(title_words_lower)
+
+        # print(href, title)
+        chapter_list.append([href, title])
+
+    if reverse_sorted:
+        return chapter_list[::-1]
+    else:
+        return chapter_list
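For reference, a minimal usage sketch of the extracted helper, using the same manganelo values as basic_tests.py (run from inside tirante/ so the bare-name import resolves):

    from gcl import get_chapters_list

    # Each entry is a [url, title] pair; reverse_sorted=True (the default)
    # flips the order in which the chapters are listed on the page.
    chapters = get_chapters_list(main_url='https://manganelo.com/manga/',
                                 manga_name_url='kimetsu_no_yaiba/',
                                 manga_name='Kimetsu no Yaiba')
    for href, title in chapters:
        print(href, title)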
diff --git a/tirante/tirante.py b/tirante/tirante.py
index b29ef5b..c179475 100644
--- a/tirante/tirante.py
+++ b/tirante/tirante.py
@@ -25,85 +25,12 @@ import urllib3
 from bs4 import BeautifulSoup
 import requests
 
-# Main manga source.
-MAIN_URL = 'https://manganelo.com/manga/'
-# Manga name.
-MANGA_NAME = 'Kimetsu no Yaiba'
-# Manga name in the form of how appears in the url.
-MANGA_NAME_URL = 'kimetsu_no_yaiba/'
-
-# PC main file location.
-MANGA_DIR = 'E:\\Mangas\\'
-# PC main manga data location.
-MANGA_DATA_DIR = ''.join(['C:\\Users\\Lorentzeus\\Google Drive\\',
-                          'Personal\\Python\\mangas\\test_data'])
-
-
-def get_chapters_list(main_url=MAIN_URL,
-                      manga_name_url=MANGA_NAME_URL,
-                      manga_name=MANGA_NAME,
-                      reverse_sorted=True):
-    """
-    Retrieves chapter urls and names. Returns a list of lists
-    containing the url and the title of the chapter.
-    main_url: Main webpage name (source).
-    manga_name_url: Name of the manga in the url format
-        that's used by the webpage.
-    manga_name: Actual name of the manga, as it appears in the webpage.
-    reverse_sorted: Sorting of the final array.
-    """
-
-    manga_url = ''.join([MAIN_URL, MANGA_NAME_URL])
-
-    # Not actually a file, but the content of the html.
-    html = urllib3.PoolManager().request('GET', manga_url)
-
-    # Get the data from the html and parse it.
-    soup = BeautifulSoup(html.data, 'html.parser')
-
-    # Get the "rows" class, this contains the url
-    # and title data for each chapter.
-    # Deletes the first tag, since it's not useful.
-    soup_rows = soup.find_all('div', {'class': 'row'})
-    del soup_rows[0]
-
-    # Creates a list to store date for each url and chapter name.
-    chapter_list = []
-
-    for row in soup_rows:
-
-        # Gets the url name from the a tag.
-        href = row.a['href']
-        # Same, for the title. Deletes every ocurrance of the manga name,
-        # unwanted characters and then gets everyword.
-        title_words = row.a['title'].replace(manga_name, '').replace('?', '')
-        title_words = title_words.replace(':', '').replace('-', '')
-        title_words = title_words.replace('...', '').replace(',', '').split()
-
-        # Doing all the work in oneliner doesn't work for some chapters,
-        # for some reason.
-        # title = '_'.join(row.a['title'].replace(manga_name, '')
-        #                  .replace(':', '').replace('-', '').lower().split())
-
-        # Lowers every word and appends it to a new list,
-        # then it gets joined with '_' as a sep.
-        title_words_lower = []
-        for word in title_words:
-            title_words_lower.append(word.lower())
-
-        title = '_'.join(title_words_lower)
-
-        # print(href, title)
-        chapter_list.append([href, title])
-
-    if reverse_sorted:
-        return chapter_list[::-1]
-    else:
-        return chapter_list
+# Project specific imports.
+from gcl import get_chapters_list
 
 
 def chapters_list_to_csv(chapters_list,
-                         manga_name=MANGA_NAME):
+                         manga_name):
     """
     Creates a csv file from the input chapter_list.
     chapters_list: List of data of the chapters.
@@ -200,11 +127,11 @@ def chapter_image_csv_to_list(chapter_image_csv):
     return out_chapter_image_list
 
 
-def create_database(main_url=MAIN_URL,
-                    manga_name_url=MANGA_NAME_URL,
-                    manga_name=MANGA_NAME,
-                    manga_dir=MANGA_DIR,
-                    manga_data_dir=MANGA_DATA_DIR):
+def create_database(main_url,
+                    manga_name_url,
+                    manga_name,
+                    manga_dir,
+                    manga_data_dir):
     """
     Creates a database from zero, made of csv files.
     main_url: Main webpage name (source).
@@ -258,11 +185,11 @@ def create_database(main_url=MAIN_URL,
     print(''.join([chapter_name_ext, ' already exists.']))
 
 
-def update_database(main_url=MAIN_URL,
-                    manga_name_url=MANGA_NAME_URL,
-                    manga_name=MANGA_NAME,
-                    manga_dir=MANGA_DIR,
-                    manga_data_dir=MANGA_DATA_DIR):
+def update_database(main_url,
+                    manga_name_url,
+                    manga_name,
+                    manga_dir,
+                    manga_data_dir):
     """
     Updates the database already created, adding missing ones.
     main_url: Main webpage name (source).
@@ -357,9 +284,9 @@ def download_chapter(image_list):
         download_image(image)
 
 
-def download_manga(manga_name=MANGA_NAME,
-                   manga_dir=MANGA_DIR,
-                   manga_data_dir=MANGA_DATA_DIR):
+def download_manga(manga_name,
+                   manga_dir,
+                   manga_data_dir):
     """
     Downloads a whole manga, saving it to subfolders.
     Uses the database already created.
-- 
cgit v1.2.3-70-g09d2