path: root/tirante.py
author    David Luevano <55825613+luevano@users.noreply.github.com>  2019-10-11 20:45:44 -0600
committer David Luevano <55825613+luevano@users.noreply.github.com>  2019-10-11 20:45:44 -0600
commit    9a9ee70ffcfa1b695cea18a019f7d510c14e6b12 (patch)
tree      ebc2cd9aa2237135c6954cefe1bacae04556dde6 /tirante.py
parent    17bef995a65d18fddb19dba11243853cf6ae2233 (diff)
Restructure files.
Diffstat (limited to 'tirante.py')
-rw-r--r--  tirante.py  486
1 file changed, 0 insertions, 486 deletions
diff --git a/tirante.py b/tirante.py
deleted file mode 100644
index 0781ed2..0000000
--- a/tirante.py
+++ /dev/null
@@ -1,486 +0,0 @@
-"""MIT License
-
-Copyright (c) 2019 David Luevano
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-import os
-import urllib3
-from bs4 import BeautifulSoup
-import requests
-
-# Main manga source.
-MAIN_URL = 'https://manganelo.com/manga/'
-# Manga name.
-MANGA_NAME = 'Kimetsu no Yaiba'
-# Manga name as it appears in the url.
-MANGA_NAME_URL = 'kimetsu_no_yaiba/'
-
-# PC main file location.
-MANGA_DIR = 'E:\\Mangas\\'
-# PC main manga data location.
-MANGA_DATA_DIR = ''.join(['C:\\Users\\Lorentzeus\\Google Drive\\',
- 'Personal\\Python\\mangas\\data'])
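-
-# NOTE: These paths are machine-specific. On another setup they might
-# look like the following (illustrative values only):
-# MANGA_DIR = '/home/user/mangas/'
-# MANGA_DATA_DIR = '/home/user/mangas/data'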
-
-
-def get_chapters_list(main_url=MAIN_URL,
- manga_name_url=MANGA_NAME_URL,
- manga_name=MANGA_NAME,
- reverse_sorted=True):
- """
- Retrieves chapter urls and names. Returns a list of lists
- containing the url and the title of the chapter.
- main_url: Main webpage name (source).
- manga_name_url: Name of the manga in the url format
- that's used by the webpage.
- manga_name: Actual name of the manga, as it appears in the webpage.
-    reverse_sorted: Whether to reverse the order of the final list.
- """
-
-    # Use the parameters, not the module-level constants.
-    manga_url = ''.join([main_url, manga_name_url])
-
- # Not actually a file, but the content of the html.
- html = urllib3.PoolManager().request('GET', manga_url)
-
- # Get the data from the html and parse it.
- soup = BeautifulSoup(html.data, 'html.parser')
-
- # Get the "rows" class, this contains the url
- # and title data for each chapter.
- # Deletes the first tag, since it's not useful.
- soup_rows = soup.find_all('div', {'class': 'row'})
- del soup_rows[0]
-
-    # Creates a list to store the data for each url and chapter name.
- chapter_list = []
-
- for row in soup_rows:
-
- # Gets the url name from the a tag.
- href = row.a['href']
-        # Same, for the title. Deletes every occurrence of the manga name
-        # and unwanted characters, and then gets every word.
- title_words = row.a['title'].replace(manga_name, '').replace('?', '')
- title_words = title_words.replace(':', '').replace('-', '')
- title_words = title_words.replace('...', '').replace(',', '').split()
-
-        # Doing all the work in a one-liner doesn't work for some
-        # chapters, for some reason.
- # title = '_'.join(row.a['title'].replace(manga_name, '')
- # .replace(':', '').replace('-', '').lower().split())
-
-        # Lowercase every word, then join them with '_' as a separator.
-        title = '_'.join(word.lower() for word in title_words)
-
- # print(href, title)
- chapter_list.append([href, title])
-
- if reverse_sorted:
- return chapter_list[::-1]
- else:
- return chapter_list
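-
-# A quick sanity-check sketch (illustrative, assuming the manganelo
-# markup described above still holds): each entry pairs a chapter url
-# with a folder-friendly title.
-# chapter_list = get_chapters_list()
-# print(chapter_list[0])  # e.g. ['https://manganelo.com/...', 'chapter_1_...']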
-
-
-def chapters_list_to_csv(chapters_list,
- manga_name=MANGA_NAME):
- """
- Creates a csv file from the input chapter_list.
- chapters_list: List of data of the chapters.
- manga_name: Name of the manga, folder naming friendly.
- """
-
- # Adding '.csv' for csv creation.
- m_name_ext = ''.join([manga_name, '.csv'])
- # print(m_name)
-
- with open(m_name_ext, 'w') as outcsv:
- for chapter in chapters_list:
- outcsv.write(''.join([chapter[0], ',', chapter[1], '\n']))
-
-
-def chapters_csv_to_list(chapter_csv):
- """
-    Returns a list of chapters from a csv file.
-    chapter_csv: csv file containing the chapters data.
- """
-
- out_chapters_list = []
-
- with open(chapter_csv, 'r') as incsv:
- lines = incsv.readlines()
- for line in lines:
- out_chapters_list.append(line.strip().split(','))
-
- return out_chapters_list
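-
-# Round-trip sketch (illustrative only): writing the list to csv and
-# reading it back should give the same data.
-# chapters_list_to_csv(chapter_list, manga_name='kimetsu_no_yaiba')
-# assert chapters_csv_to_list('kimetsu_no_yaiba.csv') == chapter_list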
-
-
-def get_chapter_image_list(chapter_data):
- """
- Gets the links for each image in the chapter,
- and returns a list of the links.
- Returns a list of the image urls and its file name.
- chapter_data: A list containing a url and a title.
-    NOTE: Takes a single chapter's entry, not the whole list
-    returned by 'get_chapters_list'.
- """
-
- # Not actually a file, but the content of the html.
- html = urllib3.PoolManager().request('GET', chapter_data[0])
-
- # Get the data from the html and parse it.
- soup = BeautifulSoup(html.data, 'html.parser')
-
- # Get the "vung-doc" class, this contains a url for each page,
- # which redirects to the source of the image.
- # Deletes the first and last items, since they're trash.
- soup_img = soup.find_all('img')
- del soup_img[0]
- del soup_img[len(soup_img)-1]
-
- # Stores each image url in a list.
- image_url_list = []
- for img in soup_img:
-        # Gets the string of the url, splits it by the char '/',
-        # and gets the last item, which is the name of the file.
-
- image_url_list.append([img['src'], img['src'].split('/')[-1]])
-
- return image_url_list
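-
-# Sketch (illustrative): takes a single [url, title] pair, such as
-# chapter_list[0] from get_chapters_list, and yields [url, file_name]
-# pairs for every page image.
-# images = get_chapter_image_list(chapter_list[0])
-# print(images[0])  # e.g. ['https://.../001.jpg', '001.jpg']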
-
-
-def chapter_image_list_to_csv(chapter_data):
- """
- Creates csv file for a chapter, given the list.
- chapter_data: A list containing a url and a title.
- """
-
- ch_name = ''.join([chapter_data[1], '.csv'])
-
- chapter_image_list = get_chapter_image_list(chapter_data)
-
- with open(ch_name, 'w') as outcsv:
- for image in chapter_image_list:
- outcsv.write(''.join([image[0], ',', image[1], '\n']))
-
-
-def chapter_image_csv_to_list(chapter_image_csv):
- """
- Returns a list given the csv file.
- chapter_image_csv: csv containing data for the chapter.
- """
-
- out_chapter_image_list = []
-
- with open(chapter_image_csv, 'r') as incsv:
- lines = incsv.readlines()
- for line in lines:
- # print(line.strip().split(','))
- out_chapter_image_list.append(line.strip().split(','))
-
- return out_chapter_image_list
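-
-# Sketch (illustrative, assumes chapter_image_list_to_csv already ran
-# for this chapter): read the per-chapter csv back into a list.
-# for image in chapter_image_csv_to_list('chapter_1_cruelty.csv'):
-#     print(image)  # [image_url, file_name]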
-
-
-def create_database(main_url=MAIN_URL,
- manga_name_url=MANGA_NAME_URL,
- manga_name=MANGA_NAME,
- manga_dir=MANGA_DIR,
- manga_data_dir=MANGA_DATA_DIR):
- """
- Creates a database from zero, made of csv files.
- main_url: Main webpage name (source).
- manga_name_url: Name of the manga in the url format
- that's used by the webpage.
- manga_name: Actual name of the manga, as it appears in the webpage.
- manga_dir: Main manga folder in computer, subfolders here will be created.
- manga_data_dir: Main manga data folder in computer.
-    NOTE: This does not update an existing database.
-    If a database already exists, the creation of existing files is skipped.
- """
-
- # A better "naming" for the manga, for use with folder creation.
- # As well as the name of the main database.
- m_name = '_'.join(word.lower() for word in manga_name.split())
- m_name_ext = ''.join([m_name, '.csv'])
-
- # Navigate to where the main data folder is,
- # then to where the manga folder is.
- os.chdir(manga_data_dir)
- try:
- os.mkdir(m_name)
- os.chdir(m_name)
- except FileExistsError:
- print(''.join([m_name,
- ' folder already exists.']))
- os.chdir(m_name)
-
- # List of files and folders in the current path.
- data_list_dir = os.listdir()
-
- # Get the list of chapters, if this already exists,
- # read it from the database.
- # This is the main manga data.
- if m_name_ext not in data_list_dir:
- chapters_list = get_chapters_list(main_url=main_url,
- manga_name_url=manga_name_url,
- manga_name=manga_name)
- chapters_list_to_csv(chapters_list=chapters_list, manga_name=m_name)
- else:
- print(''.join([m_name_ext, ' already exists.']))
- chapters_list = chapters_csv_to_list(m_name_ext)
-
- # Data for each chapter.
- for chapter in chapters_list:
- # Get the list for the images of each chapter.
- chapter_name_ext = ''.join([chapter[1], '.csv'])
- if chapter_name_ext not in data_list_dir:
- chapter_image_list_to_csv(chapter)
- else:
- print(''.join([chapter_name_ext, ' already exists.']))
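-
-# First-run sketch: build the whole csv database with the module-level
-# defaults, or point it at another manga (hypothetical values shown).
-# create_database()
-# create_database(manga_name='Some Manga', manga_name_url='some_manga/')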
-
-
-def update_database(main_url=MAIN_URL,
- manga_name_url=MANGA_NAME_URL,
- manga_name=MANGA_NAME,
- manga_dir=MANGA_DIR,
- manga_data_dir=MANGA_DATA_DIR):
- """
-    Updates the already-created database, adding the missing chapters.
- main_url: Main webpage name (source).
- manga_name_url: Name of the manga in the url format
- that's used by the webpage.
- manga_name: Actual name of the manga, as it appears in the webpage.
- manga_dir: Main manga folder in computer, subfolders here will be created.
- manga_data_dir: Main manga data folder in computer.
- """
-
- # A better "naming" for the manga, for use with folder creation.
- # As well as the name of the main database.
- m_name = '_'.join(word.lower() for word in manga_name.split())
- m_name_ext = ''.join([m_name, '.csv'])
-
- # Navigate to where the main data folder is,
- # then to where the manga folder is.
- os.chdir(manga_data_dir)
- try:
- os.mkdir(m_name)
- os.chdir(m_name)
- except FileExistsError:
- print(''.join([m_name,
- ' folder already exists.']))
- os.chdir(m_name)
-
- # Get a list of files present in path.
- data_list_dir = os.listdir()
-
- # First, download the data from the web.
- new_chapter_list = get_chapters_list(main_url=main_url,
- manga_name_url=manga_name_url,
- manga_name=manga_name,
- reverse_sorted=False)
-
- # And then, read the current database.
- last_chapter = chapters_csv_to_list(m_name_ext)[-1]
-
- # The missing chapters.
- missing_chapters = []
- for chapter in new_chapter_list:
- # If we get to the last acquired chapter, exit loop.
- if chapter == last_chapter:
- break
- missing_chapters.append(chapter)
-
- # Reverse the order.
- missing_chapters = missing_chapters[::-1]
-
-    # Append the new chapters to the already existing csv file.
-    # No need to check if the items are present, since that was
-    # already done while building missing_chapters.
- with open(m_name_ext, 'a') as outcsv:
- for chapter in missing_chapters:
- outcsv.write(''.join([chapter[0], ',', chapter[1], '\n']))
-
- # Create the missing csv data files for each chapter.
- for chapter in missing_chapters:
- # Get the list for the images of each chapter.
- chapter_name_ext = ''.join([chapter[1], '.csv'])
- if chapter_name_ext not in data_list_dir:
- chapter_image_list_to_csv(chapter)
- else:
- print(''.join([chapter_name_ext, ' already exists.']))
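-
-# Maintenance sketch: safe to run repeatedly, since only chapters newer
-# than the last entry in the csv are appended.
-# update_database()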
-
-
-def download_image(image_list):
- """
- Downloads an image from the specified url,
- and saves it with the specified name.
- image_list: list that contains url and name.
- """
-
-    # Gets the content of an image from its url.
- img_data = requests.get(image_list[0]).content
-
- # Opens a file with its corresponding name as 'wb' (write, binary),
- # and then, writes the img_data.
- with open(image_list[1], 'wb') as handler:
- handler.write(img_data)
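-
-# Sketch (hypothetical [url, name] pair, same shape as the entries
-# produced by get_chapter_image_list):
-# download_image(['https://.../001.jpg', '001.jpg'])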
-
-
-def download_chapter(image_list):
- """
- Downloads the whole chapter as images.
-    image_list: List containing urls and file names for each image.
- """
-
- for image in image_list:
- print(image)
- download_image(image)
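-
-# Sketch: fetch every page of one chapter into the current working
-# directory, reusing the list from get_chapter_image_list.
-# download_chapter(get_chapter_image_list(chapter_list[0]))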
-
-
-def download_manga(manga_name=MANGA_NAME,
- manga_dir=MANGA_DIR,
- manga_data_dir=MANGA_DATA_DIR):
- """
- Downloads a whole manga, saving it to subfolders.
-    Uses the database already created.
- manga_name: Actual name of the manga, as it appears in the webpage.
- manga_dir: Main manga folder in computer, subfolders here will be created.
- manga_data_dir: Main manga data folder in computer.
- NOTE: This updates the manga, downloading the missing chapters
- if they're listed in the database.
- """
-
- # A better "naming" for the manga, for use with folder creation.
- # As well as the name of the main database.
- m_name = '_'.join(word.lower() for word in manga_name.split())
- m_name_ext = ''.join([m_name, '.csv'])
-
- # Go to where the database is located.
- os.chdir(manga_data_dir)
- try:
- os.chdir(m_name)
- except FileNotFoundError:
- print(''.join([m_name,
- ' folder doesn\'t exist.',
- ' Most likely, the database hasn\'t been created.']))
- raise NameError('Create database first.')
-
- # Get info of the files in the database.
- data_list_dir = os.listdir()
-
- # Reads data from the main database.
- if m_name_ext not in data_list_dir:
-        print(''.join([m_name_ext,
-                       ' database file doesn\'t exist.',
-                       ' Most likely, the database hasn\'t been created.']))
- raise NameError('Create database first.')
- else:
- chapters_list = chapters_csv_to_list(m_name_ext)
-
- # Navigate to the main manga dir,
- # and either create or go to manga folder.
- os.chdir(manga_dir)
- try:
- os.mkdir(manga_name)
- os.chdir(manga_name)
- except FileExistsError:
- print(''.join([manga_name,
- ' folder already exists.']))
- os.chdir(manga_name)
-
- # Get data of the folders in the manga folder.
- manga_list_dir = os.listdir()
-
- for chapter in chapters_list:
- # chapter_url = chapter[0]
- chapter_name = chapter[1]
- ch_name_ext = ''.join([chapter_name, '.csv'])
-
- if chapter_name not in manga_list_dir:
- print(''.join(['Downloading ',
- chapter_name,
- ' now.']))
- # First, create the chapter folder.
- os.mkdir(chapter_name)
-
- # Go to where the database is located.
- os.chdir(manga_data_dir)
- os.chdir(m_name)
- chapter_image_list = chapter_image_csv_to_list(ch_name_ext)
-
-            # Go back to where the manga is going to be downloaded.
- os.chdir(manga_dir)
- os.chdir(manga_name)
- os.chdir(chapter_name)
-
- # Download all the chapter images on its respective folder.
- download_chapter(chapter_image_list)
-
- # Go back one folder to repeat the process
- # for the next chapter.
- os.chdir('..')
- else:
- print(''.join([chapter_name,
- ' already downloaded.']))
-
-
-#
-#
-# TEST STUFF
-#
-
-# create_database()
-# update_database()
-# download_manga()
-
-# print(os.listdir())
-
-# chapters_csv_to_list(chapter_csv='kimetsu_no_yaiba.csv')
-
-# chapter_list = get_chapters_list()
-# for chapter in chapter_list:
-# print(chapter)
-
-# chapters_list_to_csv(chapters_list=chapter_list)
-
-# os.chdir('data/kimetsu_no_yaiba')
-# for image in chapter_image_csv_to_list('chapter_1_cruelty.csv'):
-# print(image)
-
-# chapters_list = chapters_csv_to_list(chapter_csv='kimetsu_no_yaiba.csv')
-
-# chapter_image_list_to_csv(chapters_list[0])
-
-
-# first_chapter_img_url_list = get_chapter_image_list(chapter_list[0])
-
-# download_image(first_chapter_img_url_list[0])
-
-
-# os.chdir(MANGA_DIR)
-
-# download_chapter(first_chapter_img_url_list)
-
-print('This is a test, bitch.')