blob: a0881c6464190b98eeecd8da23b990812f842f95 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
import urllib3
from bs4 import BeautifulSoup
def get_chapter_image_list(chapter_data):
    """
    Gets the url and file name of each image in a chapter.

    chapter_data: A list containing a url and a title. Only the url
        (index 0) is read by this function.

    Returns a list of [image_url, file_name] pairs, where file_name is
    the part of the url after its last '/'. Returns an empty list when
    no usable images are left after trimming.

    NOTE: Not for direct use with the result of 'get_chapters_list'
    """
    # Not actually a file, but the content of the html.
    # The pool manager is used as a context manager so its connection
    # pools are cleared once the page has been downloaded.
    with urllib3.PoolManager() as http:
        page_content = http.request('GET', chapter_data[0]).data

    # Get the data from the html and parse it.
    soup = BeautifulSoup(page_content, 'html.parser')

    # Collect every <img> tag on the page, then drop the first and the
    # last one, since they're trash (not chapter images).
    # Slicing is used instead of 'del' so that a page with zero or one
    # <img> tags gives an empty list instead of raising IndexError.
    chapter_imgs = soup.find_all('img')[1:-1]

    # Stores each image url, with its file name, in a list.
    image_url_list = []
    for img in chapter_imgs:
        src = img.get('src')
        # Skip tags without a 'src'; there's nothing to download.
        if not src:
            continue
        # Splits the url by the char '/' and gets the last item,
        # which is the name of the file.
        image_url_list.append([src, src.split('/')[-1]])
    return image_url_list
|