1 files changed, 35 insertions, 0 deletions
diff --git a/src/tirante/get_chapter_image_list.py b/src/tirante/get_chapter_image_list.py
new file mode 100644
index 0000000..a0881c6
--- /dev/null
+++ b/src/tirante/get_chapter_image_list.py
@@ -0,0 +1,35 @@
+import urllib3
+from bs4 import BeautifulSoup
+
+
+def get_chapter_image_list(chapter_data):
+    """
+    Gets the links for each image in the chapter,
+    and returns a list of the links.
+    Returns a list of the image urls and its file name.
+    chapter_data: A list containing a url and a title.
+    NOTE: Not for direct use with the result of 'get_chapters_list'
+    """
+
+    # Not actually a file, but the content of the html.
+    html = urllib3.PoolManager().request('GET', chapter_data[0])
+
+    # Get the data from the html and parse it.
+    soup = BeautifulSoup(html.data, 'html.parser')
+
+    # Get the "vung-doc" class, this contains a url for each page,
+    # which redirects to the source of the image.
+    # Deletes the first and last items, since they're trash.
+    soup_img = soup.find_all('img')
+    del soup_img[0]
+    del soup_img[len(soup_img)-1]
+
+    # Stores each image url in a list.
+    image_url_list = []
+    for img in soup_img:
+        # Gets the sring of the url, splits it by the char '/',
+        # and gets the last item, which is the name of the file.
+
+        image_url_list.append([img['src'], img['src'].split('/')[-1]])
+
+    return image_url_list