1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
| # -*- coding: UTF-8 -*-
from selenium import webdriver
from bs4 import BeautifulSoup
import requests, threading
def download_img(img, num):
    """Download one image and save it as ``<save_url><num + 1>.jpg``.

    Args:
        img: URL of the image to fetch.
        num: Zero-based page index; the output file is named ``num + 1``.

    Reads the module-level ``save_url`` (output path prefix), which must be
    set before this function is called. On a network or HTTP error the
    failure is printed and no file is written.
    """
    try:
        # A timeout keeps a stalled connection from hanging this worker
        # thread (and the final join()) forever.
        r = requests.get(img, timeout=30)
        # Refuse to store an HTTP error page under a .jpg name.
        r.raise_for_status()
    except requests.RequestException as exc:
        print('Failed to download %s: %s' % (img, exc))
        return
    with open(save_url + str(num + 1) + '.jpg', 'wb') as f:
        f.write(r.content)
if __name__ == "__main__":
    # Script entry point: open the first page of a chapter in Chrome, read the
    # page count and the first image URL from the rendered HTML, then fetch
    # every page image in its own thread.
    import os  # local import: only the script entry point needs it

    save_url = './test/'
    url = 'https://www.ohmanhua.com/13621/1/1.html'

    # The workers open files under save_url; create it up front so they do
    # not all fail with FileNotFoundError on a fresh checkout.
    os.makedirs(save_url, exist_ok=True)

    chop = webdriver.ChromeOptions()
    chop.add_extension('Adblock-Plus_v3.8.4.crx')
    browser = webdriver.Chrome(options=chop)
    try:
        browser.implicitly_wait(10)
        browser.get(url)
        soup = BeautifulSoup(browser.page_source, 'lxml')
    finally:
        # The browser is only needed for the rendered page source; always
        # release it, even if loading or parsing raised.
        browser.quit()

    # The last <option> of the page selector carries the total page count.
    img_count = int(soup.find('select', {'class': 'mh_select'}).find_all('option')[-1].get('value'))
    img_url = soup.find_all('div', {'class': 'mh_comicpic'})[0].find('img').get('src')
    # A src starting with '/' (e.g. protocol-relative '//host/...') gets the
    # scheme prepended; startswith() also tolerates an empty string.
    if img_url.startswith('/'):
        img_url = 'https:' + img_url
    # Width of the numeric file name (e.g. '0001' -> 4), used for zero padding.
    m = len(img_url.rsplit('/', 1)[1].split('.')[0])
    # Keep only the directory part of the URL, with a trailing slash.
    img_url = img_url.rsplit('/', 1)[0] + '/'

    th_list = []
    for i in range(img_count):
        img = img_url + str(i + 1).zfill(m) + '.jpg'
        th = threading.Thread(target=download_img, args=(img, i))
        th.start()
        th_list.append(th)
    # Wait for every download to finish before exiting.
    for th in th_list:
        th.join()
|