1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
| import requests import parsel import os
# Download every image of every album listed on the paginated index pages.
# Each album gets its own sub-directory (named after the album title) under
# SAVE_ROOT; each image is stored there under the last segment of its URL.

# Directory that receives one sub-directory per album.
SAVE_ROOT = 'C:/Users/13089/Desktop/images'

# Seconds to wait on a server before giving up, so that a stalled connection
# cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Characters Windows forbids in file and directory names, each mapped to '_'.
_UNSAFE_CHARS = str.maketrans({char: '_' for char in '\\/:*?"<>|'})

# The same headers are sent with every request, so build them once.
# NOTE(review): the Referer host (leshetu.me) differs from the host being
# requested (leshetu.top); kept exactly as in the original -- confirm.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
    'Referer': 'https://www.leshetu.me/xz/slct',
}

for page in range(1, 2):  # range(1, 2) yields only page 1
    print(f'===========正在抓取第{page}页数据==============')
    base_url = f'https://www.leshetu.top/xz/slct/page/{page}'

    # A failure to fetch the listing page itself is left to propagate, as in
    # the original: without the listing there is nothing to download.
    response = requests.get(url=base_url, headers=headers, timeout=REQUEST_TIMEOUT)
    html = response.text
    selector = parsel.Selector(html)
    divs = selector.xpath('//div[@class="row posts-wrapper"]/div')

    for div in divs:
        pic_title = div.xpath('.//h2/a/text()').get()
        pic_url = div.xpath('.//h2/a/@href').get()

        # .get() returns None when the node is missing; such an entry can be
        # neither named nor fetched, so skip it instead of raising TypeError.
        folder_name = (pic_title or '').strip().translate(_UNSAFE_CHARS)
        if not folder_name or not pic_url:
            continue
        print('正在下载相册:', pic_title)

        # makedirs also creates SAVE_ROOT when it is missing, and exist_ok
        # removes the check-then-create race of exists() + mkdir().
        album_dir = os.path.join(SAVE_ROOT, folder_name)
        os.makedirs(album_dir, exist_ok=True)

        # Best effort: an album page that cannot be fetched is skipped, but
        # only request errors are caught (Ctrl-C still stops the script).
        try:
            html_2 = requests.get(url=pic_url, headers=headers, timeout=REQUEST_TIMEOUT).text
        except requests.RequestException as exc:
            print('相册请求失败, 已跳过:', pic_title, exc)
            continue

        selector_2 = parsel.Selector(html_2)
        img_url_list = selector_2.xpath(
            '//div[@class="entry-content u-text-format u-clearfix"]/p//img/@src'
        ).getall()

        for img_url in img_url_list:
            # The last URL segment is the file name; an URL ending in '/'
            # yields an empty name and cannot be saved.
            file_name = img_url.split('/')[-1].translate(_UNSAFE_CHARS)
            if not file_name:
                continue

            # Best effort per image: skip the ones that fail to download.
            try:
                img_data = requests.get(url=img_url, headers=headers, timeout=REQUEST_TIMEOUT).content
            except requests.RequestException as exc:
                print('图片请求失败, 已跳过:', img_url, exc)
                continue

            # The original appended file_name twice to the path, so every
            # image was written under a doubled name; join it exactly once.
            with open(os.path.join(album_dir, file_name), mode='wb') as f:
                f.write(img_data)
            print('下载完成:', file_name)
|