1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
   |  import requests import parsel import os
  # Scrape the album listing pages of leshetu.top and save every image of each
  # album into its own folder below SAVE_ROOT.

  # Root folder that receives one sub-folder per album.
  SAVE_ROOT = 'C:/Users/13089/Desktop/images'
  # Seconds to wait for a response; without a timeout a stalled server would
  # block the script forever.
  REQUEST_TIMEOUT = 15
  # Characters that Windows does not accept in file or directory names.
  _UNSAFE_CHARS = str.maketrans(dict.fromkeys('\\/:*?"<>|', '_'))

  # The headers never change, so build them once instead of once per page.
  headers = {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
      'Referer': 'https://www.leshetu.me/xz/slct',
  }


  def _safe_name(name):
      """Return *name* with characters that are illegal in Windows names replaced by '_'."""
      # Windows also rejects names that end in a space or a dot.
      return name.translate(_UNSAFE_CHARS).strip().rstrip('. ')


  def _fetch(url):
      """GET *url* and return the response; raise requests.RequestException on failure."""
      response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
      # Turn 4xx/5xx answers into exceptions so an HTML error page is never
      # parsed as a listing or written to disk as an image.
      response.raise_for_status()
      return response


  def _download_album(album_dir, album_url):
      """Save every image found on the album page *album_url* into *album_dir*."""
      os.makedirs(album_dir, exist_ok=True)  # also creates SAVE_ROOT on first run
      try:
          album_html = _fetch(album_url).text
      except requests.RequestException as exc:
          print('相册抓取失败:', album_url, exc)
          return
      img_url_list = parsel.Selector(album_html).xpath(
          '//div[@class="entry-content u-text-format u-clearfix"]/p//img/@src'
      ).getall()

      for img_url in img_url_list:
          # Drop any query string before taking the last path segment.
          file_name = _safe_name(img_url.split('?', 1)[0].split('/')[-1])
          if not file_name:
              continue
          try:
              img_data = _fetch(img_url).content
          except requests.RequestException as exc:
              # Best effort: one broken image must not abort the whole album.
              print('图片下载失败:', img_url, exc)
              continue
          with open(os.path.join(album_dir, file_name), mode='wb') as f:
              f.write(img_data)
          print('下载完成:', file_name)


  for page in range(1, 2):
      print(f'===========正在抓取第{page}页数据==============')
      base_url = f'https://www.leshetu.top/xz/slct/page/{page}'
      try:
          html = _fetch(base_url).text
      except requests.RequestException as exc:
          print('页面抓取失败:', base_url, exc)
          continue
      selector = parsel.Selector(html)
      divs = selector.xpath('//div[@class="row posts-wrapper"]/div')
      for div in divs:
          pic_title = div.xpath('.//h2/a/text()').get()
          pic_url = div.xpath('.//h2/a/@href').get()
          # .get() returns None when the node is missing; such entries cannot
          # be downloaded or named, so skip them.
          if not pic_title or not pic_url:
              continue
          folder_name = _safe_name(pic_title)
          if not folder_name:
              continue
          print('正在下载相册:', pic_title)
          _download_album(os.path.join(SAVE_ROOT, folder_name), pic_url)
 
  |