复习:XPath语法
需求:解析下载图片数据
"""Download the images referenced on a pic.netbian.com listing page.

The listing page is fetched once and parsed with XPath.  For every
``<li>`` under ``div.slist`` the ``<img>`` source is downloaded and saved
in ``picLibs/`` under a name derived from the image's ``alt`` text.
"""

import os
import re
import sys
from urllib.parse import urljoin

import requests
from lxml import etree

LIST_URL = 'http://pic.netbian.com/4kmeinv/'
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.90 Safari/537.36'
    )
}
OUTPUT_DIR = 'picLibs'
# The original script repaired titles with encode('iso-8859-1').decode('gbk'),
# i.e. the page bytes are GBK.  Declaring that up front replaces the repair.
PAGE_ENCODING = 'gbk'
# Seconds to wait on a connection/read before giving up instead of hanging.
REQUEST_TIMEOUT = 10


def safe_file_name(title: str, fallback: str) -> str:
    """Return a ``.jpg`` file name built from *title*.

    Characters that are path separators or are rejected by common file
    systems are replaced with ``_`` so the name can never escape
    ``OUTPUT_DIR``.  When nothing usable remains, *fallback* is used as the
    stem instead.
    """
    stem = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title).strip()
    if not stem:
        stem = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', fallback).strip() or 'image'
    return stem + '.jpg'


def main() -> None:
    """Fetch the listing page and download every image it references."""
    response = requests.get(url=LIST_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Must be set before reading .text so the titles decode correctly.
    response.encoding = PAGE_ENCODING
    tree = etree.HTML(response.text)

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for li in tree.xpath('//div[@class="slist"]/ul/li'):
        sources = li.xpath('./a/img/@src')
        if not sources:
            # No <img src> in this list item: nothing to download.
            continue
        titles = li.xpath('./a/img/@alt')

        # urljoin handles both site-absolute ("/uploads/...") and relative paths.
        img_src = urljoin(LIST_URL, sources[0])
        url_stem = os.path.splitext(os.path.basename(img_src))[0]
        img_name = safe_file_name(titles[0] if titles else '', url_stem)

        try:
            img_response = requests.get(
                url=img_src, headers=HEADERS, timeout=REQUEST_TIMEOUT
            )
            # Without this an HTML error page would be saved as a .jpg.
            img_response.raise_for_status()
        except requests.RequestException as exc:
            # One bad image should not abort the remaining downloads.
            print(img_name, '下载失败:', exc, file=sys.stderr)
            continue

        img_path = os.path.join(OUTPUT_DIR, img_name)
        with open(img_path, 'wb') as fp:
            fp.write(img_response.content)
        print(img_name, '下载成功!!!')


if __name__ == "__main__":
    main()
Tips:
Please indicate the source and original author when reprinting or quoting this article.