复习:XPath语法
需求:解析出所有城市名称
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
|
"""Fetch the aqistudy historical-data index page and print every city name.

The city names are the text of the anchor elements selected by
``CITY_LINK_XPATH``. The script prints the list of names followed by how
many were found.
"""
import requests
from lxml import etree

# Page whose anchors carry the city names.
URL = 'https://www.aqistudy.cn/historydata/'

# Browser-like User-Agent sent with the request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
}

# Union (XPath ``|``) of two locations under <div class="bottom">:
# anchors in ``ul/li`` and anchors in ``ul/div[2]/li``. One query returns
# both groups in document order.
CITY_LINK_XPATH = (
    '//div[@class="bottom"]/ul/li/a'
    ' | //div[@class="bottom"]/ul/div[2]/li/a'
)

# Seconds to wait for the server; without a timeout ``requests`` may block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10


def fetch_page(url, headers, timeout=REQUEST_TIMEOUT):
    """Return the decoded body of *url*.

    Args:
        url: Address to download.
        headers: HTTP headers to send with the request.
        timeout: Seconds to wait before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were the list.
    response.raise_for_status()
    return response.text


def extract_city_names(page_text):
    """Return the text of every anchor matched by ``CITY_LINK_XPATH``.

    Args:
        page_text: HTML source of the page.

    Returns:
        A list with the first direct text node of each matched anchor, in
        the order the XPath query returns them. Anchors without a direct
        text node are skipped.
    """
    tree = etree.HTML(page_text)
    all_city_names = []
    for a in tree.xpath(CITY_LINK_XPATH):
        texts = a.xpath('./text()')
        # An anchor with no direct text would make ``[0]`` raise IndexError.
        if texts:
            all_city_names.append(texts[0])
    return all_city_names


def main():
    """Download the page, extract the city names and print them."""
    page_text = fetch_page(URL, HEADERS)
    all_city_names = extract_city_names(page_text)
    print(all_city_names, len(all_city_names))


if __name__ == "__main__":
    main()
|
Tips:
Please indicate the source and original author when reprinting or quoting this article.