"""7.12 Kevin -- XPath scraper for zbj.com (Zhubajie) shop search results.

Fetches one search-result page, walks the result cards with XPath and
prints, for every card: shop name, city, rating text and price.
Any field that cannot be located is printed as "no info".
"""
from lxml import html
import requests

etree = html.etree
url = 'https://beijing.zbj.com/search/shop/?type=new&kw=saas'

# Placeholder printed for any field whose XPath matched nothing.
MISSING = "no info"


def _first(nodes, default=MISSING):
    """Return the first item of an XPath result list, or *default* if empty."""
    return nodes[0] if nodes else default


# A timeout keeps the script from hanging forever on an unresponsive server.
resp = requests.get(url, timeout=10)

# Named `tree` (not `html`) so the imported `lxml.html` module is not shadowed.
tree = etree.HTML(resp.text)

# Each matched <div> is one result card. The absolute path is tied to the
# page layout at the time of writing -- NOTE(review): re-check if the site
# markup changes, since a mismatch silently yields zero cards.
divs = tree.xpath('//*[@id="__layout"]/div/div[3]/div/div[3]/div[4]/div[1]/div')

for div in divs:
    name = div.xpath('./div[1]/div/div/a/div/div/div/div[1]/div[1]/div[2]/div[1]/text()')
    city = div.xpath('./div[1]/div/div/a/div/div/div/div[1]/div[1]/div[2]/div[2]/text()')
    price = div.xpath('./div/div/div/a[@target="_blank"]/div/div/div/div[1]/div[2]/div/div/text()')
    com_good = div.xpath('.//div[1]/div/div/a/div/div/div/div[2]/div[2]/text()')

    com_name = _first(name)

    # The city is taken from the THIRD text node, so at least three nodes
    # must exist before indexing. The slice drops the first five characters
    # and the last one -- presumably a fixed label prefix and a trailing
    # delimiter; TODO confirm against the live markup.
    if len(city) > 2:
        city_name = city[2][5:-1]
    else:
        city_name = MISSING

    # Rating and price fall back to the placeholder instead of raising
    # IndexError, so one incomplete card does not abort the whole run.
    print(com_name, city_name, _first(com_good), _first(price))