使用BeautifulSoup查询元素

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import requests
from bs4 import BeautifulSoup
import json
# Scrape rental listings from result pages 1-10 of fangstar.com and save them
# as one JSON document in house_homework.json.

# Headers sent with every request; the User-Agent is a desktop browser string.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36 Edg/99.0.1150.36"
}

# Seconds to wait for the server; without a timeout a stalled connection
# would block the script indefinitely.
REQUEST_TIMEOUT = 10


def parse_listing(item):
    """Extract one rental listing from a ``rhs-list-item`` element.

    Args:
        item: A BeautifulSoup tag for a single listing.

    Returns:
        A dict with the keys "标题", "房屋类型", "面积", "楼层", "建筑年限",
        "租金", "地点", "作者" and "发布时间". Values taken via ``.string``
        may be ``None`` when the tag has no single text child.

    Raises:
        AttributeError, TypeError, IndexError: If an expected child element
            is missing from the listing markup.
    """
    # Look up each container once instead of re-querying it per field.
    base_info = item.find(attrs={"class": "base-info"})
    info_spans = base_info.find_all("span")
    title = base_info.find(attrs={"class": "dic-name nowrap"})["title"]

    price_spans = item.find(attrs={"class": "total-price"}).find_all("span")

    location = item.find(attrs={"class": "location-wrap nowrap"})
    location_links = location.find_all("a")
    location_spans = location.find_all("span")

    post_spans = item.find(attrs={"class": "post-info"}).find_all("span")

    # NOTE(review): the span positions (1-4) are taken from the original
    # script and presumably match the site's markup -- confirm against the
    # live page if fields look shifted.
    return {
        "标题": title,
        "房屋类型": info_spans[1].string,
        "面积": info_spans[2].string,
        "楼层": info_spans[3].string,
        "建筑年限": info_spans[4].string,
        "租金": "{}{}".format(price_spans[0].string, price_spans[1].string),
        "地点": "{0}-{1}-{2}".format(
            location_links[0].string,
            location_links[1].string,
            location_spans[0].string,
        ),
        "作者": post_spans[0].string,
        "发布时间": post_spans[1].string,
    }


houses = []
for page in range(1, 11):
    url = f"https://www.fangstar.com/rentls/pg{page}/"
    response = requests.get(url=url, headers=header, timeout=REQUEST_TIMEOUT)
    html = BeautifulSoup(response.text, "html.parser")
    for item in html.find_all(attrs={"class": "rhs-list-item"}):
        house = parse_listing(item)
        houses.append(house)
        # NOTE(review): the original indentation was lost; the progress
        # output is assumed to be printed once per listing.
        print(f"第{page}页")
        print(house)

print(houses)

j = {
    "data": houses
}
# Write with "w" rather than "a": appending a second JSON document to an
# existing file on a re-run would leave the file unparseable as JSON.
with open("house_homework.json", "w", encoding="UTF-8") as f:
    json.dump(j, f, ensure_ascii=False)