start
逻辑一样,换成 bs4 的 find_all 方法即可
html = BeautifulSoup(response.text,'lxml')
luck = html.find_all('p',class_="txt")[1].text.strip()
选取 html 中所有 class 值为 txt 的 p 标签;由于会匹配到多余的标签,用 [1] 选取我们需要的那一个;.text 获得标签中的文本,strip() 去掉前后空格
其中我们解析器使用的是 lxml,推荐使用它,解析快,准确
end
import requests
import time
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
def get_html(url):
    '''
    Request a page and return the response.

    Retries with a fresh random User-Agent on each attempt; gives up
    after 3 failed attempts (non-200 status or network error).

    :param url: URL to fetch
    :return: the requests Response (encoding forced to utf-8) on
             HTTP 200, otherwise None
    '''
    for _attempt in range(3):  # at most 3 attempts, then give up
        headers = {
            'User-agent': UserAgent().random  # rotate UA to reduce blocking
        }
        try:
            response = requests.get(url, headers=headers)
        except requests.RequestException:
            # network error (timeout, DNS, connection reset, ...) counts
            # as a failed attempt instead of crashing the whole script
            continue
        if response.status_code == 200:
            response.encoding = 'utf-8'
            return response
    return None
def get_infos(response):
    '''
    Extract the horoscope text from a fetched page.

    :param response: requests Response for one constellation page
    :return: the horoscope string with surrounding whitespace removed
    '''
    soup = BeautifulSoup(response.text, 'lxml')  # lxml: fast, lenient parser
    # The page has several <p class="txt"> tags; the second one holds
    # the text we want.
    paragraphs = soup.find_all('p', class_="txt")
    return paragraphs[1].text.strip()
def write_txt(_, info):
    '''
    Append one constellation's horoscope to luck.txt.

    :param _: constellation name
    :param info: horoscope text (surrounding whitespace is stripped)
    :return: None
    '''
    cleaned = info.strip()
    with open('luck.txt', 'a+', encoding='utf-8') as out:
        # name on its own line, then the text, then a blank separator line
        out.write('{}\n{}\n\n'.format(_, cleaned))
if __name__ == '__main__':
    # Names of all 12 constellations, used to build the per-sign URLs.
    constellation_name = ['Aries', 'Taurus', 'Gemini', 'Cancer', 'Leo',
                          'Virgo', 'Libra', 'Scorpio', 'Sagittarius',
                          'Capricorn', 'Aquarius', 'Pisces']
    for name in constellation_name:
        url = 'https://www.d1xz.net/astro/{}/'.format(name)
        response = get_html(url)
        if response is None:  # identity check for None; skip failed fetches
            continue
        info = get_infos(response)
        write_txt(name, info)
        time.sleep(1)  # throttle requests to be polite to the server