start
逻辑一样,换成 bs4 的 find_all 方法即可
html = BeautifulSoup(response.text,'lxml')
luck = html.find_all('p',class_="txt")[1].text.strip()
选取 html 中所有 class 值为 txt 的 p 标签;由于会匹配到多余的标签,用 [1] 选取我们需要的那一个;.text 获得标签中的文本,strip() 去掉前后空格
其中我们解析器使用的是 lxml,推荐使用它,解析快,准确
end
import requests
import time
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
def get_html(url):
    '''
    Request a page and return the response.

    Retries with a fresh random User-Agent on each attempt; gives up
    after 3 failed attempts (non-200 status or network error).

    :param url: URL to fetch
    :return: the requests Response (encoding forced to utf-8) on
             HTTP 200, otherwise None
    '''
    for _attempt in range(3):  # at most 3 attempts, then give up
        headers = {
            'User-agent': UserAgent().random  # rotate UA to reduce blocking
        }
        try:
            response = requests.get(url, headers=headers)
        except requests.RequestException:
            # network error (timeout, DNS, connection reset, ...) counts
            # as a failed attempt instead of crashing the whole script
            continue
        if response.status_code == 200:
            response.encoding = 'utf-8'
            return response
    return None
def get_infos(response):
    '''
    Extract the horoscope text from a fetched page.

    :param response: requests Response for one constellation page
    :return: the horoscope string with surrounding whitespace removed
    '''
    soup = BeautifulSoup(response.text, 'lxml')  # lxml: fast, lenient parser
    # The page has several <p class="txt"> tags; the second one holds
    # the text we want.
    paragraphs = soup.find_all('p', class_="txt")
    return paragraphs[1].text.strip()
def write_txt(_, info):
    '''
    Append one constellation's horoscope to luck.txt.

    :param _: constellation name
    :param info: horoscope text (surrounding whitespace is stripped)
    :return: None
    '''
    cleaned = info.strip()
    with open('luck.txt', 'a+', encoding='utf-8') as out:
        # name on its own line, then the text, then a blank separator line
        out.write('{}\n{}\n\n'.format(_, cleaned))
if __name__ == '__main__':
    # Names of all 12 constellations, used to build the per-sign URLs.
    constellation_name = ['Aries', 'Taurus', 'Gemini', 'Cancer', 'Leo',
                          'Virgo', 'Libra', 'Scorpio', 'Sagittarius',
                          'Capricorn', 'Aquarius', 'Pisces']
    for name in constellation_name:
        url = 'https://www.d1xz.net/astro/{}/'.format(name)
        response = get_html(url)
        if response is None:  # identity check for None; skip failed fetches
            continue
        info = get_infos(response)
        write_txt(name, info)
        time.sleep(1)  # throttle requests to be polite to the server