# For study and exchange purposes only.
import time as ti
import requests
from bs4 import BeautifulSoup
# Download a range of chapters of one book and save them to a single
# UTF-8 text file, then report how long the download took.

# urljoin resolves chapter hrefs against the index URL (see the loop below).
from urllib.parse import urljoin

# URL of the book's index page; chapter links are resolved against it.
book_url = 'https://www.xbiquge.bz/book/7138/'
# Seconds to wait on each HTTP request, so a stalled connection cannot
# hang the script indefinitely (requests has no timeout by default).
REQUEST_TIMEOUT = 30

response = requests.get(book_url, timeout=REQUEST_TIMEOUT)
# Fail with a clear HTTPError instead of trying to parse an error page.
response.raise_for_status()
index_soup = BeautifulSoup(response.content, 'lxml')
# The <dl> tag contains the <dd> tags, which hold the chapter links.
chapter_list = index_soup.find('dl')
if chapter_list is None:
    raise RuntimeError('no <dl> chapter list found at ' + book_url)
# Keep only the links at positions 12..24; remove the upper bound (25) to
# download the whole book.
# NOTE(review): the first 12 links are skipped, presumably because they are
# not regular chapter entries on this site -- confirm against the page.
a_list = chapter_list.find_all('a')[12:25]
t1 = ti.time()
# Open the output file: first argument is the path, second is the mode
# ('w' means write mode).
with open('逆天邪神.txt', 'w', encoding='utf-8') as file:
    for a in a_list:
        href = a.get('href')
        if not href:
            # An anchor without a target has nothing to download.
            continue
        # urljoin gives the same result as concatenation for a relative
        # href, and also handles site-absolute or full-URL hrefs.
        chap_url = urljoin(book_url, href)
        response = requests.get(chap_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')
        # Extract the chapter body.
        content = soup.find('div', {'id': 'content'})
        if content is None:
            # Unexpected page layout: report it and keep going rather than
            # crashing with an AttributeError halfway through the book.
            print('skipped (no content div):', chap_url)
            continue
        # Extract the chapter title; fall back to the link text when the
        # page has no <title> tag.
        if soup.title is not None:
            title = soup.title.get_text()
        else:
            title = a.get_text()
        maintext = content.get_text()
        # Remove the literal '$ ()' fragment from the extracted text.
        maintext = maintext.replace('$ ()', '')
        # Turn each pair of non-breaking spaces into a line break.
        # NOTE(review): presumably these pairs mark paragraph starts -- confirm.
        maintext = maintext.replace('\xa0\xa0', '\n')
        maintext = title + '\n' + maintext + '\n\n\n'
        # Write the chapter to the file.
        file.write(maintext)
t2 = ti.time()
print('用时:', t2 - t1, 's')
import time as ti
import requests
from bs4 import BeautifulSoup
# Download a range of chapters of one book and save them to a single
# UTF-8 text file, then report how long the download took.
# NOTE(review): this section repeats the download script that appears
# earlier in this file, so the book is fetched and the output file is
# overwritten a second time -- confirm whether the duplicate is intentional.

# urljoin resolves chapter hrefs against the index URL (see the loop below).
from urllib.parse import urljoin

# URL of the book's index page; chapter links are resolved against it.
book_url = 'https://www.xbiquge.bz/book/7138/'
# Seconds to wait on each HTTP request, so a stalled connection cannot
# hang the script indefinitely (requests has no timeout by default).
REQUEST_TIMEOUT = 30

response = requests.get(book_url, timeout=REQUEST_TIMEOUT)
# Fail with a clear HTTPError instead of trying to parse an error page.
response.raise_for_status()
index_soup = BeautifulSoup(response.content, 'lxml')
# The <dl> tag contains the <dd> tags, which hold the chapter links.
chapter_list = index_soup.find('dl')
if chapter_list is None:
    raise RuntimeError('no <dl> chapter list found at ' + book_url)
# Keep only the links at positions 12..24; remove the upper bound (25) to
# download the whole book.
# NOTE(review): the first 12 links are skipped, presumably because they are
# not regular chapter entries on this site -- confirm against the page.
a_list = chapter_list.find_all('a')[12:25]
t1 = ti.time()
# Open the output file: first argument is the path, second is the mode
# ('w' means write mode).
with open('逆天邪神.txt', 'w', encoding='utf-8') as file:
    for a in a_list:
        href = a.get('href')
        if not href:
            # An anchor without a target has nothing to download.
            continue
        # urljoin gives the same result as concatenation for a relative
        # href, and also handles site-absolute or full-URL hrefs.
        chap_url = urljoin(book_url, href)
        response = requests.get(chap_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')
        # Extract the chapter body.
        content = soup.find('div', {'id': 'content'})
        if content is None:
            # Unexpected page layout: report it and keep going rather than
            # crashing with an AttributeError halfway through the book.
            print('skipped (no content div):', chap_url)
            continue
        # Extract the chapter title; fall back to the link text when the
        # page has no <title> tag.
        if soup.title is not None:
            title = soup.title.get_text()
        else:
            title = a.get_text()
        maintext = content.get_text()
        # Remove the literal '$ ()' fragment from the extracted text.
        maintext = maintext.replace('$ ()', '')
        # Turn each pair of non-breaking spaces into a line break.
        # NOTE(review): presumably these pairs mark paragraph starts -- confirm.
        maintext = maintext.replace('\xa0\xa0', '\n')
        maintext = title + '\n' + maintext + '\n\n\n'
        # Write the chapter to the file.
        file.write(maintext)
t2 = ti.time()
print('用时:', t2 - t1, 's')