import requests,time
agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.42"
headers = {"User-Agent": agent}

# Catalog endpoint; ``{code}`` is three zero-padded 3-digit counters (i, j, k).
# NOTE(review): the query asks for size=50 starting at 1, so a catalog entry
# with more than 50 items would presumably be truncated - confirm.
CATALOG_URL = 'https://t.cnki.net/rbook-api/v1/book/R201211157/catalog?code={code}&type=&size=50&start=1'
OUTPUT_PATH = 'd:/fydcd.txt'
REQUEST_DELAY_SECONDS = 5
REQUEST_TIMEOUT_SECONDS = 30


def next_position(i, j, k, has_results):
    """Return the next (i, j, k) counters to request, or None when finished.

    Args:
        i, j, k: 1-based counters of the code that was just requested.
        has_results: True if that request reported a non-zero total.

    Returns:
        The next (i, j, k) tuple, or None once an empty response is seen at
        j == 1 and k == 1, which is treated as the end of the catalog.
    """
    if has_results:
        return i, j, k + 1
    if k != 1:
        # The k sequence under this j ran out; move on to the next j.
        return i, j + 1, 1
    if j != 1:
        # The first k of this j is already empty; move on to the next i.
        return i + 1, 1, 1
    # Empty at j == 1 and k == 1. This case must be tested before a plain
    # "k == 1" check, otherwise it can never be reached and the loop would
    # keep incrementing i forever.
    return None


def fetch_catalog(i, j, k):
    """Request one catalog code and return the ``data`` object of the reply.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    code = ''.join(str(part).zfill(3) for part in (i, j, k))
    response = requests.get(
        CATALOG_URL.format(code=code),
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,  # do not hang forever on a stalled connection
    )
    response.raise_for_status()
    return response.json()['data']


def append_titles(entries):
    """Append the titles of *entries* to OUTPUT_PATH as one line.

    Each title is followed by a single space and the line ends with a newline.
    """
    line = ''.join(entry['title'] + ' ' for entry in entries) + '\n'
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
        f.write(line)


def main():
    """Walk the catalog codes in order and save every non-empty title list."""
    i, j, k = 1, 1, 1
    while True:
        payload = fetch_catalog(i, j, k)
        has_results = payload['total'] != 0
        if has_results:
            append_titles(payload['data'])

        position = next_position(i, j, k, has_results)
        if position is None:
            break
        i, j, k = position

        # Pause after every request, including the ones that came back empty,
        # so that runs of empty codes do not hit the server back-to-back.
        time.sleep(REQUEST_DELAY_SECONDS)


if __name__ == "__main__":
    main()
试试行不行。我之前没加延迟,结果被封了 IP,现在代码已经加上延迟了。文件保存在 D 盘的 fydcd.txt 中。