我使用下面的代码来收集比特币数据:
# -*- coding: UTF-8 -*-
import os
import re
import requests
import datetime
from bs4 import BeautifulSoup
# Data points dated on or before this day are discarded by parse_record().
first_date = datetime.datetime(2010, 7, 16)

# One chart data point as it appears in the page source, e.g.
#   [new Date("2010/07/17"),0.0495]
# Matching by pattern instead of by fixed character offsets keeps the parser
# working when a month or day is written with a single digit.
_RECORD_RE = re.compile(
    r'\[(?P<stamp>new Date\("(?P<year>\d{4})/(?P<month>\d{1,2})/(?P<day>\d{1,2})"\)),'
    r'(?P<value>.*)\]'
)


def parse_record(record):
    """Reformat one chart data point, dropping points not after ``first_date``.

    Args:
        record: Text of the form ``[new Date("YYYY/MM/DD"),VALUE]``.

    Returns:
        The string ``[new Date("YYYY/MM/DD"), VALUE]`` (a single space after
        the comma) when the date is strictly later than ``first_date``;
        otherwise ``None``.

    Raises:
        ValueError: If ``record`` does not have the expected layout or its
            year/month/day do not form a valid calendar date.
    """
    match = _RECORD_RE.fullmatch(record)
    if match is None:
        raise ValueError("unrecognised price record: %r" % (record,))

    date = datetime.datetime(
        int(match.group("year")),
        int(match.group("month")),
        int(match.group("day")),
    )
    if date <= first_date:
        return None
    return "[{}, {}]".format(match.group("stamp"), match.group("value"))
# Download the all-time bitcoin price chart page and save its data points to
# btc-price.txt as comma-separated `[new Date("YYYY/MM/DD"),value]` records.
url_price = 'https://bitinfocharts.com/comparison/bitcoin-price.html#alltime'

# A timeout stops the script from hanging on a stalled connection, and
# raise_for_status() turns an HTTP error response into an exception instead of
# letting it be parsed as if it were the chart page.
response_price = requests.get(url_price, timeout=30)
response_price.raise_for_status()

# Search the whole page text rather than one <script> tag picked by position:
# a fixed tag index stops working as soon as the page layout changes.
pattern_price = re.compile(r'\[new Date\("\d{4}/\d{2}/\d{2}"\),\d*\.?\w*\]')
records_price = pattern_price.findall(response_price.text)
if not records_price:
    # Fail before opening the output file so an earlier good file is not
    # overwritten with an empty one.
    raise RuntimeError('no price records found at ' + url_price)

# parse_record() returns None for points that are filtered out; skip those.
price = [
    parsed
    for parsed in map(parse_record, records_price)
    if parsed is not None
]

# Written format: no spaces except the one inside "new Date".
output_text = ",".join(
    entry.replace(" ", "").replace("newDate", "new Date") for entry in price
)
with open("btc-price.txt", "w", encoding="utf-8") as text_file:
    text_file.write(output_text)
一年前,这段代码还能把价格信息保存到 txt 文件中,一切正常。但现在它无法运行了,似乎是网站页面发生了改动,导致代码失效。我是编程新手,有人能帮帮我吗?
对于这样一个简单的任务,你的代码过于复杂了。你没有说明应该以什么格式保存结果,所以我给出一个仅用 pandas 输出的示例;不过 result 变量中保存了完整的字典列表。
import pandas as pd
import re
from datetime import datetime
# Fetch the chart page and pull every `"YYYY/MM/DD"),<digits>` pair out of the
# raw HTML text.
response = requests.get('https://bitinfocharts.com/comparison/bitcoin-price.html#alltime')
matches = re.findall(r'"\d+\/\d+\/\d+"\),\d+', response.text)

# Each match holds exactly one comma, so splitting on it yields the quoted
# date fragment and the value digits.
# NOTE(review): the trailing `\d+` stops at the first non-digit, so any
# fractional part of a value is not captured; 'Value' is kept as a string.
result = [
    {
        'Date': str(datetime.strptime(date_text, '"%Y/%m/%d")').date()),
        'Value': value_text,
    }
    for date_text, value_text in (found.split(',') for found in matches)
]

df = pd.DataFrame(result)
print(df)
输出:
Date Value
0 2010-07-17 0
1 2010-07-18 0
2 2010-07-19 0
3 2010-07-20 0
4 2010-07-21 0
... ... ...
4845 2023-10-22 29916
4846 2023-10-23 30877
4847 2023-10-24 34086
4848 2023-10-25 34329
4849 2023-10-26 34259