我一直在尝试从以下站点提取数据:https://bitinfocharts.com/comparison/activeaddresses-btc-eth-ltc.html
令人沮丧的是，该页面没有下载按钮，我一直找不到获取图表中所含数据的方法。我尝试对图表使用“检查”（Inspect），然后查看“网络”（Network）选项卡，但没有找到数据。如果能给一些建议，我将不胜感激。谢谢！
我编写了这个脚本来提取以太坊和比特币的价格数据。您可以修改它以用于其他币种。该代码会下载数据，并对所有缺失值应用三次样条插值。如果不需要插值，可以跳过该部分：
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from scipy.interpolate import CubicSpline
import numpy as np
def parse_strlist(sl):
    """Flatten a textual nested list into a flat list of string tokens.

    Square brackets, commas and all whitespace are removed from ``sl``; the
    remainder is split on single or double quote characters and empty
    fragments are discarded.

    For input shaped like ``[["2009/01/03",0.5],["2009/01/04",null]]`` the
    result is ``['2009/01/03', '0.5', '2009/01/04', 'null']``.

    Note: because commas are stripped before splitting, several unquoted
    values that follow each other (``1,2``) are fused into one token
    (``'12'``). Only the quote characters act as separators.

    Args:
        sl: The string to tokenize.

    Returns:
        A list of the non-empty string tokens, in order of appearance.
    """
    # Raw string: "\[" and "\s" are not valid escapes in a normal string
    # literal and trigger a SyntaxWarning on recent Python versions.
    clean = re.sub(r"[\[\],\s]", "", sl)
    # After the cleanup the quote characters are the only delimiters left.
    return [token for token in re.split("['\"]", clean) if token]
def fetch_data(url, timeout=30):
    """Download a chart page and return the data embedded in its Dygraph script.

    The page is fetched with ``requests`` and parsed with BeautifulSoup. The
    first ``<script>`` element containing the Dygraph constructor call for the
    ``container`` element is located, the text between the last ``[[`` and the
    following ``]]`` is cut out, the ``new Date(`` wrappers and every ``)`` are
    removed, and the result is tokenized by ``parse_strlist``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive host cannot hang the script indefinitely.

    Returns:
        The flat list of string tokens produced by ``parse_strlist``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If no script element containing the Dygraph call is found.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    for script in soup.find_all('script'):
        script_text = script.text
        if 'd = new Dygraph(document.getElementById("container")' not in script_text:
            continue
        # Keep only the array literal: from the last "[[" to the next "]]".
        raw = '[[' + script_text.split('[[')[-1]
        raw = raw.split(']]')[0] + ']]'
        # Unwrap new Date("...") so only the quoted date string remains.
        raw = raw.replace("new Date(", '').replace(')', '')
        return parse_strlist(raw)

    # Without this the caller would receive no data and fail later with an
    # unrelated-looking error.
    raise ValueError(f"No Dygraph chart script found at {url!r}")
def create_dataframe(dataList):
    """Build a date-indexed DataFrame from a flat, alternating token list.

    Tokens at even positions are taken as dates and tokens at odd positions
    as the matching values. A trailing date without a value is dropped.

    Args:
        dataList: Flat sequence of strings laid out as
            ``[date, value, date, value, ...]``.

    Returns:
        A DataFrame indexed by ``Date`` (parsed with ``pd.to_datetime``) with
        a single numeric ``Price`` column. Values that cannot be parsed as
        numbers become NaN.
    """
    # Stride slicing splits by position in linear time. Looking each item up
    # with list.index() is quadratic and reports the position of the first
    # equal item, not of the item being visited.
    dates = dataList[0::2]
    values = dataList[1::2]

    df = pd.DataFrame(list(zip(dates, values)), columns=["Date", "Price"])
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    # errors='coerce' turns non-numeric tokens into NaN instead of raising.
    df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
    return df
def interpolate_data(df):
    """Resample to daily frequency and fill gaps with a cubic spline.

    The frame is resampled to one row per calendar day (mean of each day).
    A cubic spline is fitted through the finite ``Price`` values, using the
    row position as the x coordinate, and the whole ``Price`` column is then
    replaced by the spline evaluated at every row. Rows before the first or
    after the last finite value are therefore extrapolated by the spline.

    The input frame is not modified; resampling produces a new frame.

    Args:
        df: DataFrame with a datetime index and a numeric ``Price`` column.

    Returns:
        The daily-resampled DataFrame with ``Price`` interpolated. If fewer
        than two finite prices are available the resampled frame is returned
        without interpolation, since a spline cannot be fitted.
    """
    daily = df.resample('D').mean()
    positions = np.arange(len(daily))
    prices = daily['Price'].values
    known = np.isfinite(prices)

    # CubicSpline needs at least two points; with less there is nothing to
    # interpolate between, so hand back the resampled data as it is.
    if known.sum() < 2:
        return daily

    spline = CubicSpline(positions[known], prices[known])
    daily['Price'] = spline(positions)
    return daily
# Source chart pages for Bitcoin and Ethereum.
btc_url = 'https://bitinfocharts.com/comparison/bitcoin-price.html#alltime'
eth_url = 'https://bitinfocharts.com/comparison/ethereum-price.html#alltime'

# Pipeline: download -> tabulate -> interpolate. Each stage handles Bitcoin
# first and Ethereum second before the next stage starts.
btc_data_list, eth_data_list = [fetch_data(page) for page in (btc_url, eth_url)]
btc_df, eth_df = [
    create_dataframe(tokens) for tokens in (btc_data_list, eth_data_list)
]
btc_df_interpolated, eth_df_interpolated = [
    interpolate_data(frame) for frame in (btc_df, eth_df)
]

# Write one CSV file per coin.
for frame, csv_name in (
    (btc_df_interpolated, 'bitcoin_prices.csv'),
    (eth_df_interpolated, 'ethereum_prices.csv'),
):
    frame.to_csv(csv_name)