从 HTML 图表获取数据

问题描述 投票:0回答:1

我一直在尝试从以下站点提取数据:https://bitinfocharts.com/comparison/activeaddresses-btc-eth-ltc.html

令人沮丧的是,没有下载按钮,我一直很难找到一些方法来访问该图表中包含的数据。我尝试“检查”图表,然后查看“网络”选项卡,但无济于事。我将非常感谢一些建议。谢谢!

html charts xmlhttprequest
1个回答
0
投票

我编写了这个脚本来提取以太坊和比特币的数据。您可以修改它以用于其他币种。该代码会下载数据，并对所有缺失值应用三次样条插值。如果不需要插值，可以跳过这一部分：

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from scipy.interpolate import CubicSpline
import numpy as np

def parse_strlist(sl):
    """Flatten the text of a JavaScript array literal into string tokens.

    Square brackets, commas and all whitespace are removed from ``sl``; the
    remainder is split on single or double quote characters and empty pieces
    are discarded.  Because commas are stripped, quotes are the only
    separators left: a quoted item and the unquoted text following it come
    out as two consecutive tokens, while several unquoted items in a row are
    fused into one token.

    Args:
        sl: Text such as ``'[["2020/01/01",1.5],["2020/01/02",null]]'``.

    Returns:
        The non-empty tokens in order of appearance, all as strings, e.g.
        ``['2020/01/01', '1.5', '2020/01/02', 'null']``.
    """
    # Raw strings keep ``\[`` and ``\s`` as regex escapes rather than invalid
    # string-literal escapes, which newer Python versions warn about.
    clean = re.sub(r"[\[\],\s]", "", sl)
    pieces = re.split(r"['\"]", clean)
    return [piece for piece in pieces if piece != '']

def fetch_data(url):
    """Download a chart page and extract its Dygraph data as a flat token list.

    The page is fetched with ``requests`` and parsed with BeautifulSoup.  The
    first ``<script>`` element whose text contains the Dygraph constructor
    call for the ``"container"`` element is used: the text from the last
    ``[[`` up to the first following ``]]`` is taken as the data literal,
    every ``new Date(`` and every ``)`` is stripped from it, and the result
    is tokenised with ``parse_strlist``.

    Args:
        url: Address of the page to download.

    Returns:
        The list of string tokens produced by ``parse_strlist``, or ``None``
        when no script on the page contains the Dygraph constructor call.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            exceeds the timeout.
    """
    # Without an explicit timeout ``requests`` waits indefinitely on a
    # server that accepts the connection but never answers.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    for script in soup.find_all('script'):
        if 'd = new Dygraph(document.getElementById("container")' not in script.text:
            continue
        js_text = script.text
        # Keep only what follows the last "[[" ...
        js_text = '[[' + js_text.split('[[')[-1]
        # ... and cut it off at the first "]]" after that point.
        js_text = js_text.split(']]')[0] + ']]'
        # Drop the JavaScript Date wrappers so only the quoted dates remain.
        # Note that this removes every ")" in the span, not just the wrappers'.
        js_text = js_text.replace("new Date(", '').replace(')', '')
        return parse_strlist(js_text)
    # No matching script: callers receive None and must handle it.
    return None

def create_dataframe(dataList):
    """Build a date-indexed price DataFrame from an alternating token list.

    Args:
        dataList: Flat list of strings in which even positions hold dates
            and odd positions hold the matching values, e.g.
            ``['2020/01/01', '1.5', '2020/01/02', 'null']``.  A trailing
            date without a value is ignored.

    Returns:
        A DataFrame indexed by ``Date`` (parsed with ``pd.to_datetime``)
        with a single numeric ``Price`` column.  Values that cannot be
        parsed as numbers become NaN.
    """
    # Split by position.  Looking each element up with list.index() costs a
    # linear scan per element and reports only the first occurrence, so a
    # string present at both a date slot and a value slot would be misfiled.
    dates = dataList[0::2]
    values = dataList[1::2]
    # zip() pairs the two lists and drops an unpaired trailing date.
    df = pd.DataFrame(list(zip(dates, values)), columns=["Date", "Price"])
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    # errors='coerce' turns non-numeric tokens such as "null" into NaN.
    df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
    return df

def interpolate_data(df):
    """Resample to daily frequency and fill the ``Price`` column with a cubic spline.

    Args:
        df: DataFrame with an index that supports ``resample('D')`` and a
            numeric ``Price`` column.

    Returns:
        A new daily-frequency DataFrame whose ``Price`` column holds the
        spline evaluated at every row position.  The spline is fitted only
        to rows whose daily mean is finite.  The frame passed in is not
        modified.
    """
    # Days without observations become NaN rows after the daily mean.
    daily = df.resample('D').mean()
    prices = daily['Price'].values
    positions = np.arange(len(daily))
    # Fit on the finite samples only, using the row number as the x axis.
    known = np.isfinite(prices)
    spline = CubicSpline(positions[known], prices[known])
    daily['Price'] = spline(positions)
    return daily

# Chart pages to scrape: one for Bitcoin, one for Ethereum.
btc_url = 'https://bitinfocharts.com/comparison/bitcoin-price.html#alltime'
eth_url = 'https://bitinfocharts.com/comparison/ethereum-price.html#alltime'

# Step 1: download both pages and pull out the raw token lists.
btc_data_list, eth_data_list = fetch_data(btc_url), fetch_data(eth_url)

# Step 2: turn each token list into a date-indexed DataFrame.
btc_df, eth_df = create_dataframe(btc_data_list), create_dataframe(eth_data_list)

# Step 3: resample to daily rows and fill the gaps with a cubic spline.
btc_df_interpolated, eth_df_interpolated = (
    interpolate_data(btc_df),
    interpolate_data(eth_df),
)

# Step 4: write one CSV file per coin.
btc_df_interpolated.to_csv('bitcoin_prices.csv')
eth_df_interpolated.to_csv('ethereum_prices.csv')
© www.soinside.com 2019 - 2024. All rights reserved.