如何使用 scipy 和 xarray 计算 cdf

问题描述 投票:0回答:0

我想拟合一个

gamma
分布并从下面的 nd DataArray
cdf
得到
test_data
。目前,我可以使用 numpy.apply_along_axis 函数来完成,但无法在 xarray.apply_ufunc() 上实现。我不确定为什么会这样。

这里是示例代码。

import xarray as xr
import numpy as np
import pandas as pd
import scipy.stats as st
# make some random data
random_data=np.random.randint(1,100,(100,50,50))
# Make ramdom datetime 
time_dim=pd.date_range("2000-01-01",periods=len(random_data),freq='MS')
# Make DataArray
test_data=xr.DataArray(random_data,dims=("time","y","x"))
test_data["time"]=time_dim
# Fitting gamma distribution and calculate cdf to each data point as time dimension. But it failed.
def fit_gamma(a):
    if np.isnan(a).all():
        return np.array(a, dtype=np.float32)
    else:
        filter_nan=a[~np.isnan(a)]
        shape, loc, scale=st.gamma.fit(filter_nan, scale=np.std(filter_nan))
        cdf=st.gamma.cdf(a, shape, loc=loc, scale=scale)
        ppf=st.norm.ppf(cdf)
        return np.array(ppf, dtype=np.float32)
# Test for numpy apply_along_axis, and it worked.
result_numpy=np.apply_along_axis(fit_gamma,0, test_data.values)
# test for xarray ufunc, but failed to work
result=xr.apply_ufunc(fit_gamma, test_data, input_core_dims=[["time"]], vectorize=True)

它会抛出很长的错误

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
TypeError: only size-1 arrays can be converted to Python scalars

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_6860/3359275878.py in <module>
     23 # result_numpy=np.apply_along_axis(fit_gamma,0, test_data.values)
     24 # test for xarray ufunc, but failed to work
---> 25 result=xr.apply_ufunc(fit_gamma, test_data, input_core_dims=[["time"]], vectorize=True)

c:\users\hava_tu\appdata\local\programs\python\python38\lib\site-packages\xarray\core\computation.py in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, *args)
   1163     # feed DataArray apply_variable_ufunc through apply_dataarray_vfunc
   1164     elif any(isinstance(a, DataArray) for a in args):
-> 1165         return apply_dataarray_vfunc(
   1166             variables_vfunc,
   1167             *args,

c:\users\hava_tu\appdata\local\programs\python\python38\lib\site-packages\xarray\core\computation.py in apply_dataarray_vfunc(func, signature, join, exclude_dims, keep_attrs, *args)
    288 
    289     data_vars = [getattr(a, "variable", a) for a in args]
--> 290     result_var = func(*data_vars)
    291 
    292     if signature.num_outputs > 1:

c:\users\hava_tu\appdata\local\programs\python\python38\lib\site-packages\xarray\core\computation.py in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
    731             )
    732 
--> 733     result_data = func(*input_data)
    734 
    735     if signature.num_outputs == 1:

c:\users\hava_tu\appdata\local\programs\python\python38\lib\site-packages\numpy\lib\function_base.py in __call__(self, *args, **kwargs)
   2327             vargs.extend([kwargs[_n] for _n in names])
   2328 
-> 2329         return self._vectorize_call(func=func, args=vargs)
   2330 
   2331     def _get_ufunc_and_otypes(self, func, args):

c:\users\hava_tu\appdata\local\programs\python\python38\lib\site-packages\numpy\lib\function_base.py in _vectorize_call(self, func, args)
   2401         """Vectorized call to `func` over positional `args`."""
   2402         if self.signature is not None:
-> 2403             res = self._vectorize_call_with_signature(func, args)
   2404         elif not args:
   2405             res = func()

c:\users\hava_tu\appdata\local\programs\python\python38\lib\site-packages\numpy\lib\function_base.py in _vectorize_call_with_signature(self, func, args)
   2461 
   2462             for output, result in zip(outputs, results):
-> 2463                 output[index] = result
   2464 
   2465         if outputs is None:

ValueError: setting an array element with a sequence.
python scipy numpy-ndarray python-xarray
© www.soinside.com 2019 - 2024. All rights reserved.