以下是我的代码:
import numpy as np
import pandas as pd
import statsmodels.api as sm
# Generate dummy data
np.random.seed(123)
X = np.random.normal(size=(100, 5))
y1 = np.random.normal(size=100)
y2 = np.random.normal(size=100)
data = pd.DataFrame(X, columns=['x1', 'x2', 'x3', 'x4', 'x5'])
data['y1'] = y1
data['y2'] = y2
# Fit model for y1
X = sm.add_constant(data[['x1', 'x2', 'x3', 'x4', 'x5']])
model1 = sm.OLS(data['y1'], X).fit()
# Fit model for y2
model2 = sm.OLS(data['y2'], X).fit()
# Print model summaries
print(model1.summary())
print(model2.summary())
# Fit joint model
X = sm.add_constant(data[['x1', 'x2', 'x3', 'x4', 'x5']])
y = data[['y1', 'y2']]
model_joint = sm.OLS(y, X).fit()
results_df = pd.DataFrame()
results_df['Coefficients Y1'] = model_joint.params.iloc[:, 0]
results_df['Coefficients Y2'] = model_joint.params.iloc[:, 1]
print(results_df)
results_df['Std Errors Y1'] = model_joint.bse.iloc[:, 0].values
results_df['Std Errors Y2'] = model_joint.bse.iloc[:, 1].values
print(results_df)
我在代码中遇到以下错误。我正在尝试拟合 2 个响应变量和 1 个预测变量的模型...请帮忙...
ValueError Traceback (most recent call last)
<ipython-input-1-e65d97313ad8> in <cell line: 34>()
32 results_df['Coefficients Y2'] = model_joint.params.iloc[:, 1]
33 print(results_df)
---> 34 results_df['Std Errors Y1'] = model_joint.bse.iloc[:, 0].values
35 results_df['Std Errors Y2'] = model_joint.bse.iloc[:, 1].values
36 print(results_df)
6 frames
/usr/local/lib/python3.10/dist-packages/statsmodels/base/wrapper.py in __getattribute__(self, attr)
32 pass
33
---> 34 obj = getattr(results, attr)
35 data = results.model.data
36 how = self._wrap_attrs.get(attr)
/usr/local/lib/python3.10/dist-packages/pandas/_libs/properties.pyx in pandas._libs.properties.CachedProperty.__get__()
/usr/local/lib/python3.10/dist-packages/statsmodels/regression/linear_model.py in bse(self)
1881 def bse(self):
1882 """The standard errors of the parameter estimates."""
-> 1883 return np.sqrt(np.diag(self.cov_params()))
1884
1885 @cache_readonly
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py in cov_params(self, r_matrix, column, scale, cov_p, other)
1526 else:
1527 if scale is None:
-> 1528 scale = self.scale
1529 cov_p = self.normalized_cov_params * scale
1530
/usr/local/lib/python3.10/dist-packages/statsmodels/tools/decorators.py in __get__(self, obj, type)
93 _cachedval = _cache.get(name, None)
94 if _cachedval is None:
---> 95 _cachedval = self.fget(obj)
96 _cache[name] = _cachedval
97
/usr/local/lib/python3.10/dist-packages/statsmodels/regression/linear_model.py in scale(self)
1714 """
1715 wresid = self.wresid
-> 1716 return np.dot(wresid, wresid) / self.df_resid
1717
1718 @cache_readonly
/usr/local/lib/python3.10/dist-packages/numpy/core/overrides.py in dot(*args, **kwargs)
ValueError: shapes (100,2) and (100,2) not aligned: 2 (dim 1) != 100 (dim 0)
我在代码中遇到以下错误。我正在尝试拟合 2 个响应变量和 1 个预测变量的模型...请帮忙...
你能检查一下这是否是你想要的吗?这将为 x1..x5 找到最佳系数,以便它们同时适合 y1 和 y2。为此,我将数据集一个一个地堆叠在一起。
data1 = pd.DataFrame(X, columns=['x1', 'x2', 'x3', 'x4', 'x5'])
data1['y'] = y1
data2 = pd.DataFrame(X, columns=['x1', 'x2', 'x3', 'x4', 'x5'])
data2['y'] = y2
model1 = sm.OLS(data1['y'], X).fit()
model2 = sm.OLS(data2['y'], X).fit()
data_joint = pd.concat((data1, data2))
X_joint = sm.add_constant(data_joint[['x1', 'x2', 'x3', 'x4', 'x5']])
model_joint = sm.OLS(data_joint['y'], X_joint).fit()