我正在学习多项式 logit（multinomial logit）模型，但在使用 xlogit 运行下面的代码时遇到了问题。
# Convert the wide-format table to long format. The call keys rows by
# 'custom_id', writes the alternative label into a new 'alt' column, and
# lists the four alternatives explicitly.
# NOTE(review): with alt_is_prefix=True and sep='_', wide columns are
# presumably expected to be named '<alternative>_<variable>'; columns that do
# not follow that pattern are presumably repeated unchanged on every
# alternative row -- TODO confirm against the xlogit documentation.
from xlogit.utils import wide_to_long
ATUS_data_LA_2020_Long = wide_to_long(ATUS_data_LA_2020_wide, id_col='custom_id', alt_name='alt', sep='_',
alt_list=['Non-Shopping', 'In-store Shopping', 'Online Shopping', 'Both'], empty_val=0, alt_is_prefix=True)
# Names of the explanatory variables passed to the model as X / varnames.
index_var_names = ['MSASIZE','FAMINCOME','AGE_1','SEX','EDUC','EMPSTAT','DIFFMOB_1','FSTRUC']
# Extract the choice column and the alternative labels as flat NumPy arrays.
# NOTE(review): the ValueError raised by fit() below says the data must have
# "one choice per sample". Presumably DEPVAR_1 holds the same code on every
# alternative row of a respondent instead of marking the single chosen
# alternative (either the chosen alternative's label, or a 0/1 indicator that
# sums to 1 per custom_id) -- verify by comparing y with alt per custom_id.
y = ATUS_data_LA_2020_Long['DEPVAR_1'].values.ravel() # 1-D array of the DEPVAR_1 column
alt = ATUS_data_LA_2020_Long['alt'].values.ravel() # 1-D array of alternative labels
# Estimate a multinomial logit with an intercept and per-row weights, using
# the long-format columns prepared above, then print the estimation summary.
# NOTE(review): `np` is not imported in this snippet; presumably it is
# imported earlier in the same notebook cell -- confirm.
from xlogit import MultinomialLogit
model = MultinomialLogit()
model.fit(X=ATUS_data_LA_2020_Long[index_var_names],
y=y,
varnames=index_var_names,
ids=ATUS_data_LA_2020_Long['custom_id'],
alts=alt,
fit_intercept=True,
weights=np.asarray(ATUS_data_LA_2020_Long['WEIGHT']))
model.summary()
我收到这个错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[14], line 40
38 from xlogit import MultinomialLogit
39 model = MultinomialLogit()
---> 40 model.fit(X=ATUS_data_LA_2020_Long[index_var_names],
41 y=y,
42 varnames=index_var_names,
43 ids=ATUS_data_LA_2020_Long['custom_id'],
44 alts=ATUS_data_LA_2020_Long['alt'],
45 fit_intercept=True,
46 weights=np.asarray(ATUS_data_LA_2020_Long['WEIGHT']))
47 model.summary()
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\xlogit\multinomial_logit.py:139, in MultinomialLogit.fit(self, X, y, varnames, alts, ids, isvars, weights, avail, base_alt, fit_intercept, init_coeff, maxiter, random_state, tol_opts, verbose, robust, num_hess, scale_factor)
134 self._validate_inputs(X, y, alts, varnames, isvars, ids, weights)
136 self._pre_fit(alts, varnames, isvars, base_alt, fit_intercept, maxiter)
138 betas, X, y, weights, avail, Xnames, scale = \
--> 139 self._setup_input_data(X, y, varnames, alts, ids,
140 isvars=isvars, weights=weights, avail=avail,
141 init_coeff=init_coeff,
142 random_state=random_state, verbose=verbose,
143 predict_mode=False, scale_factor=scale_factor)
145 tol = {'ftol': 1e-10}
146 if tol_opts is not None:
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\xlogit\multinomial_logit.py:276, in MultinomialLogit._setup_input_data(self, X, y, varnames, alts, ids, isvars, weights, avail, base_alt, fit_intercept, init_coeff, random_state, verbose, predict_mode, scale_factor)
271 def _setup_input_data(self, X, y, varnames, alts, ids, isvars=None,
272 weights=None, avail=None, base_alt=None, fit_intercept=False,
273 init_coeff=None, random_state=None, verbose=1, predict_mode=False,
274 scale_factor=None):
275 self._check_long_format_consistency(ids, alts)
--> 276 y = self._format_choice_var(y, alts) if not predict_mode else None
277 X, Xnames = self._setup_design_matrix(X)
278 N, J, K = X.shape
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\xlogit\_choice_model.py:193, in ChoiceModel._format_choice_var(self, y, alts)
191 return y1h
192 else:
--> 193 raise ValueError("inconsistent 'y' values. Make sure the "
194 "data has one choice per sample")
ValueError: inconsistent 'y' values. Make sure the data has one choice per sample
长格式数据中所有列的行数都相同，而且 “y” 没有缺失值。
# Print the shapes of y and alt to confirm both are 1-D with the same length.
print(y.shape)
print(alt.shape)
----------------
(1420,)
(1420,)
当我改用 reshape 把 y 转换成二维列向量时，会出现另一个错误：
# Reshape y into a 2-D column of shape (number of rows, 1) and cast to int.
# The result is no longer one-dimensional, which matches the
# "y must be an array of one dimension in long format" error reported below.
y = np.array(ATUS_data_LA_2020_Long['DEPVAR_1']).reshape(len(ATUS_data_LA_2020_Long['alt']), 1).astype(int)
---------------
ValueError: y must be an array of one dimension in long format
我查找过类似的问题，但没有一个能解决我的问题。我使用的是 Python 3.10.11。
希望能得到一些关于如何解决 “inconsistent 'y' values”（'y' 值不一致）错误的建议。
提前致谢!