我有一个方差-协方差矩阵作为优化问题的一部分,我想获得 Cholesky 因子。
# The first eight values are irrevalent (which is another part for my optimization problem).
# These values are initiated by scipy.optimize.differential_evolution.
params = np.array([ 0.7806441 , 0.43681974, 0.44591321, 0.40755392, 0.95581486,
0.45218698, 0.72242087, 0.10477314, 0.49510088, 0.46714018,
0.06582374, 0.11473761, 0.07053657, 0.4480119 , 0.47490398,
-0.58165201, 0.80810424, -0.42242146, 0.16330766, 0.88495337,
-0.73821011, 0.82768099, 0.66367593, -0.84882425, 0.51292046,
-0.78708664, -0.97173908, 0.5514854 , -0.1384802 , -0.00899825,
0.55532738, 0.4374165 , 0.63025894, 0.75539676, -0.08648026,
0.40995866])
# standard errors params[8:15], bounded between 0 and 0.5
# because the variable is between 0 and 1.
dev = np.diag(params[8:15])
# correlation coefficients params[15:35], bounded between -1 and 1.
corr1 = np.insert(params[15:21],0,1.0)
corr2 = np.insert(np.concatenate((np.zeros(1), params[21:26])), 1, 1.0)
corr3 = np.insert(np.concatenate((np.zeros(2), params[26:30])), 2, 1.0)
corr4 = np.insert(np.concatenate((np.zeros(3), params[30:33])), 3, 1.0)
corr5 = np.insert(np.concatenate((np.zeros(4), params[33:35])), 4, 1.0)
corr6 = np.insert(np.concatenate((np.zeros(5), params[35:36])), 5, 1.0)
corr7 = np.append(np.zeros(6), 1.0)
X=np.vstack((corr1,corr2,corr3,corr4,corr5,corr6,corr7))
X = X + X.T - np.diag(np.diag(X))
cov = dev.dot(X).dot(dev)
L = np.linalg.cholesky(cov)
Traceback (most recent call last):
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/code.py", line 90, in runcode
exec(code, self.locals)
File "<input>", line 1, in <module>
File "/Library/Python/3.9/site-packages/numpy/linalg/linalg.py", line 779, in cholesky
r = gufunc(a, signature=signature, extobj=extobj)
File "/Library/Python/3.9/site-packages/numpy/linalg/linalg.py", line 115, in _raise_linalgerror_nonposdef
raise LinAlgError("Matrix is not positive definite")
numpy.linalg.LinAlgError: Matrix is not positive definite
# check egienvalues
print(np.linalg.eigvalsh(cov))
[-0.22219591 -0.00343992 0.00541225 0.0116191 0.27866962 0.38321562
0.45878541]
为什么我的方差-协方差矩阵
cov
具有负特征值?我哪里做错了?
由于任何协方差矩阵都保证是正定的,我开始寻找该协方差矩阵包含某种错误或内部矛盾的方式。
我首先看到的是这一行:
cov = dev.dot(X).dot(dev)
虽然这一行对我来说看起来很可疑,但我编写了一个替代实现,它沿行和列乘以标准错误,并且它与这一行一致。
我看的第二件事是相关系数矩阵。虽然相关性一般来说不是传递性的,但对于足够强的相关性来说,它确实遵守一些传递性属性。
利用链接问题中的不等式,我寻找相关系数之间的矛盾。这是乘以标准差之前的相关矩阵 X。
[[ 1. -0.58165201 0.80810424 -0.42242146 0.16330766 0.88495337 -0.73821011]
[-0.58165201 1. 0.82768099 0.66367593 -0.84882425 0.51292046 -0.78708664]
[ 0.80810424 0.82768099 1. -0.97173908 0.5514854 -0.1384802 -0.00899825]
[-0.42242146 0.66367593 -0.97173908 1. 0.55532738 0.4374165 0.63025894]
[ 0.16330766 -0.84882425 0.5514854 0.55532738 1. 0.75539676 -0.08648026]
[ 0.88495337 0.51292046 -0.1384802 0.4374165 0.75539676 1. 0.40995866]
[-0.73821011 -0.78708664 -0.00899825 0.63025894 -0.08648026 0.40995866 1. ]]
在这个矩阵中,1和2之间的相关性是0.82768099。 2 和 0 之间的相关性为 0.80810424。根据所链接问题中的不等式,0 和 1 之间的相关性一定是正的。但它是负面的。我编写的程序还发现了许多其他与此类似的问题。这组相关性是不可能的,结果是协方差矩阵也不可能。
代码:
import numpy as np
import matplotlib.pyplot as plt
# The first eight values are irrevalent (which is another part for my optimization problem).
# These values are initiated by scipy.optimize.differential_evolution.
params = np.array([ 0.7806441 , 0.43681974, 0.44591321, 0.40755392, 0.95581486,
0.45218698, 0.72242087, 0.10477314, 0.49510088, 0.46714018,
0.06582374, 0.11473761, 0.07053657, 0.4480119 , 0.47490398,
-0.58165201, 0.80810424, -0.42242146, 0.16330766, 0.88495337,
-0.73821011, 0.82768099, 0.66367593, -0.84882425, 0.51292046,
-0.78708664, -0.97173908, 0.5514854 , -0.1384802 , -0.00899825,
0.55532738, 0.4374165 , 0.63025894, 0.75539676, -0.08648026,
0.40995866])
# standard errors params[8:15], bounded between 0 and 0.5
# because the variable is between 0 and 1.
dev = params[8:15]
# correlation coefficients params[15:35], bounded between -1 and 1.
corr1 = np.insert(params[15:21],0,1.0)
corr2 = np.insert(np.concatenate((np.zeros(1), params[21:26])), 1, 1.0)
corr3 = np.insert(np.concatenate((np.zeros(2), params[26:30])), 2, 1.0)
corr4 = np.insert(np.concatenate((np.zeros(3), params[30:33])), 3, 1.0)
corr5 = np.insert(np.concatenate((np.zeros(4), params[33:35])), 4, 1.0)
corr6 = np.insert(np.concatenate((np.zeros(5), params[35:36])), 5, 1.0)
corr7 = np.append(np.zeros(6), 1.0)
X=np.vstack((corr1,corr2,corr3,corr4,corr5,corr6,corr7))
X = X + X.T - np.diag(np.diag(X))
np.set_printoptions(edgeitems=30, linewidth=100000)
print(X)
def sign(x):
if x > 0:
return 1
elif x < 0:
return -1
else:
raise Exception()
for corr_idx_x in range(7):
for corr_idx_y in range(corr_idx_x + 1, 7):
for corr_idx_z in range(7):
if len(set((corr_idx_x, corr_idx_y, corr_idx_z))) != 3:
# Skip duplicate correlation indexes
continue
corr_val_xy = X[corr_idx_x, corr_idx_y]
corr_val_yz = X[corr_idx_y, corr_idx_z]
corr_val_zx = X[corr_idx_z, corr_idx_x]
if corr_val_yz ** 2 + corr_val_zx ** 2 > 1:
condition = "anything"
if sign(corr_val_yz) == sign(corr_val_zx):
condition = "positive"
elif -1 * sign(corr_val_yz) == sign(corr_val_zx):
condition = "negative"
print_it = False
if condition == "positive" and corr_val_xy < 0:
print_it = True
elif condition == "negative" and corr_val_xy > 0:
print_it = True
if print_it:
print(f"Correlation between {corr_idx_y} and {corr_idx_z} is {corr_val_yz}")
print(f"Correlation between {corr_idx_z} and {corr_idx_x} is {corr_val_zx}")
print(f"Therefore, correlation between {corr_idx_x} and {corr_idx_y} must be {condition}, actually {corr_val_xy}")
print()