我正在尝试用 Python 实现一个简化的 JPEG 压缩流程:只做 DCT 和量化,不做霍夫曼编码。
下面是我目前写出的代码(对同一张图像先压缩、再解压缩):
import cv2 as cv
from scipy.fftpack import dct, idct
import numpy as np
from PIL import Image
# Load the source picture and collapse OpenCV's BGR channels to grayscale.
image = cv.imread("test.png")
x = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
# Two independent float copies of the grayscale pixels: `arr` is the input
# that gets sliced into blocks, `image_quant` receives the processed blocks.
arr = np.asarray(x, dtype=float)
image_quant = arr.copy()
# Quantization table (the JPEG-standard matrix, per the original comment);
# one divisor per position of an 8x8 coefficient block.
qm = [
    [16, 11, 10, 16, 24, 40, 51, 61],
    [12, 12, 14, 19, 26, 58, 60, 55],
    [14, 13, 16, 24, 40, 57, 69, 56],
    [14, 17, 22, 29, 51, 87, 80, 62],
    [18, 22, 37, 56, 68, 109, 103, 77],
    [24, 35, 55, 64, 81, 104, 113, 92],
    [49, 64, 78, 87, 103, 121, 120, 101],
    [72, 92, 95, 98, 112, 100, 103, 99],
]
# Height (rows) and width (columns) of one JPEG processing window.
ws_r = ws_c = 8
# Scale one 8x8 block of DCT coefficients by the quantization table.
def quantize(inMatrix, qm):
    """Divide an 8x8 coefficient block element-wise by the table ``qm``.

    Args:
        inMatrix: 8x8 indexable block of coefficients.
        qm: 8x8 indexable quantization table.

    Returns:
        A new 8x8 float array holding the raw quotients. No rounding is
        applied here, so the values are not reduced to integers.
    """
    quotients = np.empty((8, 8))
    for row, col in np.ndindex(8, 8):
        quotients[row][col] = inMatrix[row][col] / qm[row][col]
    return quotients
# Divide the image into 8x8 blocks, run each block through scipy's dct and
# scale the result with quantize(). The range bounds stop one window short of
# each image dimension.
for r in range(0, np.size(arr, 0) - ws_r, ws_r):
    rows = slice(r, r + ws_r)
    for c in range(0, np.size(arr, 1) - ws_c, ws_c):
        cols = slice(c, c + ws_c)
        image_quant[rows, cols] = quantize(dct(arr[rows, cols]), qm)
# Wrap the float array in a PIL image, convert it to RGB, then save and
# display it.
image_quant = Image.fromarray(image_quant).convert('RGB')
image_quant.save('test_quant_dct.png')
image_quant.show()
# Now invert the above process by dequantizing and applying the inverse DCT.
# Start by reading the saved coefficient image back from disk as grayscale.
image = cv.imread("test_quant_dct.png")
x = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
# `arr` is sliced into blocks; `image_dequant` receives the restored blocks.
arr = np.asarray(x, dtype=float)
image_dequant = arr.copy()
def dequantize(inMatrix, qm):
    """Multiply an 8x8 block element-wise by the quantization table ``qm``.

    Args:
        inMatrix: 8x8 indexable block of quantized values.
        qm: 8x8 indexable quantization table.

    Returns:
        A new 8x8 float array holding the products; no rounding is applied.
    """
    products = np.empty((8, 8))
    for row, col in np.ndindex(8, 8):
        products[row][col] = inMatrix[row][col] * qm[row][col]
    return products
# Walk the reloaded image in 8x8 blocks, rescale each block with
# dequantize() and pass it through scipy's idct. The range bounds stop one
# window short of each image dimension.
for r in range(0, np.size(arr, 0) - ws_r, ws_r):
    rows = slice(r, r + ws_r)
    for c in range(0, np.size(arr, 1) - ws_c, ws_c):
        cols = slice(c, c + ws_c)
        image_dequant[rows, cols] = idct(dequantize(arr[rows, cols], qm))
# Wrap the float array in a PIL image, convert it to RGB, then save and
# display it.
image_dequant = Image.fromarray(image_dequant).convert('RGB')
image_dequant.save('test_dequant_dct.png')
image_dequant.show()
运行上述代码后可以看到,经过 8x8 分块 DCT 和量化处理的图像被破坏了:得到的几乎只是一张白色图像,只剩下一些模糊的区域,原图内容完全丢失。到目前为止,我还没弄清楚自己在这个过程中哪里做错了。
你的做法基本上是正确的。我做了三处修改之后,你的代码看起来就能正确压缩图像了。首先,你需要完成压缩中有损的那一步。我把
.astype(int)
添加到 quantize 函数的返回值上。这会把矩阵中的浮点数转换为整数,信息正是在这一步丢失的。
def quantize(inMatrix, qm):
    """Quantize a block of DCT coefficients with the table ``qm``.

    Each coefficient is divided by the matching table entry and rounded to
    the nearest integer. Rounding (rather than truncating toward zero) keeps
    the quantization error within half a step and does not shrink every
    coefficient toward zero.

    Args:
        inMatrix: 2-D block of DCT coefficients (8x8 for the JPEG table).
        qm: Quantization table with the same shape as ``inMatrix``.

    Returns:
        An integer array with the same shape as ``qm``.

    Raises:
        ValueError: If the block and the table do not have the same shape.
    """
    table = np.asarray(qm, dtype=float)
    coeffs = np.asarray(inMatrix, dtype=float)
    # Refuse mismatched shapes explicitly so broadcasting cannot silently
    # stretch a short block across the whole table.
    if coeffs.shape != table.shape:
        raise ValueError(
            f"block shape {coeffs.shape} does not match "
            f"table shape {table.shape}"
        )
    return np.rint(coeffs / table).astype(int)
其次,JPEG 使用归一化的比例因子,使变换成为正交变换。SciPy 的 dct 默认(norm=None)不做归一化,并且只沿最后一个轴做一维变换,因此它的缩放因子与 JPEG 不同。由于你已经导入了 OpenCV,你只需把
cv.
加到 dct 和 idct 调用的前面即可(即改用 cv.dct 和 cv.idct)。OpenCV 的实现默认采用与 JPEG 一致的缩放因子。
第三,我从两层 for 循环的 range 上界中去掉了
-8
(即代码中的 -ws_r 和 -ws_c),这样处理才能覆盖到图像边缘的块。
# Quantize the DCT of every complete window, including the windows that
# touch the right and bottom image edges.
for r in range(0, np.size(arr, 0), ws_r):
    for c in range(0, np.size(arr, 1), ws_c):
        window = arr[r:r+ws_r, c:c+ws_c]
        # When an image dimension is not a multiple of the window size, the
        # last slice is smaller than the window. quantize() needs a full
        # block, so the leftover pixels are left untouched.
        if window.shape != (ws_r, ws_c):
            continue
        image_quant[r:r+ws_r, c:c+ws_c] = quantize(cv.dct(window), qm)
# Dequantize every complete window and apply the inverse DCT, including the
# windows that touch the right and bottom image edges.
for r in range(0, np.size(arr, 0), ws_r):
    for c in range(0, np.size(arr, 1), ws_c):
        window = arr[r:r+ws_r, c:c+ws_c]
        # When an image dimension is not a multiple of the window size, the
        # last slice is smaller than the window. dequantize() needs a full
        # block, so the leftover pixels are left untouched.
        if window.shape != (ws_r, ws_c):
            continue
        image_dequant[r:r+ws_r, c:c+ws_c] = cv.idct(dequantize(window, qm))