使用Python从相册中提取照片

Question

我有一本实体相册，其中每一页可能包含一张或多张粘贴在其上的照片。

我为每个单独的页面拍了一张照片，其中包含多张照片。现在，我将拍摄的所有照片放入一个文件夹中，我想使用 Python 对其进行迭代，以提取粘贴在该页面上的所有照片。

我有以下 Python 脚本，但它的缺点是会检测到过多的轮廓（包括照片内部的轮廓）。

当页面背景为白色时，获得正确对比度的好（替代）方法是什么？

# Load the photographed album page; `image` is the file name inside "images/"
img = cv2.imread("images/" + image)

# Collapse the BGR page to a single grayscale channel
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Preview the grayscale page and block until a key is pressed
cv2.imshow('Gray Image', gray)
cv2.waitKey(0)

# Smooth with a 5x5 Gaussian so fine texture produces fewer edges
blurred = cv2.GaussianBlur(gray, (5, 5), 0)

# Canny edge map with hysteresis thresholds 50 (low) and 150 (high)
edged = cv2.Canny(blurred, 50, 150)

# Only the outermost contours are requested (RETR_EXTERNAL)
contours, _ = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Discard every contour whose area does not exceed min_area
min_area = 50000
filtered_contours = list(
    filter(lambda candidate: cv2.contourArea(candidate) > min_area, contours)
)

# Crop the axis-aligned bounding box of each remaining contour out of the page
extracted_photos = []
for i, contour in enumerate(filtered_contours):
    left, top, width, height = cv2.boundingRect(contour)
    crop = img[top:top + height, left:left + width]
    extracted_photos.append(crop)
    # To save each crop to disk, enable the following line
    # cv2.imwrite(f'photo_{i}.jpg', crop)

# Show the full page first, then each crop in its own window
cv2.imshow('Original Image', img)
cv2.waitKey(0)

for i, photo in enumerate(extracted_photos):
    cv2.imshow(f'Photo {i}', photo)
    cv2.waitKey(0)

# Close every window opened above
cv2.destroyAllWindows()

原图

灰度照片

轮廓

Answer 1

图片不是很清晰。如果正对着相册页面拍摄，您可能会得到更好的结果。对于分离重叠或彼此非常接近的物体，建议使用分水岭算法。

算法一步一步的输出如下。

import cv2
from matplotlib import pyplot as plt
import numpy as np

# Function to calculate kernel size based on sigma value
def get_ksize(sigma):
    """Return an odd, positive Gaussian kernel size for the given sigma.

    The size is first estimated as ``int((sigma - 0.8) / 0.15 + 2.0)``.
    That estimate can be even (e.g. 30 for ``sigma=5``) or non-positive
    for small sigmas, so it is clamped to at least 1 and bumped to the
    next odd number. An odd size gives the kernel a well-defined centre
    tap, so filtering does not shift the image by half a pixel.

    NOTE(review): the constants 0.8 and 0.15 look derived from OpenCV's
    default sigma-from-ksize relation -- confirm against the OpenCV docs.

    Args:
        sigma: Gaussian standard deviation in pixels.

    Returns:
        An odd integer >= 1.
    """
    ksize = int(((sigma - 0.8) / 0.15) + 2.0)
    # Small sigmas drive the estimate to zero or below.
    ksize = max(ksize, 1)
    # Round even sizes up so the kernel has a centre element.
    if ksize % 2 == 0:
        ksize += 1
    return ksize

# Function to apply Gaussian blur to the image
def get_gaussian_blur(img, ksize=0, sigma=5):
    """Blur ``img`` with a square Gaussian kernel.

    Args:
        img: Image array accepted by ``cv2.filter2D``.
        ksize: Kernel side length; ``0`` means "derive it from ``sigma``"
            via ``get_ksize``.
        sigma: Gaussian standard deviation passed to
            ``cv2.getGaussianKernel``.

    Returns:
        The filtered image, as returned by ``cv2.filter2D`` with
        ``ddepth=-1``.
    """
    # ksize == 0 is the sentinel for "choose a size from sigma"
    kernel_size = get_ksize(sigma) if ksize == 0 else ksize

    # Build the 2-D kernel as the outer product of the 1-D kernel with itself
    kernel_1d = cv2.getGaussianKernel(kernel_size, sigma)
    kernel_2d = np.outer(kernel_1d, kernel_1d)

    return cv2.filter2D(img, -1, kernel_2d)

# Load the image
img = cv2.imread("img.jpg")
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("Could not read image file: img.jpg")

# Convert the image to grayscale
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply Gaussian blur to the grayscale image (kernel size derived from sigma)
img_blur = get_gaussian_blur(img_gray, ksize=0, sigma=5)
plt.figure()
plt.imshow(img_blur, cmap="gray")
plt.axis("off")
plt.title("img_blur_1")

# Apply histogram equalization to enhance contrast
equalized_img = cv2.equalizeHist(img_blur)
plt.figure()
plt.imshow(equalized_img, cmap="gray")
plt.axis("off")
plt.title("equalized_img_2")

# Inverse binary threshold: pixels at or below 140 become 255, brighter ones 0
_, threshold = cv2.threshold(equalized_img, 140, 255, cv2.THRESH_BINARY_INV)
plt.figure()
plt.imshow(threshold, cmap="gray")
plt.axis("off")
plt.title("threshold_3")

# Define a kernel for morphological operations
kernel = np.ones((11, 11), np.uint8)

# Perform closing operation to fill gaps in the objects
closing = cv2.morphologyEx(threshold, cv2.MORPH_CLOSE, kernel, iterations=2)
plt.figure()
plt.imshow(closing, cmap="gray")
plt.axis("off")
plt.title("closing_4")

# Distance of every non-zero pixel to the nearest zero pixel
dist_transform = cv2.distanceTransform(closing, cv2.DIST_L2, 5)
plt.figure()
plt.imshow(dist_transform, cmap="gray")
plt.axis("off")
plt.title("dist_transform_5")

# Keep only pixels farther than 20% of the maximum distance: sure foreground
ret, sure_foreground = cv2.threshold(
    dist_transform, 0.2 * np.max(dist_transform), 255, cv2.THRESH_BINARY
)
# connectedComponents and subtract below need an 8-bit image
sure_foreground = np.uint8(sure_foreground)
plt.figure()
plt.imshow(sure_foreground, cmap="gray")
plt.axis("off")
plt.title("sure_foreground_6")

# Dilate the closed mask; everything outside it is treated as sure background
sure_background = cv2.dilate(closing, kernel, iterations=1)
plt.figure()
plt.imshow(sure_background, cmap="gray")
plt.axis("off")
plt.title("sure_background_7")

# Pixels inside the dilated mask but not sure foreground are undecided
unknown = cv2.subtract(sure_background, sure_foreground)

# Label the foreground blobs to seed the watershed algorithm
marker = cv2.connectedComponents(sure_foreground)[1]
# Shift labels by one so that 0 is free to mean "unknown"
marker = marker + 1
marker[unknown == 255] = 0

# Apply watershed algorithm to segment the objects
marker = cv2.watershed(img, marker)
plt.figure()
plt.imshow(marker, cmap="gray")
plt.axis("off")
plt.title("marker watershed_10")

# Collect the region labels, dropping -1 (the boundary label written by watershed)
unique_markers = np.unique(marker)
unique_markers = unique_markers[unique_markers != -1]

# Print the unique markers
print(unique_markers)

# NOTE: taking the two highest labels assumes exactly two photos in the frame;
# adjust this selection for pages with a different number of photos.
last_two_markers = unique_markers[-2:]

# Iterate over the selected markers
for marker_value in last_two_markers:
    # Build an 8-bit mask that is 255 inside the current region, 0 elsewhere
    white_mask = np.isin(marker, marker_value)
    modified_marker = np.where(white_mask, 255, 0).astype(np.uint8)

    # Apply the mask to the original image
    segmented_img = cv2.bitwise_and(img, img, mask=modified_marker)

    # OpenCV stores colour images as BGR while matplotlib expects RGB;
    # convert for display so red and blue are not swapped.
    plt.figure()
    plt.imshow(cv2.cvtColor(segmented_img, cv2.COLOR_BGR2RGB))
    plt.axis("off")
    plt.title(f"img{marker_value}")

# Show all plots
plt.show()

使用Python从相册中提取照片

问题描述投票：0回答：1

原图

灰度照片

轮廓

1个回答

最新问题

使用Python从相册中提取照片

问题描述 投票：0回答：1

原图

灰度照片

轮廓

1个回答

最新问题

问题描述投票：0回答：1