从图像中正确嵌入和提取 Unicode 数据

Question

我最近问了一个关于将数据嵌入图像的问题。在其他论坛的帮助下，我很快解决了该问题。现在我遇到了一个新问题：我的程序适用于所有拉丁字符，甚至包括带有变音（重音）符号的字符，但无法处理其他 Unicode 字符，例如西里尔文、希腊文和阿拉伯文等文字系统中的字符。

这是我当前程序的代码，它无法正确处理 Unicode。

def embed_text(self, image_path, text, output_path):
    """Hide ``text`` in the image at ``image_path`` and save it to ``output_path``.

    Every character is expanded to a bit string and one bit is written into
    the lowest bit of each R, G, B component, scanning pixels row by row.
    No terminator is appended to the message.

    Raises:
        ValueError: if ``text`` has more characters than ``(h * w * 3) // 8``.
    """
    # Convert text message to binary format
    # NOTE(review): '08b' is only a minimum width. Any character with
    # ord(char) > 255 produces more than 8 bits, so the fixed 8-bit framing
    # used by binary_to_string no longer lines up for such text.
    binary_message = ''.join(format(ord(char), '08b') for char in text) #aligning the bytes for embedding the message
    # Load the image
    image = Image.open(image_path)
    w, h = image.size

    # Calculate the number of embedding characters (eN)
    eN = (h * w * 3) // 8
    if len(text) > eN:
        raise ValueError("Message too long to fit in the image")

    # Embedding loop
    message_index = 0
    for i in range(h):
        for j in range(w):
            pixel = list(image.getpixel((j, i)))

            for k in range(3):  # For R, G, B components
                if message_index < len(binary_message):
                    M = int(binary_message[message_index])
                    # Perform XOR operation with the 7th bit of the RGB component
                    # NOTE(review): the stored low bit is (bit 1 of the
                    # component) XOR M rather than M itself, while
                    # extract_text reads the low bit back unchanged.
                    pixel[k] = (pixel[k] & 0xFE) | (((pixel[k] >> 1) & 1) ^ M)
                    message_index += 1
                else:
                    break  # No more message bits to embed

            image.putpixel((j, i), tuple(pixel))

    # Save the Stego Image
    image.save(output_path)

def xor_substitution(self, component, bit):
    """Return ``component`` with its lowest bit replaced by ``lowest bit XOR bit``.

    Bits 1-7 of ``component`` are kept; anything above bit 7 is masked off.
    """
    upper_bits = component & 0xFE
    flipped_lsb = (component & 1) ^ bit
    return upper_bits | flipped_lsb

def extract_text(self, image_path):
    """Read the hidden message back from the image at ``image_path``.

    Collects the lowest bit of the first three components of every pixel,
    row by row, truncates at the first run of eight zero bits, and decodes
    the remainder with ``self.binary_to_string``.
    """
    stego_image = Image.open(image_path)
    w, h = stego_image.size
    binary_message = ""

    for i in range(h):
        for j in range(w):
            pixel = stego_image.getpixel((j, i))
            for k in range(3):
                binary_message += str(pixel[k] & 1)

    # Extract only up to the NULL character
    # NOTE(review): find() matches at any bit offset, so eight zero bits that
    # straddle two neighbouring bytes also match and cut the message short.
    end = binary_message.find('00000000')
    if end != -1:
        binary_message = binary_message[:end]

    return self.binary_to_string(binary_message)

def binary_to_string(self, binary_message):
    """Decode a string of '0'/'1' characters into text, 8 bits per character.

    Each 8-bit slice is read as a base-2 integer and mapped through ``chr``.
    A shorter trailing slice is decoded the same way.
    """
    chunk_starts = range(0, len(binary_message), 8)
    return ''.join(
        chr(int(binary_message[start:start + 8], 2)) for start in chunk_starts
    )

正如我所说，这些方法在从我提供的图像中嵌入和提取拉丁文本方面效果很好。但当我尝试嵌入如下内容时：

το

قili

8

得到的却是类似这样的结果：

ñ;

ÈY♣

[]

为了解决此问题，我已将含有“08b”的那一行中的 8 位宽度更改为“16b”。我发现该程序仍然能够将内容嵌入到图像中，但提取出来的却是日文汉字或中文汉字。

这是我更改后的代码：

def embed_text(self, image_path, text, output_path):
    """Attempt to hide ``text`` in the image using 16 bits per character.

    The bit string is padded with '0' up to ``eN * 16`` characters, and each
    16-character slice is combined into a single R, G or B component.

    Raises:
        ValueError: if ``text`` has more characters than ``(h * w * 3) // 16``.
    """
    # Convert text message to binary format
    # NOTE(review): '16b' pads to width 16 with spaces, not zeros (the spec
    # has no '0' fill flag), and code points above 0xFFFF produce more than
    # 16 bits.
    binary_message = ''.join(format(ord(char), '16b') for char in text)

    # Load the image
    image = Image.open(image_path)
    w, h = image.size

    # Calculate the number of embedding characters (eN)
    eN = (h * w * 3) // 16
    if len(text) > eN:
        raise ValueError("Message too long to fit in the image")
    # Pad the bit string on the right with '0' up to the computed capacity.
    binary_message = binary_message.ljust(eN * 16, '0')


    # Embedding loop
    message_index = 0
    for i in range(h):
        for j in range(w):
            pixel = list(image.getpixel((j, i)))

            for k in range(3):  # For R, G, B components
                if message_index < len(binary_message):
                    # NOTE(review): M is the integer value of a whole
                    # 16-character slice, so a value of up to 16 bits is ORed
                    # into one colour component instead of a single bit. How
                    # putpixel treats values above 255 is not shown here.
                    M = int(binary_message[message_index:message_index+16], 2)
                    # Perform XOR operation with the 7th bit of the RGB component
                    pixel[k] = (pixel[k] & 0xFFFE) | (((pixel[k] >> 1) & 1) ^ M)
                    message_index += 16
                else:
                    break  # No more message bits to embed

            image.putpixel((j, i), tuple(pixel))

    # Save the Stego Image
    image.save(output_path)

def xor_substitution(self, component, bit):
    """XOR the lowest bit of ``component`` with ``bit``, keeping bits 1-7."""
    lsb = component & 1
    return (component & 0xFE) | (lsb ^ bit)

def extract_text(self, image_path):
    """Read bits back from the image at ``image_path`` and decode them.

    Takes the lowest bit of the first three components of every pixel, row
    by row, truncates at the first run of eight zero bits, and decodes the
    remainder with ``self.binary_to_string``.
    """
    stego_image = Image.open(image_path)
    w, h = stego_image.size
    binary_message = ""

    for i in range(h):
        for j in range(w):
            pixel = stego_image.getpixel((j, i))
            for k in range(3):
                # NOTE(review): zfill(16)[-1] keeps only the final character,
                # so this still appends exactly one bit per component, while
                # the matching embed_text combines a 16-bit value into each
                # component.
                binary_message += format(pixel[k] & 1, 'b').zfill(16)[-1]

    # Extract only up to the NULL character
    # NOTE(review): find() matches at any bit offset, not only on character
    # boundaries, and it looks for 8 zero bits although characters are
    # decoded 16 bits at a time.
    end = binary_message.find('00000000')
    if end != -1:
        binary_message = binary_message[:end]

    return self.binary_to_string(binary_message)

def binary_to_string(self, binary_message):
    """Decode a string of '0'/'1' characters into text, 16 bits per character.

    Each 16-bit slice is read as a base-2 integer and mapped through ``chr``.
    A shorter trailing slice is decoded the same way.
    """
    pieces = []
    position = 0
    while position < len(binary_message):
        code_point = int(binary_message[position:position + 16], 2)
        pieces.append(chr(code_point))
        position += 16
    return ''.join(pieces)

我想知道如何解决我的程序存在的这些问题，因为我需要在 12 月 5 日之前完成此实施。预先感谢您的帮助！

Answer 1

对原始代码进行最小程度的更改以使其正常工作，并带有注释：

# Added missing import
from PIL import Image

# removed "self" from arguments.
def embed_text(image_path, text, output_path):
    """Hide ``text`` in the lowest bits of an image and save the result.

    The message is UTF-8 encoded, terminated with a NULL byte, and written
    one bit per R, G, B component (most significant bit of each byte first),
    scanning pixels row by row. Only the pixels that actually carry message
    bits are touched.

    Args:
        image_path: Path of the cover image to read.
        text: Message to embed.
        output_path: Path the stego image is saved to. Use a lossless format
            such as PNG; lossy compression does not preserve the low bits.

    Raises:
        ValueError: If the encoded message (including the terminator) does
            not fit in the image.
    """
    # encode the message in UTF-8 and add a null.
    message = text.encode() + b'\x00'
    binary_message = ''.join(format(byte, '08b') for byte in message)

    image = Image.open(image_path)
    # getpixel() returns a bare int (or a 2-tuple) for images with fewer than
    # three bands, which the per-channel loop below cannot index. Convert
    # those to RGB; images that already have three or more bands are left
    # in their original mode.
    if len(image.getbands()) < 3:
        image = image.convert('RGB')
    w, h = image.size

    # Number of message bytes the image can hold at 3 bits per pixel.
    capacity = (h * w * 3) // 8
    # Check that encoded message has enough space in image
    if len(message) > capacity:
        raise ValueError("Message too long to fit in the image")

    # Each pixel carries up to three bits, so only ceil(bits / 3) pixels need
    # to be rewritten; the rest of the image is left untouched.
    pixels_needed = -(-len(binary_message) // 3)
    for n in range(pixels_needed):
        i, j = divmod(n, w)  # row-major order: i is the row, j the column
        pixel = list(image.getpixel((j, i)))
        for k, bit in enumerate(binary_message[3 * n:3 * n + 3]):
            # Set bit 0 of the RGB component to the message bit
            pixel[k] = (pixel[k] & 0xFE) | int(bit)
        image.putpixel((j, i), tuple(pixel))

    image.save(output_path)

def extract_text(image_path):
    """Recover the message hidden in the image at ``image_path``.

    Reads the lowest bit of the first three components of every pixel in
    row-major order and hands the resulting bit string to
    ``binary_to_string`` for decoding.
    """
    stego_image = Image.open(image_path)
    width, height = stego_image.size

    # Collect the bits in a list and join once at the end.
    bits = []
    for row in range(height):
        for col in range(width):
            components = stego_image.getpixel((col, row))
            bits.extend(str(components[channel] & 1) for channel in range(3))

    # The terminator is not searched for at bit level here: eight zero bits
    # can straddle two bytes. binary_to_string stops at the first
    # byte-aligned NULL instead.
    return binary_to_string(''.join(bits))

# removed "self" parameter.
def binary_to_string(binary_message):
    """Decode a string of '0'/'1' characters into text.

    The bits are consumed 8 at a time as bytes until an all-zero byte or the
    end of the input is reached; the collected bytes are then decoded with
    the default codec of ``bytearray.decode`` (UTF-8).
    """
    decoded = bytearray()
    total = len(binary_message)
    offset = 0
    while offset < total:
        chunk = binary_message[offset:offset + 8]
        if chunk == '00000000':
            # byte-aligned NULL terminator: end of message
            break
        decoded.append(int(chunk, 2))
        offset += 8
    return decoded.decode()

# Round-trip check: embed a mixed ASCII/CJK message into 'in.png', write the
# result to 'out.png', then print what is read back from 'out.png'.
# 'in.png' must be replaced with the path of an existing PNG file.
embed_text('in.png', 'Hello, world! 世界您好！', 'out.png')
print(extract_text('out.png'))

更高效的算法，删除 int/str/int 转换，只处理输入文件直到 null:

from PIL import Image

def embed_text(image_path, text, output_path):
    """Hide ``text`` in the lowest bits of the image's raw bytes.

    The message is UTF-8 encoded and terminated with a NULL byte. Bit ``b`` of
    message byte ``m`` (least significant bit first) is stored in the lowest
    bit of image byte ``8 * m + b``.

    Raises:
        ValueError: If the message needs more bits than the image has bytes.
    """
    # UTF-8 payload followed by the NULL terminator.
    payload = text.encode() + b'\x00'

    # Raw image bytes, copied into a mutable buffer.
    image = Image.open(image_path)
    carrier = bytearray(image.tobytes())

    # One image byte is consumed per message bit.
    bit_count = len(payload) * 8
    if bit_count > len(carrier):
        raise ValueError("Message too long to fit in the image")

    for position in range(bit_count):
        byte_index, shift = divmod(position, 8)
        bit = (payload[byte_index] >> shift) & 1
        # Clear the image byte's LSB, then set it to the message bit.
        carrier[position] = (carrier[position] & 0xFE) | bit

    # Write the modified bytes back into the image and save it.
    image.frombytes(carrier)
    image.save(output_path)

def extract_text(image_path):
    """Recover a NULL-terminated UTF-8 message from the image's lowest bits.

    Image bytes are consumed in groups of eight. The lowest bit of the byte
    at offset ``b`` within a group becomes bit ``b`` of the message byte.
    Reading stops at the first all-zero message byte or when no full group
    remains.

    Returns:
        The decoded message, or the literal string 'No valid message found.'
        when the collected bytes are not valid UTF-8.
    """
    stego_image = Image.open(image_path)
    raw = stego_image.tobytes()
    collected = bytearray()

    # Only complete groups of eight image bytes yield a message byte.
    for start in range(0, len(raw) - 7, 8):
        value = sum((raw[start + shift] & 1) << shift for shift in range(8))
        if value == 0:
            # NULL terminator reached; ignore the rest of the image.
            break
        collected.append(value)

    try:
        return collected.decode()
    except UnicodeDecodeError:
        return 'No valid message found.'

# Round-trip check: embed a mixed ASCII/CJK message into 'in.png', write the
# result to 'out.png', then print what is read back from 'out.png'.
embed_text('in.png', 'Hello, world! 世界您好！', 'out.png')
print(extract_text('out.png'))

从图像中正确嵌入和提取 Unicode 数据

问题描述投票：0回答：1

1个回答

最新问题

从图像中正确嵌入和提取 Unicode 数据

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1