我有一个大约 2.5GB 的大文件存储在 AWS S3 中。我在上传此文件时使用 SHA-256 作为校验和函数:
然后我继续使用标题为“检查对象完整性”的官方 AWS 用户指南。具体来说,我复制了“使用 AWS 开发工具包”部分中的 validateExistingFileAgainstS3Checksum
函数。然后我继续进行一些更改:
getPartBreak
System.out.print
getPartBreak
package app.service.vendor.amazon;
import app.service.vendor.amazon.exception.ChecksumValidationException;
import io.netty.handler.codec.base64.Base64Encoder;
import jakarta.inject.Inject;
import jakarta.inject.Singleton;
import org.slf4j.Logger;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.GetObjectAttributesRequest;
import software.amazon.awssdk.services.s3.model.GetObjectAttributesResponse;
import software.amazon.awssdk.services.s3.model.ObjectAttributes;
import software.amazon.awssdk.services.s3.model.ObjectPart;
import java.io.*;
import java.nio.channels.FileChannel;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import java.util.List;
import static software.amazon.awssdk.services.s3.internal.resource.S3ResourceType.BUCKET;
@Singleton
public class S3ChecksumValidator {
private final S3Client client;
private final Logger logger;
@Inject
public S3ChecksumValidator(S3Client client, Logger logger) {
this.client = client;
this.logger = logger;
}
public boolean validateMultipartUpload(File file, String bucket, String s3Key) throws ChecksumValidationException {
int chunkSize = 5 * 1024 * 1024;
GetObjectAttributesResponse
objectAttributes = client.getObjectAttributes(GetObjectAttributesRequest.builder().bucket(bucket).key(s3Key)
.objectAttributes(ObjectAttributes.OBJECT_PARTS, ObjectAttributes.CHECKSUM).build());
try (InputStream localInput = new FileInputStream(file)) {
MessageDigest sha256ChecksumOfChecksums = MessageDigest.getInstance("SHA-256");
MessageDigest sha256Part = MessageDigest.getInstance("SHA-256");
byte[] buffer = new byte[chunkSize];
int currentPart = 0;
long partBreak = getPartBreak(objectAttributes, currentPart);
int totalRead = 0;
int read = localInput.read(buffer);
while (read != -1) {
totalRead += read;
if (totalRead >= partBreak) {
int difference = totalRead - (int) partBreak;
byte[] partChecksum;
if (totalRead != partBreak) {
sha256Part.update(buffer, 0, read - difference);
partChecksum = sha256Part.digest();
sha256ChecksumOfChecksums.update(partChecksum);
sha256Part.reset();
sha256Part.update(buffer, read - difference, difference);
} else {
sha256Part.update(buffer, 0, read);
partChecksum = sha256Part.digest();
sha256ChecksumOfChecksums.update(partChecksum);
sha256Part.reset();
}
String base64PartChecksum = Base64.getEncoder().encodeToString(partChecksum);
if (!base64PartChecksum.equals(objectAttributes.objectParts().parts().get(currentPart).checksumSHA256())) {
logger.info(String.format("Part checksum of local file does not match s3 file '%s'.", s3Key));
return false;
}
currentPart++;
if (currentPart < objectAttributes.objectParts().totalPartsCount()) {
partBreak += objectAttributes.objectParts().parts().get(currentPart - 1).size();
}
} else {
sha256Part.update(buffer, 0, read);
}
read = localInput.read(buffer);
}
logger.info(String.format("local parts: %s , remote parts: %s", currentPart + 1, objectAttributes.objectParts().totalPartsCount()));
if (currentPart != objectAttributes.objectParts().totalPartsCount()) {
currentPart++;
byte[] partChecksum = sha256Part.digest();
sha256ChecksumOfChecksums.update(partChecksum);
String base64PartChecksum = Base64.getEncoder().encodeToString(partChecksum);
}
String base64CalculatedChecksumOfChecksums = Base64.getEncoder().encodeToString(sha256ChecksumOfChecksums.digest());
if (!base64CalculatedChecksumOfChecksums.equals(objectAttributes.checksum().checksumSHA256())) {
logger.info(String.format("Checksum of checksums of local file does not match s3 file '%s'.", s3Key));
logger.info(String.format("%s vs %s", base64CalculatedChecksumOfChecksums, objectAttributes.checksum().checksumSHA256()));
return false;
}
}
catch (IOException | NoSuchAlgorithmException e) {
String msg = String.format("Could not read local checksum - %s", e.getMessage());
throw new ChecksumValidationException(msg, e);
}
return true;
}
private static long getPartBreak(GetObjectAttributesResponse objectAttributes, int currentPart) throws ChecksumValidationException {
if(objectAttributes.objectParts() == null) {
String msg = "Not a multipart upload - object attributes -> object parts is null";
throw new ChecksumValidationException(msg);
}
List<ObjectPart> parts = objectAttributes.objectParts().parts();
if(parts.isEmpty()) {
String msg = "File was uploaded without checksum algorithm - object attributes -> object parts is empty";
throw new ChecksumValidationException(msg);
}
return parts.get(currentPart).size();
}
}
问题
false
,而不是成功验证:
[2024-02-01 18:34:08,768]-[Execution worker] INFO app.App - local parts: 125 , remote parts: 144
[2024-02-01 18:34:08,768]-[Execution worker] INFO app.App - Checksum of checksums of local file does not match s3 file 'shared/database.mmdb'.
[2024-02-01 18:34:08,768]-[Execution worker] INFO app.App - faceAGZGc36kITYRStsK5zEw+iBJTgttwRWbmnQC+jQ= vs j3L01d+7qyiJ4zYSadr0/+N+Q8IfYbpWM7JTYvXrIlw=
似乎 S3 对象的每个部分的校验和验证都是成功的,但验证器由于某种原因缺少本地文件的尾部(125 与 144 部分),而部分数量与我测试的其他文件匹配与?
任何有关可能导致此处出现问题的原因的想法将不胜感激。我已经确认文件大小在本地和S3中完全相同。