如何使用 Python 创建去哈希密码的统计信息？

Question

MD5 密码哈希器

我已经在这个 MD5 密码破解程序上花了一两天时间，但一直想不出下一部分该如何实现。目前，代码会打开一个包含用户名和哈希密码的文件，以及用来比对这些哈希值的 rockyou.txt。然后，它会对 rockyou 文本文件中的所有密码进行哈希处理，与文件中的哈希密码逐一比对，并输出一个 csv 文件，其中包含它能够破解的用户名和密码。现在我还希望程序输出一个统计文本文件，其中列出：

破解密码数量
拥有相同（重复）破解密码的用户数量
重复破解最多的10个明文密码以及每个密码的重复次数（降序排列）
未破解的密码数量
拥有相同（重复）未破解密码的用户数量

#Import necessary libraries
import hashlib
import openpyxl
import csv
import chardet

# Configuration settings for file locations (paths are relative to the
# directory the script is started from).
# Excel workbook that holds the usernames and their MD5 password hashes.
passwordFile = "Random Projects/Password Cracker/passwords.xlsx"
# CSV word list whose entries are hashed and used as lookup candidates.
wordListFile = "Random Projects/Password Cracker/wordlist.csv"
# CSV file that receives the (username, plaintext password) rows.
outputFile = "Random Projects/Password Cracker/decodedPasswords.csv"
# Intended location of the statistics report; this version of the script
# defines the path but never passes it to a function.
statsFile = "Random Projects/Password Cracker/passwordStats.txt"

#Open word list csv
def loadWordList(wordListFile):
    """Build an MD5-digest -> plaintext lookup table from a CSV word list.

    The file's encoding is guessed with chardet from the raw bytes, then the
    second column of every row is hashed with MD5 and stored under its hex
    digest so a hash can be resolved with a single dictionary lookup.

    Args:
        wordListFile: Path to the CSV word list (word in the 2nd column).

    Returns:
        dict mapping MD5 hex digests to the plaintext words.
    """
    print("Loading wordlist...")
    wordlist = {}

    # Detect the encoding from the raw bytes so the text pass below decodes.
    with open(wordListFile, 'rb') as csvfile:
        result = chardet.detect(csvfile.read())
        encoding = result['encoding']
        print("Detected encoding:", encoding)

    # newline='' is what the csv module expects: it handles line endings
    # itself, which keeps newlines embedded in quoted fields intact.
    with open(wordListFile, 'r', encoding=encoding, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank or one-column rows have no 2nd column. Skip them rather
            # than raising IndexError and aborting the whole load.
            if len(row) < 2:
                continue
            word = row[1]
            wordlist[hashlib.md5(word.encode()).hexdigest()] = word

    print("Wordlist loaded with", len(wordlist), "hashed words.")
    return wordlist

def decodePasswords(passwordFile, wordListFile, outputFile):
    """Resolve the workbook's MD5 hashes against the word list and save matches.

    Every row of the active worksheet from row 2 onwards is looked up in the
    table built by ``loadWordList``; matches are written to ``outputFile``
    as ``username,password`` CSV rows.

    Args:
        passwordFile: Path to the Excel workbook; the username is read from
            the 2nd column and the MD5 hash from the 3rd column.
        wordListFile: Path to the CSV word list handed to ``loadWordList``.
        outputFile: Path of the CSV file to create.

    Returns:
        None. Any exception is caught and reported with a printed message.
    """
    try:
        wordlist = loadWordList(wordListFile)

        workbook = openpyxl.load_workbook(passwordFile)
        worksheet = workbook.active

        # Iteration starts at row 2, so row 1 is not a record. Leaving it out
        # of the total lets the progress display actually reach 100%.
        total_records = worksheet.max_row - 1
        print("Excel file loaded.")

        decoded_passwords = []

        rows = worksheet.iter_rows(min_row=2, values_only=True)
        for record_number, row in enumerate(rows, start=1):
            username = row[1]
            md5_hash = row[2]

            unhashedPassword = wordlist.get(md5_hash)

            # Compare against None: an empty-string password is falsy but is
            # still a successful lookup.
            if unhashedPassword is not None:
                decoded_passwords.append([username, unhashedPassword])
                print(f"Password found for user {username} (Record {record_number}/{total_records}, {record_number / total_records * 100:.2f}% completed)")

        print("Passwords decoded:", len(decoded_passwords))

        with open(outputFile, 'w', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(decoded_passwords)

        print("Output file created:", outputFile)

    # Top-level boundary of the script: report the failure instead of a traceback.
    except Exception as e:
        print("Error decoding passwords:", e)

# Run the decoder with the configured paths only when this file is executed
# as a script; the guard is false when the module is imported.
if __name__ == "__main__":
    decodePasswords(passwordFile, wordListFile, outputFile)

这是我迄今为止尝试过的，但统计数据的计算似乎不对，而且我也没有实现前 10 名。

import hashlib
import openpyxl
import csv
import chardet

# Configuration settings for file locations (paths are relative to the
# directory the script is started from).
# Excel workbook that holds the usernames and their MD5 password hashes.
passwordFile = "Random Projects/Password Cracker/passwords.xlsx"
# CSV word list whose entries are hashed and used as lookup candidates.
wordListFile = "Random Projects/Password Cracker/wordlist.csv"
# CSV file that receives the (username, plaintext password) rows.
decodedOutputFile = "Random Projects/Password Cracker/decodedPasswords.csv"
# Text file that receives the cracking statistics.
statsOutputFile = "Random Projects/Password Cracker/passwordStats.txt"

# Open word list csv
def loadWordList(wordListFile):
    """Build an MD5-digest -> plaintext lookup table from a CSV word list.

    The file's encoding is guessed with chardet from the raw bytes, then the
    second column of every row is hashed with MD5 and stored under its hex
    digest so a hash can be resolved with a single dictionary lookup.

    Args:
        wordListFile: Path to the CSV word list (word in the 2nd column).

    Returns:
        dict mapping MD5 hex digests to the plaintext words.
    """
    print("Loading wordlist...")
    wordlist = {}

    # Detect the encoding from the raw bytes so the text pass below decodes.
    with open(wordListFile, 'rb') as csvfile:
        result = chardet.detect(csvfile.read())
        encoding = result['encoding']
        print("Detected encoding:", encoding)

    # newline='' is what the csv module expects: it handles line endings
    # itself, which keeps newlines embedded in quoted fields intact.
    with open(wordListFile, 'r', encoding=encoding, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank or one-column rows have no 2nd column. Skip them rather
            # than raising IndexError and aborting the whole load.
            if len(row) < 2:
                continue
            word = row[1]
            wordlist[hashlib.md5(word.encode()).hexdigest()] = word

    print("Wordlist loaded with", len(wordlist), "hashed words.")
    return wordlist

def decodePasswords(passwordFile, wordListFile, decodedOutputFile, statsOutputFile):
    """Crack the workbook's MD5 hashes, then write the results and statistics.

    Every row of the active worksheet from row 2 onwards is looked up in the
    table built by ``loadWordList``. Matches are written to
    ``decodedOutputFile`` as ``username,password`` CSV rows and a summary is
    written to ``statsOutputFile``. The summary reports:

    * the number of cracked and uncracked passwords,
    * how many distinct passwords are shared by more than one user, and how
      many users share them (separately for cracked and uncracked),
    * the ten most repeated cracked passwords, most frequent first.

    Args:
        passwordFile: Path to the Excel workbook; the username is read from
            the 2nd column and the MD5 hash from the 3rd column.
        wordListFile: Path to the CSV word list handed to ``loadWordList``.
        decodedOutputFile: Path of the CSV file to create.
        statsOutputFile: Path of the statistics text file to create.

    Returns:
        None. Any exception is caught and reported with a printed message.
    """
    try:
        wordlist = loadWordList(wordListFile)

        workbook = openpyxl.load_workbook(passwordFile)
        worksheet = workbook.active

        # Iteration starts at row 2, so row 1 is not a record and must not be
        # part of the total used for the progress display.
        total_records = worksheet.max_row - 1
        print("Excel file loaded.")

        decoded_passwords = []
        # plaintext password -> usernames whose hash resolved to it
        cracked_password_occurrences = {}
        # MD5 hash -> usernames whose hash was not found in the word list
        uncracked_password_occurrences = {}

        rows = worksheet.iter_rows(min_row=2, values_only=True)
        for record_number, row in enumerate(rows, start=1):
            username = row[1]
            md5_hash = row[2]

            unhashedPassword = wordlist.get(md5_hash)

            # Compare against None: an empty-string password is falsy but is
            # still a successful lookup.
            if unhashedPassword is not None:
                decoded_passwords.append([username, unhashedPassword])
                cracked_password_occurrences.setdefault(unhashedPassword, []).append(username)
                print(f"Password found for user {username} (Record {record_number}/{total_records}, {record_number / total_records * 100:.2f}% completed)")
            else:
                uncracked_password_occurrences.setdefault(md5_hash, []).append(username)

        decoded_password_count = len(decoded_passwords)
        print("Passwords decoded:", decoded_password_count)

        # Count the rows actually seen instead of subtracting from max_row,
        # which would also count the skipped first row as "not cracked".
        uncracked_password_count = sum(
            len(usernames) for usernames in uncracked_password_occurrences.values()
        )

        # Keep only passwords shared by more than one user: password -> user count.
        repeated_cracked = {
            password: len(usernames)
            for password, usernames in cracked_password_occurrences.items()
            if len(usernames) > 1
        }
        repeated_uncracked = {
            md5_hash: len(usernames)
            for md5_hash, usernames in uncracked_password_occurrences.items()
            if len(usernames) > 1
        }

        # "Repeated passwords" counts distinct passwords; "users with repeated
        # passwords" counts every user that shares one of them.
        repeated_cracked_password_count = len(repeated_cracked)
        users_with_repeated_cracked_passwords = sum(repeated_cracked.values())
        repeated_uncracked_password_count = len(repeated_uncracked)
        users_with_repeated_uncracked_passwords = sum(repeated_uncracked.values())

        # sorted() is stable, so equally frequent passwords keep first-seen order.
        top_cracked_passwords = sorted(
            repeated_cracked.items(), key=lambda item: item[1], reverse=True
        )[:10]

        with open(decodedOutputFile, 'w', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(decoded_passwords)

        print("Output file created:", decodedOutputFile)

        stats = [
            f"Passwords Successfully Cracked: {decoded_password_count}",
            f"Users with Repeated Cracked Passwords: {users_with_repeated_cracked_passwords}",
            f"Repeated Cracked Passwords: {repeated_cracked_password_count}",
            f"Passwords NOT Cracked: {uncracked_password_count}",
            f"Repeated Uncracked Passwords: {repeated_uncracked_password_count}",
            f"Users with Repeated Uncracked Passwords: {users_with_repeated_uncracked_passwords}",
            "Top 10 Most Repeated Cracked Passwords:",
        ]
        stats.extend(f"{password}: {count}" for password, count in top_cracked_passwords)

        with open(statsOutputFile, 'w', encoding='utf-8') as statsfile:
            statsfile.write("\n".join(stats))

        print(f"Statistics file created: {statsOutputFile}")

    # Top-level boundary of the script: report the failure instead of a traceback.
    except Exception as e:
        print("Error decoding passwords:", e)

# Run the decoder with the configured paths only when this file is executed
# as a script; the guard is false when the module is imported.
if __name__ == "__main__":
    decodePasswords(passwordFile, wordListFile, decodedOutputFile, statsOutputFile)

Answer 1

我想回到这个帖子，贴出最终最符合我需求的完整代码。我最后采纳了 @Barmar 的建议，使用 pandas 数据框（DataFrame）来生成有关已破解和未破解密码的统计数据。

import hashlib
import openpyxl
import csv
import chardet
import pandas as pd

# Configuration settings for file locations (paths are relative to the
# directory the script is started from).
# Excel workbook that holds the usernames and their MD5 password hashes.
passwordFile = "Random Projects/Password Cracker/passwords.xlsx"
# CSV word list whose entries are hashed and used as lookup candidates.
wordListFile = "Random Projects/Password Cracker/wordlist.csv"
# CSV file that receives the username / decoded password table.
outputFile = "Random Projects/Password Cracker/decodedPasswords.csv"
# Text file that receives the statistics report.
statsFile = "Random Projects/Password Cracker/passwordStats.txt"

# Open word list csv
def loadWordList(wordListFile):
    """Build an MD5-digest -> plaintext lookup table from a CSV word list.

    1.) Guesses the file's encoding with chardet from the raw bytes.
    2.) Hashes the second column of every row with MD5 and stores the word
        in a dictionary under its hex digest, so a hash can be resolved
        with a single lookup.
    3.) Prints a completion statement and returns the dictionary.

    Args:
        wordListFile: Path to the CSV word list (word in the 2nd column).

    Returns:
        dict mapping MD5 hex digests to the plaintext words.
    """
    print("Loading wordlist...\nDetecting encoding...")
    wordlist = {}

    # Detect the encoding from the raw bytes so the text pass below decodes.
    with open(wordListFile, 'rb') as csvfile:
        result = chardet.detect(csvfile.read())
        encoding = result['encoding']
        print("Detected encoding:", encoding)

    # newline='' is what the csv module expects: it handles line endings
    # itself, which keeps newlines embedded in quoted fields intact.
    with open(wordListFile, 'r', encoding=encoding, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank or one-column rows have no 2nd column. Skip them rather
            # than raising IndexError and aborting the whole load.
            if len(row) < 2:
                continue
            word = row[1]
            wordlist[hashlib.md5(word.encode()).hexdigest()] = word

    print("Wordlist loaded with", len(wordlist), "hashed words.")
    return wordlist

def decodePasswords(passwordFile, wordListFile, outputFile, statsFile):
    """Crack the workbook's MD5 hashes, save the matches and write statistics.

    Every row of the active worksheet from row 2 onwards is looked up in the
    table built by ``loadWordList``. Cracked entries are collected into a
    pandas DataFrame (columns ``Username`` and ``DecodedPassword``) and
    saved to ``outputFile`` with ``to_csv``. For hashes that could not be
    cracked, the number of users per hash is counted. Both results are then
    handed to ``calcStats``.

    Args:
        passwordFile: Path to the Excel workbook; the username is read from
            the 2nd column and the MD5 hash from the 3rd column.
        wordListFile: Path to the CSV word list handed to ``loadWordList``.
        outputFile: Path of the CSV file to create.
        statsFile: Path of the statistics file, forwarded to ``calcStats``.

    Returns:
        None. Any exception is caught and reported with a printed message.
    """
    try:
        wordlist = loadWordList(wordListFile)

        workbook = openpyxl.load_workbook(passwordFile)
        worksheet = workbook.active

        maxRows = worksheet.max_row
        # Iteration starts at row 2, so row 1 is not a record. Leaving it out
        # of the total lets the progress display actually reach 100%.
        totalRecords = maxRows - 1
        print("Excel file loaded.")

        decodedPasswords = []
        undecodedPasswords = []
        # uncracked MD5 hash -> number of users that have it
        passwordReuseCount = {}

        rows = worksheet.iter_rows(min_row=2, values_only=True)
        for recordNumber, row in enumerate(rows, start=1):
            username = row[1]
            md5Hash = row[2]

            unhashedPassword = wordlist.get(md5Hash)

            # Compare against None: an empty-string password is falsy but is
            # still a successful lookup.
            if unhashedPassword is not None:
                decodedPasswords.append([username, unhashedPassword])
                # Report progress only for hashes that were actually found.
                print(f"Password found for user {username} (Record {recordNumber}/{totalRecords}, {round(recordNumber / totalRecords * 100, 2)}% completed)")
            else:
                undecodedPasswords.append(username)
                passwordReuseCount[md5Hash] = passwordReuseCount.get(md5Hash, 0) + 1

        decodedpasswordsDF = pd.DataFrame(decodedPasswords, columns=['Username', 'DecodedPassword'])

        print(f"\nPasswords decoded: {len(decodedPasswords)}")
        print(f"Passwords not cracked: {len(undecodedPasswords)}")

        decodedpasswordsDF.to_csv(outputFile, index=False, encoding='utf-8')

        print("Output file created:", outputFile)

        # calcStats receives the raw worksheet row count as its maxRows argument.
        calcStats(decodedpasswordsDF, passwordReuseCount, statsFile, maxRows)

    # Top-level boundary of the script: report the failure instead of a traceback.
    except Exception as e:
        print("Error decoding passwords:", e)


def calcStats(decodedpasswordsDF, passwordReuseCount, statsFile, maxRows):
    """Write a plain-text statistics report about cracked and uncracked passwords.

    The report contains the totals, the number of users that share a
    password with someone else, the number of distinct passwords that are
    shared, and the ten most frequent cracked passwords and uncracked hashes.

    Args:
        decodedpasswordsDF: DataFrame with a ``DecodedPassword`` column, one
            row per cracked user.
        passwordReuseCount: dict mapping each uncracked hash to the number of
            users that have it.
        statsFile: Path of the text report to create.
        maxRows: Row count of the source worksheet including its header row.

    Returns:
        None. Any exception is caught and reported with a printed message.
    """
    totalUsers = maxRows - 1  # Subtract 1 to exclude the header

    try:
        passwordsCracked = len(decodedpasswordsDF)
        passwordsNotCracked = totalUsers - passwordsCracked

        # value_counts() yields password -> user count, most frequent first.
        repeatedPasswords = decodedpasswordsDF['DecodedPassword'].value_counts()
        # Only passwords held by more than one user count as "the same".
        reusedCracked = repeatedPasswords[repeatedPasswords > 1]
        usersSharingCracked = int(reusedCracked.sum())
        crackedUsedMoreThanOnce = len(reusedCracked)

        # Uncracked hashes held by more than one user, most frequent first.
        topUncrackedPasswords = sorted(
            ((k, v) for k, v in passwordReuseCount.items() if v > 1),
            key=lambda x: x[1],
            reverse=True,
        )
        usersSharingUncracked = sum(count for _, count in topUncrackedPasswords)
        uncrackedUsedMoreThanOnce = len(topUncrackedPasswords)

        with open(statsFile, 'w', newline='', encoding='utf-8') as statsfile:
            # Header and totals
            statsfile.write("               STATISTICS OUTPUT\n")
            statsfile.write("--------------------------------------------------\n")
            statsfile.write(f"Total users in the file: {totalUsers}\n")
            statsfile.write(f"Number of passwords cracked: {passwordsCracked}\n")
            statsfile.write(f"Number of passwords not cracked: {passwordsNotCracked}\n\n")

            # Cracked password statistics
            statsfile.write("               CRACKED PASSWORD STATS\n")
            statsfile.write("--------------------------------------------------\n")
            statsfile.write(f"Count of users with the same cracked passwords: {usersSharingCracked}\n")
            statsfile.write(f"Total count of cracked passwords used more than once: {crackedUsedMoreThanOnce}\n")

            statsfile.write("\nTop 10 most frequently used cracked passwords:\n")
            for password, count in repeatedPasswords.head(10).items():
                statsfile.write(f"{password}: {count} times\n")

            # Uncracked password statistics (same meaning per label as above)
            statsfile.write("\n             UNCRACKED PASSWORD STATS\n")
            statsfile.write("--------------------------------------------------\n")
            statsfile.write(f"Count of users with the same uncracked passwords: {usersSharingUncracked}\n")
            statsfile.write(f"Total count of uncracked passwords used more than once: {uncrackedUsedMoreThanOnce}\n")

            statsfile.write("\nTop 10 most frequently used uncracked passwords (still hashed):\n")
            for password, count in topUncrackedPasswords[:10]:
                statsfile.write(f"{password}: {count} times\n")

            print("Statistics saved to:", statsFile)

    # Report the failure instead of letting it propagate to the caller.
    except Exception as e:
        print("Error calculating statistics:", e)

# Run the decoder with the configured paths only when this file is executed
# as a script; the guard is false when the module is imported.
if __name__ == "__main__":
    decodePasswords(passwordFile, wordListFile, outputFile, statsFile)

如何使用 Python 创建去哈希密码的统计信息？

问题描述投票：0回答：1

MD5 密码哈希器

1个回答

最新问题

如何使用 Python 创建去哈希密码的统计信息？

问题描述 投票：0回答：1

MD5 密码哈希器

1个回答

最新问题

问题描述投票：0回答：1