我有两个函数。
代码可以运行,但第二个 y 轴上的累积和在图中没有被正确缩放。欢迎任何指点。
以下是这两个函数的代码。图中的累积概率曲线看起来远低于 1。我另外写了一个函数(也附在下面)来确认累积和确实是从 0 增加到 1。
#dependencies
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
#Reference: Stochastic Risk Analysis: Monte Carlo Simulation and FMEA (Failure Mode and Effect Analysis),
#Revista Espacios, 2017, 38(4), 26-
#Functions
#Program workflow:
#1. generate dataframe from failure modes (FM) their associated RPNs with
# "generate_rpn_dataframe"
#2. generate histograms of FMs with 5% and 95% indicator lines with
# "plot_histogram_with_percentiles". Overlay the probability distribution.
#function for generating a pandas DataFrame of RPN samples for different failure modes
def generate_rpn_dataframe(df, n_samples=5000, rng=None):
    """Draw Monte Carlo RPN samples for every failure mode in ``df``.

    Each column of ``df`` describes one failure mode. Its first three rows are
    read by position (not by label) as the lower bound, the peak and the upper
    bound of a triangular distribution, in that order.

    Args:
        df (pd.DataFrame): One column per failure mode; rows 0, 1 and 2 hold
            the min, avg and max scores. The avg score is passed as the
            distribution's mode.
        n_samples (int): Number of random draws per failure mode.
            Defaults to 5000.
        rng (np.random.Generator | None): Optional generator for reproducible
            draws. When omitted, the global ``np.random`` state is used.

    Returns:
        pd.DataFrame: ``n_samples`` rows and the same columns as ``df``.
    """
    # Both the np.random module and a Generator expose triangular(left, mode, right, size).
    sampler = np.random if rng is None else rng
    samples = {
        col: sampler.triangular(
            df[col].iloc[0],  # min
            df[col].iloc[1],  # avg, used as the mode
            df[col].iloc[2],  # max
            n_samples,
        )
        for col in df.columns
    }
    # Build the frame in one go instead of inserting columns one at a time.
    return pd.DataFrame(samples)
def plot_histogram_with_percentiles(dataframe):
    """Plot a 50-bin histogram per column with 5% and 95% percentile markers.

    Histograms are laid out three per row. Each subplot also gets a green
    cumulative line and a twin y-axis labelled 'Cumulative Probability'.

    Args:
        dataframe (pd.DataFrame): Input DataFrame; one histogram is drawn per
            column.

    Returns:
        pd.DataFrame: For each input column, the cumulative fraction of
        samples after each of the 50 bins. The figure is shown as a side
        effect. An empty DataFrame is returned, and nothing is drawn, when the
        input has no columns.
    """
    num_columns = dataframe.shape[1]
    if num_columns == 0:
        # plt.subplots cannot build a grid with zero rows.
        return pd.DataFrame()

    result = pd.DataFrame()
    percentiles = dataframe.quantile([0.05, 0.95])
    num_rows = (num_columns + 2) // 3  # ceil(num_columns / 3)

    # squeeze=False keeps `axes` two-dimensional even for a single row, so
    # axes[row, col] also works when there are three or fewer columns.
    fig, axes = plt.subplots(
        nrows=num_rows, ncols=3, figsize=(15, 5 * num_rows), squeeze=False
    )

    for col_idx, column in enumerate(dataframe.columns):
        row, col = divmod(col_idx, 3)
        ax = axes[row, col]
        column_data = dataframe[column]

        n, bins, _ = ax.hist(
            column_data, bins=50, color='skyblue', alpha=0.7, edgecolor='black'
        )
        # Running bin count divided by sample count: a 0..1 cumulative fraction.
        cumulative_sum = np.cumsum(n) / len(column_data)

        # NOTE: the curve is drawn on `ax`, i.e. against the bin-count scale,
        # and the twin axis created below stays empty. A 0..1 line therefore
        # looks almost flat next to the bin counts. This is the scaling
        # behaviour discussed in the surrounding text and is left as posted.
        ax.plot(bins[:-1], cumulative_sum, color='green', label='Cumulative Sum')

        # Vertical markers at the 5% and 95% sample quantiles.
        for percentile in percentiles[column]:
            ax.axvline(
                percentile, color='red', linestyle='--', linewidth=2,
                label=f'{percentile:.2f}',
            )

        ax.set_title(f'Histogram for {column}')
        ax.set_xlabel('RPN')
        ax.set_ylabel('Bin Count')
        ax.legend()

        # Second y-axis, labelled as the cumulative probability.
        ax2 = ax.twinx()
        ax2.set_ylabel('Cumulative Probability')

        result[column] = cumulative_sum  # returned for testing

    # Remove the unused cells of the last row.
    for i in range(num_columns, num_rows * 3):
        fig.delaxes(axes.flatten()[i])

    plt.tight_layout()
    plt.show()
    return result
# Example workflow
# 1. Define the failure modes and their scores (values taken from the reference).
#    Every list holds the min, avg and max score, in that order.
stat_labels = ['min', 'avg', 'max']
scores_by_mode = {
    'fm1': [1, 12, 60],
    'fm2': [2, 36, 60],
    'fm3': [6, 45, 60],
    'fm4': [6, 15, 60],
    'fm5': [4, 45, 60],
    'fm6': [3, 10, 30],
    'fm7': [18, 75, 100],
}
rpn = {
    mode: pd.Series(scores, index=stat_labels)
    for mode, scores in scores_by_mode.items()
}
df_rpn = pd.DataFrame(rpn)
# 2. Apply the Monte Carlo method to generate a DataFrame of FMEA scores.
fmea_dist = generate_rpn_dataframe(df_rpn)
# 3. Visualize the histograms; keep the returned cumulative sums for checking.
hist_results = plot_histogram_with_percentiles(fmea_dist)
##checking to see that cumulative sums go from zero to one
def scatter_plots_for_columns(dataframe):
    """Draw one scatter plot per column, side by side in a single figure.

    Each subplot shows the column's values against the DataFrame index.

    Args:
        dataframe (pd.DataFrame): Input DataFrame containing the data.

    Returns:
        None. The combined figure is shown directly; nothing is drawn when
        the DataFrame has no columns.
    """
    num_columns = dataframe.shape[1]
    if num_columns == 0:
        # plt.subplots cannot build a grid with zero columns.
        return

    # squeeze=False always yields a 2-D array of axes, so a single-column
    # DataFrame (where plt.subplots would otherwise return a bare Axes) works.
    _, axes = plt.subplots(
        nrows=1, ncols=num_columns, figsize=(15, 5), squeeze=False
    )

    for ax, column in zip(axes[0], dataframe.columns):
        column_data = dataframe[column]
        ax.scatter(column_data.index, column_data, color='b', alpha=0.7, label=column)
        ax.set_title(f'Scatter Plot for {column}')
        ax.set_xlabel('Index')
        ax.set_ylabel('Probability')
        ax.legend()

    plt.tight_layout()
    plt.show()
# Plot the cumulative sums returned by the histogram function, one subplot per
# failure mode, to check visually that each one spans zero to one.
scatter_plots_for_columns(hist_results)
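以下是 plot_histogram_with_percentiles(dataframe) 的更正代码。关键改动:先用 twinx() 创建第二个 y 轴 ax2,再把累积概率曲线画在 ax2 上。原代码把曲线画在了直方图的计数轴上,而 ax2 上什么都没画,所以 0 到 1 的曲线相对于箱计数几乎是一条平线: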
def plot_histogram_with_percentiles(dataframe):
    """Plot histograms with percentile markers and a cumulative-probability curve.

    One 50-bin histogram is drawn per column, three per row. Dashed red lines
    mark the 5% and 95% sample quantiles. The cumulative probability is drawn
    on a secondary y-axis so that its 0..1 range is not dwarfed by the bin
    counts on the primary axis.

    Args:
        dataframe (pd.DataFrame): Input DataFrame; one histogram is drawn per
            column.

    Returns:
        pd.DataFrame: For each input column, the cumulative fraction of
        samples after each of the 50 bins. The figure is shown as a side
        effect. An empty DataFrame is returned, and nothing is drawn, when the
        input has no columns.
    """
    num_columns = dataframe.shape[1]
    if num_columns == 0:
        # plt.subplots cannot build a grid with zero rows.
        return pd.DataFrame()

    result = pd.DataFrame()
    percentiles = dataframe.quantile([0.05, 0.95])
    num_rows = (num_columns + 2) // 3  # ceil(num_columns / 3)

    # squeeze=False keeps `axes` two-dimensional even for a single row, so
    # axes[row, col] also works when there are three or fewer columns.
    fig, axes = plt.subplots(
        nrows=num_rows, ncols=3, figsize=(15, 5 * num_rows), squeeze=False
    )

    for col_idx, column in enumerate(dataframe.columns):
        row, col = divmod(col_idx, 3)
        ax1 = axes[row, col]
        column_data = dataframe[column]

        n, bins, _ = ax1.hist(
            column_data, bins=50, color='skyblue', alpha=0.7, edgecolor='black'
        )
        # Running bin count divided by sample count: a 0..1 cumulative fraction.
        cumulative_sum = np.cumsum(n) / len(column_data)

        # The secondary axis carries the cumulative probability.
        ax2 = ax1.twinx()
        # cumulative_sum[i] counts every sample up to the RIGHT edge of bin i,
        # so the curve is plotted against bins[1:]. Using the left edges would
        # shift it one bin to the left of the percentile markers.
        ax2.plot(bins[1:], cumulative_sum, color='green', label='Cumulative Sum')
        ax2.set_ylim(0, 1.05)  # pin the scale to the probability range
        ax2.set_ylabel('Cumulative Probability')

        # Vertical markers at the 5% and 95% sample quantiles.
        for percentile in percentiles[column]:
            ax1.axvline(
                percentile, color='red', linestyle='--', linewidth=2,
                label=f'{percentile:.2f}',
            )

        ax1.set_title(f'Histogram with Cumulative Probability for {column}')
        ax1.set_xlabel('RPN')
        ax1.set_ylabel('Bin Count')

        # A legend only lists artists of its own axes, so merge both axes'
        # entries. It is attached to ax2, which is drawn on top of ax1.
        handles1, labels1 = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        ax2.legend(handles1 + handles2, labels1 + labels2)

        result[column] = cumulative_sum  # returned for testing

    # Remove the unused cells of the last row.
    for unused_ax in axes.ravel()[num_columns:]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()
    return result