我开发了以下代码来检查三人组是否同时连接
import pandas as pd
from itertools import combinations
# Sample connection log, written one session per record: (user, start, end).
_session_rows = [
    ('Esther', '01/01/2023 00:00:00', '01/01/2023 00:10:00'),
    ('Jonh', '01/01/2023 00:00:00', '01/01/2023 00:00:10'),
    ('Ann', '01/01/2023 00:00:05', '01/01/2023 00:00:12'),
    ('Alex', '01/01/2023 00:00:07', '01/01/2023 00:00:12'),
    ('Jonh', '01/01/2023 00:00:12', '01/01/2023 00:00:16'),
    ('Alex', '01/01/2023 00:00:14', '01/01/2023 00:00:16'),
    ('Ann', '01/01/2023 00:00:15', '01/01/2023 00:00:17'),
    ('Beatrix', '01/01/2023 00:00:16', '01/01/2023 00:00:17'),
]
_column_names = ('User', 'InitialTime', 'FinalTime')

# Transpose the records into the column-oriented mapping the DataFrame is built from.
data = {
    name: list(values)
    for name, values in zip(_column_names, zip(*_session_rows))
}
df = pd.DataFrame(data)
def calculate_overlapped_time(df, group_size=3):
    """Sum the simultaneous connection time for every ordered group of sessions.

    Every ordered selection of ``group_size`` distinct rows of ``df`` is
    examined. The overlap of a selection is the time between the latest
    ``InitialTime`` and the earliest ``FinalTime`` of its rows, clamped at
    zero when the sessions do not all intersect. Overlaps are accumulated
    under a key made of the rows' ``User`` values joined with ``-`` in
    selection order, so each ordering of the same users gets its own key and
    a user with several sessions can appear more than once in a key.

    Args:
        df: DataFrame with ``User``, ``InitialTime`` and ``FinalTime``
            columns; the time columns hold ``'%d/%m/%Y %H:%M:%S'`` strings.
            The frame is not modified.
        group_size: Number of sessions per group. Defaults to 3.

    Returns:
        DataFrame with columns ``Group`` (the key) and ``OverlappingTime``
        (accumulated overlap in whole seconds, as integers), in the order
        the keys were first encountered.

    Raises:
        ValueError: If ``group_size`` is smaller than 1.

    Note:
        The number of selections is n! / (n - group_size)! for n rows, so
        large groups over many rows remain expensive.
    """
    # Local import: the file's top-level import only brings in ``combinations``.
    from itertools import permutations

    if group_size < 1:
        raise ValueError("group_size must be at least 1")

    time_format = '%d/%m/%Y %H:%M:%S'
    # Parse into new Series rather than assigning back, so the caller's
    # DataFrame keeps its original columns.
    starts = pd.to_datetime(df['InitialTime'], format=time_format)
    ends = pd.to_datetime(df['FinalTime'], format=time_format)
    # Extract plain tuples once; rebuilding row Series inside the loop is slow.
    sessions = list(zip(df['User'], starts, ends))

    overlapped_time = {}
    # permutations() walks positions in lexicographic order, which is the
    # order nested loops over distinct rows would visit them.
    for group in permutations(sessions, group_size):
        users, group_starts, group_ends = zip(*group)
        shared = (min(group_ends) - max(group_starts)).total_seconds()
        overlap = max(0, shared)
        key = "-".join(map(str, users))
        overlapped_time[key] = overlapped_time.get(key, 0) + overlap

    results = pd.DataFrame(list(overlapped_time.items()), columns=['Group', 'OverlappingTime'])
    results['OverlappingTime'] = results['OverlappingTime'].astype(int)
    return results
# Build the per-group overlap table (columns 'Group' and 'OverlappingTime')
# for the sample sessions in df.
results_df = calculate_overlapped_time(df)
我想计算大约 10 人一组的重叠时间,但这样层层嵌套的 for 循环在组变大时就变得不切实际了。
有人可以告诉我是否有替代方法可以使此代码更具可扩展性,以便在不使用嵌套 for 循环的情况下处理更大规模的组?
看起来您只是在从同一个数据帧中提取行的组合。在这种情况下,您只需使用
itertools.combinations
并且只用一个循环即可:
import itertools as it
# df.iterrows() yields (index, row) pairs, and it.combinations(..., 3) yields
# every set of three of those pairs exactly once, in row order. Each target
# below unpacks one (index, row) pair.
# NOTE(review): unlike three nested loops over all rows, this visits only one
# ordering of each row triple, so a key built from the users in iteration
# order appears once per triple rather than once per ordering.
for [i, row_i], [j, row_j], [k, row_k] in it.combinations(df.iterrows(), 3):
    # Placeholder: the per-triple overlap computation goes here.