我有下面这个 gist,它可以在一个数据集上实现 TrueSkill 算法。该数据集是一组 n 名玩家自由混战(free-for-all)比赛的结果:每场比赛有一名冠军,然后依次是第二名、第三名、第四名,等等。
基本上我要问的是:
这是我想出的,可能可以针对速度进行优化。
# Compute per-game TrueSkill ratings for every player in a set of n-player
# free-for-all games, and collect the before/after ratings in `df_truerank`.
#
# NOTE: this script expects `pd` (pandas) and `Rating` / `rate` (trueskill)
# to be imported already.

# Rating assigned to a player who has no earlier game in the sample.
DEFAULT_MU = 25
DEFAULT_SIGMA = 8.333

# Fetch the data
df_raw = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')

# Column layout of the result table: one row per (game, player) with the
# rating before the game (mu, sigma) and after it (post_mu, post_sigma).
df_truerank_columns = ['game_id', 'player_id', 'position', 'mu', 'sigma', 'post_mu', 'post_sigma']

# Use the first 10,000 rows as a sample
df = df_raw.head(10000)

# Group by the game_id; groupby yields the groups in ascending game_id order,
# so every game is processed after all games with a smaller id.
games = df.groupby('game_id')

# Most recent (post_mu, post_sigma) per player. This replaces scanning the
# whole result table for the player's last game on every row.
latest_ratings = {}

# Result rows are accumulated here and turned into a DataFrame once at the
# end; DataFrame.append was removed in pandas 2.0 and copied the entire
# table on every call.
truerank_rows = []

for game_id, game in games:
    player_ids = [int(player_id) for player_id in game['player_id']]

    # Rating each player brings into this game: the result of their previous
    # game if there is one, otherwise the defaults.
    priors = [
        latest_ratings.get(player_id, (DEFAULT_MU, DEFAULT_SIGMA))
        for player_id in player_ids
    ]
    mus = [mu for mu, _ in priors]
    sigmas = [sigma for _, sigma in priors]

    # Positions are 1-based, rate() is given 0-based ranks.
    ranks = [position - 1 for position in game['position']]

    # rate() takes rating groups (teams); every player is a team of one.
    rating_groups = [(Rating(mu=mu, sigma=sigma),) for mu, sigma in priors]

    try:
        results = rate(rating_groups, ranks=ranks)
    except Exception:
        # Best effort: if rate() fails for this game, carry the previous
        # ratings forward unchanged. Copies are used so the pre- and
        # post-game lists never alias each other. Unlike a bare `except:`,
        # this lets KeyboardInterrupt / SystemExit propagate.
        post_mus = list(mus)
        post_sigmas = list(sigmas)
    else:
        post_mus = [round(group[0].mu, 2) for group in results]
        post_sigmas = [round(group[0].sigma, 2) for group in results]

    # Change the ranks back to 1-based positions for the output.
    positions = [rank + 1 for rank in ranks]

    truerank_rows.extend(
        zip([game_id] * len(player_ids), player_ids, positions,
            mus, sigmas, post_mus, post_sigmas)
    )

    # Remember the new ratings only after the whole game has been processed,
    # so that every player in this game was rated from earlier games only.
    for player_id, post_mu, post_sigma in zip(player_ids, post_mus, post_sigmas):
        latest_ratings[player_id] = (post_mu, post_sigma)

# Build the holding DataFrame for the results in one step (rows are in game
# order and get a fresh 0..N-1 index).
df_truerank = pd.DataFrame(truerank_rows, columns=df_truerank_columns)