我有一个数组,例如:
arr = ['A', 'A', 'A', 'B', 'B']
我想重新排序这个数组,使得相同类型的元素之间的最小距离是最大。例如,这是上述数组的最佳序列:
arr1 = ['A', 'B', 'A', 'B', 'A']
我怎样才能在Python中做到这一点?我很难弄清楚...
编辑:我尝试用遗传算法来解决它,使每个个体都是一个整数序列,例如 [1 ,2, 3, 6, 4, 5] 这意味着位置 1 和 2 将被交换,3以及 6、4 和 5。然而,它似乎停留在局部最小值上。
我不知道是否有人仍然对这个问题的答案感兴趣,但由于我的第一次天真的尝试是不正确的并且被适当降级,它让我的设计活力得以发挥。所以这里有一个更完整的答案。
据我了解这个问题:
给定一系列符号,例如“AAABB”
其中 A 和 B 是符号
找到序列的重新排序,使得相似符号之间的最小间隔最大化。
所以在上面的例子中,解决方案是“ABABA”
import heapq as hpq
from copy import copy
from dataclasses import dataclass
from itertools import permutations
from functools import total_ordering
@dataclass
@total_ordering
class SolutionItem:
arr: str
sol: list[str]
def __lt__(self, other):
return len(self.arr) < len(other.arr)
def min_separation(in_arr, all_symbs):
# compute the mininimum separation between symbols
for s in all_symbs:
locs = [i for i, x in enumerate(in_arr) if x == s]
if len(locs) > 1:
return min([locs[x] - locs[x-1] - 1 for x in range(1, len(locs))])
return 0
def maximize_min_separation(arr):
""" find the organization of symbols in arr to produce the maximum
possible minimum separation between equal symbols
"""
all_symbs = set(arr)
h = []
best = min_separation(arr, all_symbs)
best_sol = arr
hpq.heappush(h, (best, SolutionItem(arr, [])))
while len(h) > 0:
mindist, itm = hpq.heappop(h)
cur_arr = itm.arr
cur_sol = itm.sol
if len(cur_arr) > 0:
options = set(cur_arr)
nxt_arr = copy(cur_arr)
for s in options:
k = nxt_arr.find(s)
nxt_arr = nxt_arr[:k] + nxt_arr[k+1:]
for opt in permutations(options):
if cur_sol:
nxt_sol_frt = list(opt)
nxt_sol_frt.extend(cur_sol)
nxt_sol_bck = copy(cur_sol)
nxt_sol_bck.extend(list(opt))
frt_itm = SolutionItem(nxt_arr, nxt_sol_frt)
bck_itm = SolutionItem(nxt_arr, nxt_sol_bck)
hpq.heappush(h, (-min_separation(frt_itm.sol, all_symbs),
frt_itm))
hpq.heappush(h, (-min_separation(bck_itm.sol, all_symbs),
bck_itm))
else:
frt_itm = SolutionItem(nxt_arr, list(opt))
hpq.heappush(h, (-min_separation(list(opt),
all_symbs), frt_itm))
else:
# come here for a solution
if mindist < best:
best_sol = cur_sol
return ''.join(best_sol)
对多个测试用例执行
maximize_min_separation
会产生以下结果:
Case: AAA --> AAA
Case: AAB --> ABA
Case: AABBB --> BABAB
Case: AABBC --> CBABA
Case: AABBBCC --> BCABACB
Case: AABCBAD --> BACABDA
看起来满足要求。
这里有一个“对输出进行暴力攻击”的解决方案。它不能提供完美的结果,它在更大的列表上效果更好,它通常比在线找到的其他解决方案更快。如果您需要速度而不是精度,这很有用。
from collections import defaultdict
def find_nearest_none(arr, position):
if arr[position] is None:
return position
step = 1
while True:
left_pos = position - step
right_pos = position + step
if right_pos < len(arr) and arr[right_pos] is None:
return right_pos
elif left_pos >= 0 and arr[left_pos] is None:
return left_pos
elif left_pos < 0 and right_pos >= len(arr):
# Both left and right positions are out of bounds
return False
step += 1
def max_distance_list(nums):
num_occurrences = {}
t = len(nums)
# Works on a lit that doesn't contains None
out = [None] * t
# Group data by occurence count
for num in nums:
num_occurrences[num] = num_occurrences.get(num, 0) + 1
num_occurrences = sorted(num_occurrences.items(), key=lambda item: item[1], reverse=True)
# Create a dict from touple list
# grouped_data_dict new format => {10: [3], 4: [1, 5], 2: [44, 4, 6], 1: [7, 8, 9, 11, 123, 22]})
grouped_data = defaultdict(list)
for key, value in num_occurrences:
grouped_data[value].append(key)
start_pos = 0 # start position
it = 0 # iterations
# iterate over the grouped data, beginning from the items that occurr most
for x, y in dict(grouped_data).items():
# iterate over list of items with same occurrences
for z in y:
# calculate the optimal separation between them
sep = (t+1) / x # keep it as a float and round it up later
pos = start_pos; # 0,1,2 increase for each uniq element in array
it += 1;
# Iterate over every occurrence of every element
for i in range(x):
# Write the last occurrence nearest the end as possible
if i == x-1:
pos = t-it;
# get the nearest free position to the oprimal one
free_pos = find_nearest_none(out, int(round(pos,0)))
# write the element in the out array
out[free_pos] = z
# Increment position by optimal (float) separation
pos+=sep;
start_pos+=1;
return out
您可以看到图像中的缺陷率,还可以看到速度
一些测试
[my_func] elements: 2000 -- Tot time: 0.0010275840759277344
[1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 44, 4, 5, 6, 1, 4, 5, 3, 6, 7, 8, 9, 11, 123, 22, 44, 1, 5, 5]
-->
[3, 1, 5, 3, 44, 4, 3, 6, 22, 3, 1, 5, 3, 123, 11, 9, 3, 1, 5, 3, 8, 7, 3, 6, 4, 3, 44, 5, 1, 3]
AABCBAD --> ['A', 'B', 'D', 'A', 'C', 'B', 'A']
AAA --> ['A', 'A', 'A']
AAB --> ['A', 'B', 'A']
AABBC --> ['A', 'B', 'C', 'B', 'A']
AABBBCC --> ['B', 'A', 'C', 'B', 'C', 'A', 'B']
如果我明白你想做什么。 给定一个元素列表,例如 ['A', 'A', 'A', 'B', 'B']
提供一个新列表,以便:
例如从上述输入 ['A', 'B', 'A', 'B', 'A'] 生成输出
下面的代码实现了这个功能
from collections import Counter
def processList(arr: list) -> list:
cd = []
for k,v in Counter(arr).items():
cd.append([k,v])
cd = sorted(cd, key= lambda x: x[1], reverse=True)
rslt = []
while sum([x[1] for x in cd]) > 0:
seq = []
for i in range(len(cd)):
if cd[i][1] > 0:
seq.append(cd[i][0])
cd[i][1] -= 1
rslt.extend(seq)
return rslt
因此使用您的示例作为测试:
processList(['A', 'B', 'A', 'B', 'A'])
产量:
['A', 'B', 'A', 'B', 'A']