我有一个大数据集和一个函数。该函数如下所示:
"""
    myfunc(df)

Collect every pair of rows `(i, j)` with `i < j` of `df` such that

* the values in the first column differ (rows of different persons),
* the `StartTime`/`EndTime` intervals of the two rows satisfy the overlap
  predicate below, and
* the Euclidean distance between `(X, Y)` of the two rows is at most 50.

`df` must provide the columns `Person`, `StartTime`, `EndTime`, `X`, `Y`, `Link`,
`ActType`, `groupindices`, `income` and `home-activity-zone`.

Returns a `DataFrame` with one row per matching pair and the columns `Person1`,
`Person2`, `Distance` (rounded to 3 significant digits), `Stime1`, `Etime1`,
`Stime2`, `Etime2`, `X1`, `Y1`, `X2`, `Y2`, `Link1`, `Link2`, `Acttype1`,
`Acttype2`, `Groupindices1`, `Groupindices2`, `Income1`, `Income2`, `haz1`, `haz2`.
The suffix `1` refers to row `i`, the suffix `2` to row `j`.

The scan is quadratic in the number of rows.
"""
function myfunc(df)
    nrows = size(df, 1)

    # Fetch every column once; indexing `df[i, :]` inside the double loop would
    # build a new row object for each single field access.
    firstcol = df[!, 1]
    person = df[!, "Person"]
    stime = df[!, "StartTime"]
    etime = df[!, "EndTime"]
    xs = df[!, "X"]
    ys = df[!, "Y"]
    link = df[!, "Link"]
    acttype = df[!, "ActType"]
    group = df[!, "groupindices"]
    income = df[!, "income"]
    homezone = df[!, "home-activity-zone"]

    # Output columns, one entry per matching pair.
    person1 = Vector{Int64}()
    person2 = Vector{Int64}()
    stime1 = Vector{Int64}()
    etime1 = Vector{Int64}()
    stime2 = Vector{Int64}()
    etime2 = Vector{Int64}()
    distance = Vector{Float64}()
    x1 = Vector{Int64}()
    y1 = Vector{Int64}()
    x2 = Vector{Int64}()
    y2 = Vector{Int64}()
    link1 = Vector{Int64}()
    link2 = Vector{Int64}()
    acttype1 = Vector{String}()
    acttype2 = Vector{String}()
    group1 = Vector{Int64}()
    group2 = Vector{Int64}()
    income1 = Vector{Int64}()
    income2 = Vector{Int64}()
    haz1 = Vector{String}()
    haz2 = Vector{String}()

    for i in 1:nrows
        # Values of row i do not change while j varies.
        si, ei = stime[i], etime[i]
        xi, yi = xs[i], ys[i]

        for j in (i + 1):nrows
            # Only pairs of different persons are of interest.
            firstcol[i] != firstcol[j] || continue

            sj, ej = stime[j], etime[j]

            # Interval-overlap predicate, kept exactly as in the original code:
            #   1. i starts no later than j and ends strictly inside j
            #   2. i starts strictly inside j and ends after j
            #   3. i lies within j
            #   4. j lies within i (i starting strictly earlier)
            overlapping = ((sj < ei < ej) && (si <= sj)) ||
                          ((sj < si < ej) && (ej < ei)) ||
                          ((sj <= si) && (ei <= ej)) ||
                          ((si < sj) && (ej <= ei))
            overlapping || continue

            dist = euclidean([xi, yi], [xs[j], ys[j]])
            dist <= 50 || continue

            push!(person1, person[i])
            push!(person2, person[j])
            push!(stime1, si)
            push!(etime1, ei)
            push!(stime2, sj)
            push!(etime2, ej)
            push!(distance, round(dist, sigdigits = 3))
            push!(x1, xi)
            push!(y1, yi)
            push!(x2, xs[j])
            push!(y2, ys[j])
            push!(link1, link[i])
            push!(link2, link[j])
            push!(acttype1, acttype[i])
            push!(acttype2, acttype[j])
            push!(group1, group[i])
            push!(group2, group[j])
            push!(income1, income[i])
            push!(income2, income[j])
            push!(haz1, homezone[i])
            push!(haz2, homezone[j])
        end
    end

    # The original returned undefined names (Person1, Distance, ...) here, which
    # raises UndefVarError; the accumulated vectors are what must be returned.
    return DataFrame(
        Person1 = person1, Person2 = person2, Distance = distance,
        Stime1 = stime1, Etime1 = etime1, Stime2 = stime2, Etime2 = etime2,
        X1 = x1, Y1 = y1, X2 = x2, Y2 = y2, Link1 = link1, Link2 = link2,
        Acttype1 = acttype1, Acttype2 = acttype2,
        Groupindices1 = group1, Groupindices2 = group2,
        Income1 = income1, Income2 = income2, haz1 = haz1, haz2 = haz2,
    )
end
得到结果的运行时间会超过 10 天,所以我想用并行计算来加快运行。我尝试了多线程,但它报错:“@threads 当前不支持嵌套的外层循环”。我读过一些教程,但没有找到以并行方式运行该程序的方法。希望能得到一些建议。
对你问题的直接回答是:下面这种写法是行不通的:
@threads for i in 1:iter, j in (i+1):iter
因此最简单的方法是只对其中一层循环使用多线程,例如
for i in 1:iter
@threads for j in (i+1):iter
更好的回答可能是:你的代码看起来效率很低,而且我也不确定它是否线程安全,所以与其一味地投入更多算力,不如把时间花在寻找更高效的解法上。
我建议使用一些虚拟数据制作一个可运行的示例,并在 JuliaLang Discourse 论坛上寻求帮助以加快速度。