让N
成为一个数字(10<=N<=10^5)
。
我必须将它分成3个数字(x,y,z)
,以便验证以下条件。
1. x<=y<=z
2. x^2+y^2=z^2-1;
3. x+y+z<=N
我必须找到一个方法中给定数字可以得到多少组合。
我尝试了如下,但它需要花费很多时间才能获得更高的数字并导致超时...
int N= Int32.Parse(Console.ReadLine());
List<String> res = new List<string>();
//x<=y<=z
int mxSqrt = N - 2;
int a = 0, b = 0;
for (int z = 1; z <= mxSqrt; z++)
{
a = z * z;
for (int y = 1; y <= z; y++)
{
b = y * y;
for (int x = 1; x <= y; x++)
{
int x1 = b + x * x;
int y1 = a - 1;
if (x1 == y1 && ((x + y + z) <= N))
{
res.Add(x + "," + y + "," + z);
}
}
}
}
Console.WriteLine(res.Count());
我的问题:
我的解决方案是花费更多的时间(我认为这是for循环),我该如何改进它?
对于同样的方法有没有更好的方法?
这里有一个方法,使用如下所述的数论来列举三元组,而不是详尽地测试它们:https://mathoverflow.net/questions/29644/enumerating-ways-to-decompose-an-integer-into-the-sum-of-two-squares
因为数学花了我一段时间来理解并花了一段时间来实现(收集一些在其上面得到赞誉的代码),并且因为我对这个主题没有多大的权威,所以我将留给读者进行研究。这是基于将数字表示为高斯整数共轭。 (a + bi)*(a - bi) = a^2 + b^2
。我们首先将数字z^2 - 1
分解为素数,将素数分解为高斯共轭,并找到我们扩展和简化的不同表达式以获得a + bi
,然后可以将其提升为a^2 + b^2
。
阅读关于Sum of Squares Function的一个振作是发现我们可以排除任何候选z^2 - 1
包含具有奇怪力量的4k + 3
形式的素数。单独使用该检查,我能够使用下面的Rosetta素因子代码将Prune的循环从210秒减少到19秒(在repl.it上)。
这里的实现只是一个演示。它没有处理或优化限制x
和y
。相反,它只是枚举它。和它一起玩here。
Python代码:
# https://math.stackexchange.com/questions/5877/efficiently-finding-two-squares-which-sum-to-a-prime
def mods(a, n):
if n <= 0:
return "negative modulus"
a = a % n
if (2 * a > n):
a -= n
return a
def powmods(a, r, n):
out = 1
while r > 0:
if (r % 2) == 1:
r -= 1
out = mods(out * a, n)
r /= 2
a = mods(a * a, n)
return out
def quos(a, n):
if n <= 0:
return "negative modulus"
return (a - mods(a, n))/n
def grem(w, z):
# remainder in Gaussian integers when dividing w by z
(w0, w1) = w
(z0, z1) = z
n = z0 * z0 + z1 * z1
if n == 0:
return "division by zero"
u0 = quos(w0 * z0 + w1 * z1, n)
u1 = quos(w1 * z0 - w0 * z1, n)
return(w0 - z0 * u0 + z1 * u1,
w1 - z0 * u1 - z1 * u0)
def ggcd(w, z):
while z != (0,0):
w, z = z, grem(w, z)
return w
def root4(p):
# 4th root of 1 modulo p
if p <= 1:
return "too small"
if (p % 4) != 1:
return "not congruent to 1"
k = p/4
j = 2
while True:
a = powmods(j, k, p)
b = mods(a * a, p)
if b == -1:
return a
if b != 1:
return "not prime"
j += 1
def sq2(p):
if p % 4 != 1:
return "not congruent to 1 modulo 4"
a = root4(p)
return ggcd((p,0),(a,1))
# https://rosettacode.org/wiki/Prime_decomposition#Python:_Using_floating_point
from math import floor, sqrt
def fac(n):
step = lambda x: 1 + (x<<2) - ((x>>1)<<1)
maxq = long(floor(sqrt(n)))
d = 1
q = n % 2 == 0 and 2 or 3
while q <= maxq and n % q != 0:
q = step(d)
d += 1
return q <= maxq and [q] + fac(n//q) or [n]
# My code...
# An answer for https://stackoverflow.com/questions/54110614/
from collections import Counter
from itertools import product
from sympy import I, expand, Add
def valid(ps):
for (p, e) in ps.items():
if (p % 4 == 3) and (e & 1):
return False
return True
def get_sq2(p, e):
if p == 2:
if e & 1:
return [2**(e / 2), 2**(e / 2)]
else:
return [2**(e / 2), 0]
elif p % 4 == 3:
return [p, 0]
else:
a,b = sq2(p)
return [abs(a), abs(b)]
def get_terms(cs, e):
if e == 1:
return [Add(cs[0], cs[1] * I)]
res = [Add(cs[0], cs[1] * I)**e]
for t in xrange(1, e / 2 + 1):
res.append(
Add(cs[0] + cs[1]*I)**(e-t) * Add(cs[0] - cs[1]*I)**t)
return res
def get_lists(ps):
items = ps.items()
lists = []
for (p, e) in items:
if p == 2:
a,b = get_sq2(2, e)
lists.append([Add(a, b*I)])
elif p % 4 == 3:
a,b = get_sq2(p, e)
lists.append([Add(a, b*I)**(e / 2)])
else:
lists.append(get_terms(get_sq2(p, e), e))
return lists
def f(n):
for z in xrange(2, n / 2):
zz = (z + 1) * (z - 1)
ps = Counter(fac(zz))
is_valid = valid(ps)
if is_valid:
print "valid (does not contain a prime of form\n4k + 3 with an odd power)"
print "z: %s, primes: %s" % (z, dict(ps))
lists = get_lists(ps)
cartesian = product(*lists)
for element in cartesian:
print "prime square decomposition: %s" % list(element)
p = 1
for item in element:
p *= item
print "complex conjugates: %s" % p
vals = p.expand(complex=True, evaluate=True).as_coefficients_dict().values()
x, y = vals[0], vals[1] if len(vals) > 1 else 0
print "x, y, z: %s, %s, %s" % (x, y, z)
print "x^2 + y^2, z^2-1: %s, %s" % (x**2 + y**2, z**2 - 1)
print ''
if __name__ == "__main__":
print f(100)
输出:
valid (does not contain a prime of form
4k + 3 with an odd power)
z: 3, primes: {2: 3}
prime square decomposition: [2 + 2*I]
complex conjugates: 2 + 2*I
x, y, z: 2, 2, 3
x^2 + y^2, z^2-1: 8, 8
valid (does not contain a prime of form
4k + 3 with an odd power)
z: 9, primes: {2: 4, 5: 1}
prime square decomposition: [4, 2 + I]
complex conjugates: 8 + 4*I
x, y, z: 8, 4, 9
x^2 + y^2, z^2-1: 80, 80
valid (does not contain a prime of form
4k + 3 with an odd power)
z: 17, primes: {2: 5, 3: 2}
prime square decomposition: [4 + 4*I, 3]
complex conjugates: 12 + 12*I
x, y, z: 12, 12, 17
x^2 + y^2, z^2-1: 288, 288
valid (does not contain a prime of form
4k + 3 with an odd power)
z: 19, primes: {2: 3, 3: 2, 5: 1}
prime square decomposition: [2 + 2*I, 3, 2 + I]
complex conjugates: (2 + I)*(6 + 6*I)
x, y, z: 6, 18, 19
x^2 + y^2, z^2-1: 360, 360
valid (does not contain a prime of form
4k + 3 with an odd power)
z: 33, primes: {17: 1, 2: 6}
prime square decomposition: [4 + I, 8]
complex conjugates: 32 + 8*I
x, y, z: 32, 8, 33
x^2 + y^2, z^2-1: 1088, 1088
valid (does not contain a prime of form
4k + 3 with an odd power)
z: 35, primes: {17: 1, 2: 3, 3: 2}
prime square decomposition: [4 + I, 2 + 2*I, 3]
complex conjugates: 3*(2 + 2*I)*(4 + I)
x, y, z: 18, 30, 35
x^2 + y^2, z^2-1: 1224, 1224
这是Python的一个简单改进(转换为基于C语言的代码中更快的等价物留给读者练习)。为了获得准确的计算时间,我删除了自己打印解决方案(在之前的运行中验证它们之后)。
z
),仅受其与N
的关系约束。y
)。时间结果:
-------------------- 10
1 solutions found in 2.3365020751953125e-05 sec.
-------------------- 100
6 solutions found in 0.00040078163146972656 sec.
-------------------- 1000
55 solutions found in 0.030081748962402344 sec.
-------------------- 10000
543 solutions found in 2.2078349590301514 sec.
-------------------- 100000
5512 solutions found in 214.93411707878113 sec.
对于大案例,这是3:35,加上你整理和/或打印结果的时间。
如果你需要更快的代码(这仍然是相当强大的力量),在给定(y, x)
的目标值的情况下,研究Diophantine方程和参数化以生成z^2 - 1
对。
import math
import time
def break3(N):
"""
10 <= N <= 10^5
return x, y, z triples such that:
x <= y <= z
x^2 + y^2 = z^2 - 1
x + y + z <= N
"""
"""
Observations:
z <= x + y
z < N/2
"""
count = 0
z_limit = N // 2
for z in range(3, z_limit):
# Since y >= x, there's a lower bound on y
target = z*z - 1
ymin = int(math.sqrt(target/2))
for y in range(ymin, z):
# Given y and z, compute x.
# That's a solution iff x is integer.
x_target = target - y*y
x = int(math.sqrt(x_target))
if x*x == x_target and x+y+z <= N:
# print("solution", x, y, z)
count += 1
return count
test = [10, 100, 1000, 10**4, 10**5]
border = "-"*20
for case in test:
print(border, case)
start = time.time()
print(break3(case), "solutions found in", time.time() - start, "sec.")
x
和y
的界限是问题的重要组成部分。我亲自去了this Wolfram Alpha query并检查了变量的确切形式。
感谢@ Bleep-Bloop和评论,发现了一个非常优雅的绑定优化,即x < n
和x <= y < n - x
。结果是相同的,时间几乎相同。
此外,由于x
和y
的唯一可能值是正整数,我们可以将循环迭代的数量减少一半。
为了进一步优化,由于我们计算了x
的上界,我们为x
建立了所有可能值的列表并使计算平行。这样可以在更高的N
值上节省大量时间,但由于并行化的开销,它对较小的值来说有点慢。
这是最终的代码:
非并行版本,int
值:
List<string> res = new List<string>();
int n2 = n * n;
double maxX = 0.5 * (2.0 * n - Math.Sqrt(2) * Math.Sqrt(n2 + 1));
for (int x = 2; x < maxX; x += 2)
{
int maxY = (int)Math.Floor((n2 - 2.0 * n * x - 1.0) / (2.0 * n - 2.0 * x));
for (int y = x; y <= maxY; y += 2)
{
int z2 = x * x + y * y + 1;
int z = (int)Math.Sqrt(z2);
if (z * z == z2 && x + y + z <= n)
res.Add(x + "," + y + "," + z);
}
}
并行版本,long
值:
using System.Linq;
...
// Use ConcurrentBag for thread safety
ConcurrentBag<string> res = new ConcurrentBag<string>();
long n2 = n * n;
double maxX = 0.5 * (2.0 * n - Math.Sqrt(2) * Math.Sqrt(n2 + 1L));
// Build list to parallelize
int nbX = Convert.ToInt32(maxX);
List<int> xList = new List<int>();
for (int x = 2; x < maxX; x += 2)
xList.Add(x);
Parallel.ForEach(xList, x =>
{
int maxY = (int)Math.Floor((n2 - 2.0 * n * x - 1.0) / (2.0 * n - 2.0 * x));
for (long y = x; y <= maxY; y += 2)
{
long z2 = x * x + y * y + 1L;
long z = (long)Math.Sqrt(z2);
if (z * z == z2 && x + y + z <= n)
res.Add(x + "," + y + "," + z);
}
});
当在i5-8400 CPU上单独运行时,我得到以下结果:
N:10;解决方案:1;经过的时间:0.03毫秒(不平行,
int
)N:100;解决方案:6;经过的时间:0.05毫秒(不平行,
int
)N:1000;解决方案:55;经过的时间:0.3毫秒(不平行,
int
)N:10000;解决方案:543;经过的时间:13.1毫秒(不平行,
int
)N:100000;解决方案:5512;时间流逝:849.4毫秒(平行,
long
)
当long
大于36340时,你必须使用N
,因为当它的平方时,它会溢出int
的最大值。最后,当N
大约23000时,并行版本开始变得比简单版本更好,使用int
s。
没有时间对它进行适当的测试,但似乎产生与您的代码相同的结果(在100 - > 6结果和1000 - > 55结果)。
与N=1000
一起2ms
对你的144ms
也没有列表
和N=10000
在28ms
的时间
var N = 1000;
var c = 0;
for (int x = 2; x < N; x+=2)
{
for (int y = x; y < (N - x); y+=2)
{
long z2 = x * x + y * y + 1;
int z = (int) Math.Sqrt(z2);
if (x + y + z > N)
break;
if (z * z == z2)
c++;
}
}
Console.WriteLine(c);
#include<iostream>
#include<math.h>
int main()
{
int N = 10000;
int c = 0;
for (int x = 2; x < N; x+=2)
{
for (int y = x; y < (N - x); y+=2)
{
auto z = sqrt(x * x + y * y + 1);
if(x+y+z>N){
break;
}
if (z - (int) z == 0)
{
c++;
}
}
}
std::cout<<c;
}
这是我的解决方案。在测试此问题的先前解决方案时,我发现x,y总是偶数,z是奇数。我不知道这背后的数学本质,我目前正试图弄清楚这一点。
我想在C#中完成它,它应该根据问题中提供的条件覆盖所有测试用例。
基本代码,转换为long
来处理N <= 100000
上限,我可以抛出每一个优化。我使用来自@ Mat(+1)Wolfram Alpha查询的备用表单来尽可能地预先计算。我还做了一个最小的完美平方测试,以避免数百万的sqrt()
呼叫在上限:
public static void Main()
{
int c = 0;
long N = long.Parse(Console.ReadLine());
long N_squared = N * N;
double half_N_squared = N_squared / 2.0 - 0.5;
double x_limit = N - Math.Sqrt(2) / 2.0 * Math.Sqrt(N_squared + 1);
for (long x = 2; x < x_limit; x += 2)
{
long x_squared = x * x + 1;
double y_limit = (half_N_squared - N * x) / (N - x);
for (long y = x; y < y_limit; y += 2)
{
long z_squared = x_squared + y * y;
int digit = (int) z_squared % 10;
if (digit == 3 || digit == 7)
{
continue; // minimalist non-perfect square elimination
}
long z = (long) Math.Sqrt(z_squared);
if (z * z == z_squared)
{
c++;
}
}
}
Console.WriteLine(c);
}
我按照趋势并忽略了OP代码暗示的“退化解决方案”,但没有明确说明。