我目前正在解决Project Euler的第37个问题(“截断素数”)。本质上,该任务涉及识别 11 个质数,这些数具有独特的属性,即当从左侧或右侧删除任何数字时,所得数字仍然是质数。例如,3797 就是这样一个素数,因为从左侧或右侧顺序删除数字(797、97、7、379、37、3)会产生素数。注意:对于这个特定问题,2、3、5、7 不被视为可截断。
最初,我用 Python 实现了该算法,并且它在不到一秒的时间内高效执行:
l = []
p = {"","2","3","5","7"}
i = 11
while len(l) < 11:
if all(i%j for j in range(2, int(i**0.5) + 1)):
n = str(i)
p.add(n)
if all(n[:j] in p and n[j:] in p for j in range(1, len(n))):
l.append(n)
print(l)
i += 2
print(sum(int(i) for i in l))
随后,我尝试在 C 中实现相同的算法。由于 C 中没有集合数据类型,我选择了带有二分查找的链表。虽然实现是有效的,但它表现出相当慢的速度,运行了大约一分钟。我正在寻求对 C 代码总体设计和结构中潜在缺陷的见解:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#define TRUNCATABLE 11
struct Node {
int val;
struct Node *next;
struct Node *previous;
};
bool search(struct Node *head, const int a, int listLength) {
struct Node *current = head;
int pos = 0;
int left = 0;
int right = listLength;
while (current != NULL && left < right) {
int mid = (left + right) / 2;
while (pos < mid) {
current = current->next;
pos++;
}
while (pos > mid) {
current = current->previous;
pos--;
}
if (current->val < a) {
left = mid + 1;
} else if (current->val > a) {
right = mid;
} else {
return true;
}
}
return false;
}
void freeLinkedList(struct Node *head) {
while (head != NULL) {
struct Node *temp = head;
head = head->next;
free(temp);
}
}
int main() {
struct Node *node2 = malloc(sizeof(struct Node));
struct Node *node3 = malloc(sizeof(struct Node));
struct Node *node5 = malloc(sizeof(struct Node));
struct Node *node7 = malloc(sizeof(struct Node));
node2->next = node3;
node3->next = node5;
node5->next = node7;
node7->next = NULL;
node2->previous = NULL;
node3->previous = node2;
node5->previous = node3;
node7->previous = node5;
node2->val = 2;
node3->val = 3;
node5->val = 5;
node7->val = 7;
struct Node *last = node7;
struct Node *first = node2;
int listLength = 4;
int sum = 0;
for (int i = 0, p = 11; i < TRUNCATABLE; p += 2) {
int n = p;
bool is_prime = true;
for (int j = 2; j * j <= n; j++) {
if (n % j == 0) {
is_prime = false;
break;
}
}
bool is_trunc_prime = true;
if (is_prime) {
struct Node *new_node = malloc(sizeof(struct Node));
last->next = new_node;
new_node->previous = last;
new_node->val = p;
new_node->next = NULL;
last = new_node;
listLength++;
}
int digits = log10(n);
while (is_trunc_prime && n > 10) {
n /= 10;
if (!search(first, n)) {
is_trunc_prime = false;
break;
}
}
n = p;
while (is_trunc_prime && digits > 0) {
n = n % (int)pow(10, digits);
if (!search(first, n)) {
is_trunc_prime = false;
break;
}
digits--;
}
if (is_prime && is_trunc_prime) {
sum += p;
i++;
}
}
printf("%d\n", sum);
freeLinkedList(node2);
return 0;
}
任何关于提高 C 实现性能的建议或批评都值得赞赏。预先感谢!
由于您的搜索只能使用
current = current->next;
和 current = current->previous;
等语句遍历链表,因此没有什么比 线性 搜索做得更好的了。 二元搜索的特点是您可以跳转到某个位置,而无需访问中间位置。这是链表无法达到的效率。
我建议使用一种方法,用筛子(即指示索引是否是集合中的数字的布尔数组)替换 Python 代码中的 set-idea。您需要分配一个足够大的数组来存储所有自然数的布尔值,直到某个合理的限制。在这里,我们期望找到最多 6 位十进制数字的结果,因此分配一百万个条目(字节)就足够了。
这是这个想法的实现:
#include <stdio.h>
// Must be a power of 10
#define SIEVE_SIZE 1000000
#define RESULT_SIZE 11
int main(void) {
char sieve[SIEVE_SIZE];
// Get all primes < SIEVE_SIZE: naive Eratosthenes sieve
sieve[0] = sieve[1] = 1; // Not primes
for (int i = 2; i < SIEVE_SIZE; i++) {
if (!sieve[i]) { // Is prime?
for (int j = i+i; j < SIEVE_SIZE; j += i) {
sieve[j] = 1; // Not prime
}
}
}
// Main logic
int sum = 0;
int k = 0;
for (int i = 11; i < SIEVE_SIZE; i++) {
// Check for numbers with digits removed at the right side (including number itself)
int j = i;
while (j > 0 && !sieve[j]) {
j = j / 10;
}
if (j) continue;
// Check for numbers with digits removed at the left side
j = SIEVE_SIZE / 10;
while (j > 1 && !sieve[i % j]) {
j = j / 10;
}
if (j > 1) continue;
// Bingo
printf("%d ", i);
sum += i;
if (k == RESULT_SIZE) {
break;
}
}
printf("\nsum = %d\n", sum);
return 0;
}
打印:
23 37 53 73 313 317 373 797 3137 3797 739397
sum = 748317