我正在开发一个 C 程序,用来解析可能包含多层括号的化学式,但在正确解析这些化学式时遇到了困难。目标是根据给定的化学式,统计每种原子的数量,并将其与该化学式对应的变量关联起来。
我使用下面的代码来处理具有不同复杂程度的化学公式。例如,对于公式
{"2F2(SO4)3", 'A'}
,处理是正确的;然而,对于{"Na(H2(SO3)4)5", 'B'}
,解释是不正确的。 {"Na(H2(SO3)4)5", 'B'}
的预期结果应该是 Na + H10 + S20 + O60
,但结果是 Na + H10 + S10 + O15
,表明嵌套括号的处理未按预期运行:在 (H2(SO3)4)5 中,程序只把 'O3' 乘以 5,而不是先乘以 4、再乘以 5。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* A chemical element, identified only by its symbol. */
typedef struct Atom {
    /* NUL-terminated element symbol; the buffer holds at most two
       characters plus the terminator (e.g. "H", "Na"). */
    char symbol[3];
} Atom;
/* Pairs one formula term with the single-character variable that labels it. */
typedef struct Association {
    /* NUL-terminated formula text, e.g. "2H2O" or "3CO2"
       (at most 49 characters plus the terminator). */
    char term[50];
    /* Variable letter printed for this term, e.g. 'A'. */
    char variable;
} Association;
void printVariableAtomTable(Atom *atoms, int numAtoms, Association *terms, int numTerms) {
// Print table header
printf("\nTable of Association between Variables and Elements:\n");
printf("Variable: ");
for (int i = 0; i < numTerms; i++) {
printf("%c ", terms[i].variable);
}
printf("\n");
// Create matrix to store the quantity of each atom associated with each variable
int **table = (int **)malloc(numAtoms * sizeof(int *));
if (table == NULL) {
printf("Error: Failed to allocate memory for table.\n");
exit(1);
}
for (int i = 0; i < numAtoms; i++) {
table[i] = (int *)calloc(numTerms, sizeof(int));
if (table[i] == NULL) {
printf("Error: Failed to allocate memory for table.\n");
exit(1);
}
}
// Fill the table with the quantity of each atom associated with each variable
for (int j = 0; j < numTerms; j++) {
char *term = terms[j].term;
int termCoefficient = 1;
int multiplier = 1;
// Check if there is a numeric coefficient associated with the term (if any)
char *coeffEnd = strchr(term, '(');
if (coeffEnd != NULL) {
sscanf(coeffEnd + 1, "%d", &termCoefficient);
}
int k = 0;
while (term[k] != '\0') {
if (isdigit(term[k])) {
multiplier = term[k] - '0'; // Convert the numeric char to integer
k++;
continue;
}
if (isupper(term[k])) {
char symbol[3] = { term[k], '\0' };
int m = k + 1;
while (term[m] != '\0' && islower(term[m])) {
strncat(symbol, &term[m], 1);
m++;
}
int elementCoefficient = 1;
if (term[m] != '\0' && isdigit(term[m])) {
elementCoefficient = term[m] - '0';
m++;
}
int elementIndex = -1;
for (int n = 0; n < numAtoms; n++) {
if (strcmp(atoms[n].symbol, symbol) == 0) {
elementIndex = n;
break;
}
}
if (elementIndex != -1) {
table[elementIndex][j] += elementCoefficient * multiplier * termCoefficient;
}
k = m;
} else if (term[k] == '(') {
// Start of a group within parentheses
int start = k + 1;
int depth = 1;
int end = start;
// Find the end of the group within parentheses
while (term[end] != '\0' && depth > 0) {
if (term[end] == '(') {
depth++;
} else if (term[end] == ')') {
depth--;
}
end++;
}
// Process the group within parentheses
int groupCoefficient = 1;
if (term[end] != '\0' && isdigit(term[end])) {
sscanf(&term[end], "%d", &groupCoefficient);
}
int innerCoefficient = 1;
int n = start;
while (n < end) {
if (isupper(term[n])) {
char groupSymbol[3] = { term[n], '\0' };
int m = n + 1;
while (term[m] != '\0' && islower(term[m])) {
strncat(groupSymbol, &term[m], 1);
m++;
}
int groupIndex = -1;
for (int a = 0; a < numAtoms; a++) {
if (strcmp(atoms[a].symbol, groupSymbol) == 0) {
groupIndex = a;
break;
}
}
if (groupIndex != -1) {
if (term[m] != '\0' && isdigit(term[m])) {
sscanf(&term[m], "%d", &innerCoefficient);
while (term[m] != '\0' && isdigit(term[m])) {
m++;
}
}
table[groupIndex][j] += termCoefficient * innerCoefficient * groupCoefficient * multiplier;
}
n = m;
} else {
n++;
}
}
k = end;
} else {
k++;
}
}
}
// Print the table of association between variables and elements
for (int i = 0; i < numAtoms; i++) {
printf("%s: ", atoms[i].symbol);
for (int j = 0; j < numTerms; j++) {
if (table[i][j] != 0) {
if (table[i][j] == 1) {
printf("%c ", terms[j].variable);
} else {
printf("%d%c ", table[i][j], terms[j].variable);
}
} else {
printf("0%c ", terms[j].variable);
}
}
printf("\n");
}
// Free allocated memory for the table
for (int i = 0; i < numAtoms; i++) {
free(table[i]);
}
free(table);
}
/* Demo driver: builds a fixed set of atoms and formula terms and prints
   their association table. */
int main() {
    Atom atoms[] = { {"F"}, {"O"}, {"S"}, {"H"}, {"Na"} };
    Association terms[] = { {"2F2(SO4)3", 'A'}, {"Na(H2(SO3)4)5", 'B'} };

    /* Element counts are derived from the arrays themselves so they stay in
       sync with the initializers above. */
    int atomTotal = (int)(sizeof atoms / sizeof atoms[0]);
    int termTotal = (int)(sizeof terms / sizeof terms[0]);

    printVariableAtomTable(atoms, atomTotal, terms, termTotal);
    return 0;
}
结果
Table of Association between Variables and Elements:
Variable: A B
F: 4A 0B
O: 24A 15B
S: 6A 10B
H: 0A 10B
Na: 0A B
如何修改代码以纠正具有多层括号的公式的解释?
是否有更有效的方法来处理具有不同复杂性(包括嵌套括号)的化学式的分析?
我感谢任何解决这个公式解释问题的帮助或建议。谢谢!
一个很大的障碍是“必须一次把所有事情都做完”这种想法,请先放下它。把问题分解成若干更简单的步骤完全没有问题,每一步都会让您更接近想要的结果。
让我们看看问题项:
Na(H2(SO3)4)5
我将做一个小的改变(稍后说明):
S2(H2(SO3)4)5
在下文中,我用“列表(list)”一词指代您用来记录解析结果——即(元素、计数)值对——的任意数据结构。我想到两个不错的选择:数组(静态数组即可)或链表(单向链表就足够了)。请记住,该列表必须是有序的,并且您必须能够向其中追加元素,以及按索引访问/遍历它。
⟶我自己的实现使用了单链表,但静态数组可能是最简单的。
此外,我不太担心表示列表中项目的存储方式。我就写,例如
O2
表示"O"
的元素值和2
的计数值。存储元素的方式完全取决于您。
⟶我将原子符号转换为相应的原子序数并将其存储为整数。这对于您的任务来说可能有点过分了,但是非常简单又有趣,并且使以后处理它变得更加简单。
让我们想象一下示例问题:
// This is the list of elements we are collecting.
// It is COMMON to all invocations of the function!
// (You could make it a global variable, or just pass a
// reference to it as argument to your parsing function.)
list = {}
Invoke your parsing function.
[invocation 0]
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found Sulfur
list = { S1 }
Notice how we add BOTH the element AND an automatic count of 1 to the list?
We can update that count as necessary.
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found a subscript
this is where you update the count of the last item in the list
list = { S2 }
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found an open parenthesis
make note of how long your list is
RECURSE on your parsing function to handle stuff inside
list = { S2 }
↑0
[invocation 1]
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found Hydrogen
list = { S2, H1 }
↑0
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found subscript
update last item
list = { S2, H2 }
↑0
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found another open parenthesis
make note of list length and recurse again
list = { S2, H2 }
↑0 ↑1
[invocation 2]
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found Sulfur
list = { S2, H2, S1 }
↑0 ↑1
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ (no subscript == do nothing)
list = { S2, H2, S1 }
↑0 ↑1
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found Oxygen
list = { S2, H2, S1, O1 }
↑0 ↑1
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found subscript
update last item
list = { S2, H2, S1, O3 }
↑0 ↑1
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found close parenthesis
return!
list = { S2, H2, S1, O3 }
↑0 ↑1
[back to invocation 1]
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found subscript
update all elements STARTING at pre-recursion list length
list = { S2, H2, S1, O3 }
↑0 ↑1
list = { S2, H2, S1*4, O3 }
↑0 ↑1
list = { S2, H2, S4, O3*4 }
↑0 ↑1
list = { S2, H2, S4, O12 }
↑0
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found close parenthesis
return!
list = { S2, H2, S4, O12 }
↑0
[back to invocation 0]
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ found subscript
update all elements starting at pre-recursion list length
list = { S2, H2, S4, O12 }
↑0
list = { S2, H2*5, S4, O12 }
↑0
list = { S2, H10, S4*5, O12 }
↑0
list = { S2, H10, S20, O12*5 }
↑0
list = { S2, H10, S20, O60 }
"S2 ( H2 ( S O3 ) 4 ) 5"
↑ end of input; all done!
list = { S2, H10, S20, O60 }
解析函数不一定要写成递归形式,您完全可以写一个迭代版本。迭代版本需要一个额外的列表(相当于栈):每遇到一个左括号,就把当时的列表长度记录进去。
还有一步:合并重复的元素。解析得到的列表是
S2 H10 S20 O60
其中 S 出现了两次(这正是前面把 Na 换成 S 的原因),需要把相同元素的计数相加。
主函数(main):
/*
 * Reads one chemical formula from standard input, parses it into a list of
 * (element, count) pairs, merges duplicate elements, and prints the result as
 * "X1 + Y2 + ...".
 *
 * Returns 0 on success and 1 when no input line could be read.
 */
int main()
{
    // s <-- ask user for chemical formula to parse
    char s[1000];
    {
        printf( "formula? " );
        fflush( stdout );
        // On EOF or a read error `s` is never filled in, so stop here rather
        // than scan an uninitialized buffer.
        if (!fgets( s, sizeof(s), stdin ))
            return 1;
        // fgets keeps the trailing newline; strip it
        char * p = strchr( s, '\n' );
        if (p) *p = '\0';
    }

    // here is the list of (element,count) pairs we will parse out of `s`
    #define MAX_LIST_SIZE 1000
    element list[MAX_LIST_SIZE];
    int size = 0;

    // first pass (recursive function that does the parentheses)
    parse_elements( s, list, &size, MAX_LIST_SIZE );

    // second pass (add duplicate elements)
    simplify_list( list, &size, MAX_LIST_SIZE );

    // now we can print out our list
    if (size)
    {
        printf( "%s%d", list[0].element, list[0].count );
        // every later entry is printed from its own slot, prefixed with " + "
        for (int n = 1; n < size; n++)
            printf( " + %s%d", list[n].element, list[n].count );
        printf( "\n" );
    }

    return 0;
}