解释包含多层括号的方程的问题

问题描述 投票:0回答:1

我正在开发一个 C 程序来解释可能包含多层括号的化学公式,并且我在正确解释这些公式方面面临困难。目标是根据提供的公式将每个原子与一个变量相关联。
我使用下面的代码来处理具有不同复杂程度的化学公式。例如,对于公式

{"2F2(SO4)3", 'A'}
,处理是正确的;然而,对于
{"Na(H2(SO3)4)5", 'B'}
,解释是不正确的。
{"Na(H2(SO3)4)5", 'B'}
的预期结果应该是
Na + H10 + S20 + O60
,但结果是
Na + H10 + S10 + O15
,表明嵌套括号的处理未按预期运行。
根据我的观察,逻辑是将最里面的括号乘以最外面的括号的因子。例如,在
(H2(SO3)4)5
中,它将
'O3'
乘以 5,而不是乘以
4
,然后再乘以
5

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

typedef struct {
    char symbol[3];  // Atom symbol (e.g., "H", "O")
} Atom;

typedef struct {
    char term[50];    // Formula term (e.g., "2H2O", "3CO2")
    char variable;    // Variable associated with the term
} Association;

void printVariableAtomTable(Atom *atoms, int numAtoms, Association *terms, int numTerms) {
    // Print table header
    printf("\nTable of Association between Variables and Elements:\n");
    printf("Variable: ");
    for (int i = 0; i < numTerms; i++) {
        printf("%c  ", terms[i].variable);
    }
    printf("\n");

    // Create matrix to store the quantity of each atom associated with each variable
    int **table = (int **)malloc(numAtoms * sizeof(int *));
    if (table == NULL) {
        printf("Error: Failed to allocate memory for table.\n");
        exit(1);
    }

    for (int i = 0; i < numAtoms; i++) {
        table[i] = (int *)calloc(numTerms, sizeof(int));
        if (table[i] == NULL) {
            printf("Error: Failed to allocate memory for table.\n");
            exit(1);
        }
    }

    // Fill the table with the quantity of each atom associated with each variable
    for (int j = 0; j < numTerms; j++) {
        char *term = terms[j].term;
        int termCoefficient = 1;
        int multiplier = 1;

        // Check if there is a numeric coefficient associated with the term (if any)
        char *coeffEnd = strchr(term, '(');
        if (coeffEnd != NULL) {
            sscanf(coeffEnd + 1, "%d", &termCoefficient);
        }

        int k = 0;
        while (term[k] != '\0') {
            if (isdigit(term[k])) {
                multiplier = term[k] - '0'; // Convert the numeric char to integer
                k++;
                continue;
            }

            if (isupper(term[k])) {
                char symbol[3] = { term[k], '\0' };
                int m = k + 1;
                while (term[m] != '\0' && islower(term[m])) {
                    strncat(symbol, &term[m], 1);
                    m++;
                }

                int elementCoefficient = 1;
                if (term[m] != '\0' && isdigit(term[m])) {
                    elementCoefficient = term[m] - '0';
                    m++;
                }

                int elementIndex = -1;
                for (int n = 0; n < numAtoms; n++) {
                    if (strcmp(atoms[n].symbol, symbol) == 0) {
                        elementIndex = n;
                        break;
                    }
                }

                if (elementIndex != -1) {
                    table[elementIndex][j] += elementCoefficient * multiplier * termCoefficient;
                }

                k = m;
            } else if (term[k] == '(') {
                // Start of a group within parentheses
                int start = k + 1;
                int depth = 1;
                int end = start;

                // Find the end of the group within parentheses
                while (term[end] != '\0' && depth > 0) {
                    if (term[end] == '(') {
                        depth++;
                    } else if (term[end] == ')') {
                        depth--;
                    }
                    end++;
                }

                // Process the group within parentheses
                int groupCoefficient = 1;
                if (term[end] != '\0' && isdigit(term[end])) {
                    sscanf(&term[end], "%d", &groupCoefficient);
                }

                int innerCoefficient = 1;
                int n = start;
                while (n < end) {
                    if (isupper(term[n])) {
                        char groupSymbol[3] = { term[n], '\0' };
                        int m = n + 1;
                        while (term[m] != '\0' && islower(term[m])) {
                            strncat(groupSymbol, &term[m], 1);
                            m++;
                        }

                        int groupIndex = -1;
                        for (int a = 0; a < numAtoms; a++) {
                            if (strcmp(atoms[a].symbol, groupSymbol) == 0) {
                                groupIndex = a;
                                break;
                            }
                        }

                        if (groupIndex != -1) {
                            if (term[m] != '\0' && isdigit(term[m])) {
                                sscanf(&term[m], "%d", &innerCoefficient);
                                while (term[m] != '\0' && isdigit(term[m])) {
                                    m++;
                                }
                            }
                            table[groupIndex][j] += termCoefficient * innerCoefficient * groupCoefficient * multiplier;
                        }

                        n = m;
                    } else {
                        n++;
                    }
                }

                k = end;
            } else {
                k++;
            }
        }
    }

    // Print the table of association between variables and elements
    for (int i = 0; i < numAtoms; i++) {
        printf("%s: ", atoms[i].symbol);
        for (int j = 0; j < numTerms; j++) {
            if (table[i][j] != 0) {
                if (table[i][j] == 1) {
                    printf("%c  ", terms[j].variable);
                } else {
                    printf("%d%c  ", table[i][j], terms[j].variable);
                }
            } else {
                printf("0%c  ", terms[j].variable);
            }
        }
        printf("\n");
    }

    // Free allocated memory for the table
    for (int i = 0; i < numAtoms; i++) {
        free(table[i]);
    }
    free(table);
}

int main() {
    // Example input data (atoms and terms)
    Atom atoms[] = { {"F"}, {"O"}, {"S"}, {"H"}, {"Na"} };
    Association terms[] = { {"2F2(SO4)3", 'A'}, {"Na(H2(SO3)4)5", 'B'} };
    int numAtoms = sizeof(atoms) / sizeof(Atom);
    int numTerms = sizeof(terms) / sizeof(Association);

    // Function call
    printVariableAtomTable(atoms, numAtoms, terms, numTerms);

    return 0;
}

结果

Table of Association between Variables and Elements:
Variable: A  B  
F: 4A  0B  
O: 24A  15B  
S: 6A  10B  
H: 0A  10B  
Na: 0A  B  

如何修改代码以纠正具有多层括号的公式的解释?
是否有更有效的方法来处理具有不同复杂性(包括嵌套括号)的化学式的分析?
我感谢任何解决这个公式解释问题的帮助或建议。谢谢!

c
1个回答
0
投票

一个大问题是我们需要同时做所有事情的想法。 克服这一点。将问题分解为更简单的步骤是可以的, 每一步都会让您更接近想要的结果。

让我们看看问题项:

Na(H2(SO3)4)5

我将做一个小的改变(稍后说明):

S2(H2(SO3)4)5

词汇和数据表示

在下文中,我将使用术语“list”来指代您用来跟踪解析的(元素、计数)值的任何结构。我想到的两个不错的选择是数组(静态即可)或链接列表(单链接就足够了)。请记住,列表必须是有序的,您必须能够向其中添加元素,并对其进行索引/遍历。

⟶我自己的实现使用了单链表,但静态数组可能是最简单的。

此外,我不太担心表示列表中项目的存储方式。我就写,例如

O2
表示
"O"
的元素值和
2
的计数值。存储元素的方式完全取决于您。

我将原子符号转换为相应的原子序数并将其存储为整数。这对于您的任务来说可能有点过分了,但是非常简单又有趣,并且使以后处理它变得更加简单。

问题:括号⟶递归思维

让我们想象一下示例问题:

// This is the list of elements we are collecting.
// It is COMMON to all invocations of the function!
// (You could make it a global variable, or just pass a
//  reference to it as argument to your parsing function.)
list = {}

Invoke your parsing function.

[invocation 0]
  "S2 ( H2 ( S O3 ) 4 ) 5"
   ↑ found Sulfur
  list = { S1 }
    Notice how we add BOTH the element AND an automatic count of 1 to the list?
    We can update that count as necessary.

  "S2 ( H2 ( S O3 ) 4 ) 5"
    ↑ found a subscript
      this is where you update the count of the last item in the list
  list = { S2 }

  "S2 ( H2 ( S O3 ) 4 ) 5"
      ↑ found an open parenthesis
        make note of how long your list is
        RECURSE on your parsing function to handle stuff inside
  list = { S2 }
               ↑0

[invocation 1]
  "S2 ( H2 ( S O3 ) 4 ) 5"
        ↑ found Hydrogen
  list = { S2, H1 }
               ↑0

  "S2 ( H2 ( S O3 ) 4 ) 5"
         ↑ found subscript
           update last item
  list = { S2, H2 }
               ↑0

  "S2 ( H2 ( S O3 ) 4 ) 5"
           ↑ found another open parenthesis
             make note of list length and recurse again
  list = { S2, H2 }
               ↑0  ↑1

[invocation 2]
  "S2 ( H2 ( S O3 ) 4 ) 5"
             ↑ found Sulfur
  list = { S2, H2, S1 }
               ↑0  ↑1

  "S2 ( H2 ( S O3 ) 4 ) 5"
              ↑ (no subscript == do nothing)
  list = { S2, H2, S1 }
               ↑0  ↑1

  "S2 ( H2 ( S O3 ) 4 ) 5"
               ↑ found Oxygen
  list = { S2, H2, S1, O1 }
               ↑0  ↑1

  "S2 ( H2 ( S O3 ) 4 ) 5"
                ↑ found subscript
                  update last item
  list = { S2, H2, S1, O3 }
               ↑0  ↑1

  "S2 ( H2 ( S O3 ) 4 ) 5"
                  ↑ found close parenthesis
                    return!
  list = { S2, H2, S1, O3 }
               ↑0  ↑1

[back to invocation 1]
  "S2 ( H2 ( S O3 ) 4 ) 5"
                    ↑ found subscript
                      update all elements STARTING at pre-recursion list length
  list = { S2, H2, S1, O3 }
               ↑0  ↑1
  list = { S2, H2, S1*4, O3 }
               ↑0  ↑1
  list = { S2, H2, S4, O3*4 }
               ↑0      ↑1
  list = { S2, H2, S4, O12 }
               ↑0
                
  "S2 ( H2 ( S O3 ) 4 ) 5"
                      ↑ found close parenthesis
                       return!
  list = { S2, H2, S4, O12 }
               ↑0

[back to invocation 0]
  "S2 ( H2 ( S O3 ) 4 ) 5"
                        ↑ found subscript
                          update all elements starting at pre-recursion list length
  list = { S2, H2, S4, O12 }
               ↑0
  list = { S2, H2*5, S4, O12 }
               ↑0
  list = { S2, H10, S4*5, O12 }
                    ↑0
  list = { S2, H10, S20, O12*5 }
                         ↑0
  list = { S2, H10, S20, O60 }

  "S2 ( H2 ( S O3 ) 4 ) 5"
                         ↑ end of input; all done!
  list = { S2, H10, S20, O60 }

解析函数不需要以递归方式编写。你完全可以做一个迭代版本。每次发现左括号时,这都需要一个额外的列表来跟踪列表长度。

还有一步

此时您的列表可能包含重复项! 然而值得注意的是,我们明显更接近期望的结果:所有附加内容都已被删除!

    

S2 H10 S20 O60


现在我们需要做的就是组合相似的元素。至少有两种非常明显的方法可以做到这一点,而且都很简单。编写另一个函数来完成它。

主要

全部完成,你的主要功能应该看起来非常简单:

int main() { // s <-- ask user for chemical formula to parse char s[1000]; { printf( "formula? " ); fflush( stdout ); fgets( s, sizeof(s), stdin ); char * p = strchr( s, '\n' ); if (p) *p = '\0'; } // here is the list of (element,count) pairs we will parse out of `s` #define MAX_LIST_SIZE 1000 element list[MAX_LIST_SIZE]; int size = 0; // first pass (recursive function that does the parentheses) parse_elements( s, list, &size, MAX_LIST_SIZE ); // second pass (add duplicate elements) simplify_list( list, &size, MAX_LIST_SIZE ); // now we can print out our list if (size) { printf( "%s%d", list[0].element, list[0].count ); for (int n = 1; n < size; n++) printf( " + %s%d", list[0].element, list[0].count ); printf( "\n" ); } }
    
© www.soinside.com 2019 - 2024. All rights reserved.