对数轴图的线性拟合不适合数据

问题描述 投票:0回答:1

当我尝试将线性曲线拟合到我的数据点时,该曲线与我的数据不匹配。这条线在我的数据点下方,斜率也太深了。这可能是由于从 x=10 及其相应的 y 值开始,但我不确定。最小二乘法应该是正确的,因为我尝试了多种计算斜率和截距的方法,但所有外观方法都产生相同的输出。

#include <stdio.h>
#include <math.h>
#include <stdlib.h>

#define PATH_GNUPLOT "your/path/gnuplot"

void ERROR(char* error_string);

int main() {

    // first 10 data points are irrelevant
    double data[100] = {0,0,0,0,0,0,0,0,0,0, 0.242910, 0.235523, 0.228976, 0.223114, 0.217820, 0.213005, 0.208597, 0.204539, 0.200786, 0.197299, 0.194047, 0.191003, 0.188146, 0.185456, 0.182916, 0.180513, 0.178234, 0.176068, 0.174006, 0.172039, 0.170160, 0.168362, 0.166639, 0.164987, 0.163399, 0.161872, 0.160402, 0.158984, 0.157617, 0.156297, 0.155020, 0.153785, 0.152590, 0.151431, 0.150308, 0.149218, 0.148159, 0.147131, 0.146131, 0.145158, 0.144211, 0.143289, 0.142391, 0.141515, 0.140661, 0.139827, 0.139014, 0.138219, 0.137442, 0.136683, 0.135941, 0.135215, 0.134504, 0.133809, 0.133128, 0.132461, 0.131807, 0.131166, 0.130538, 0.129922, 0.129318, 0.128725, 0.128142, 0.127571, 0.127010, 0.126458, 0.125916, 0.125384, 0.124861, 0.124346, 0.123840, 0.123342, 0.122853, 0.122371, 0.121897, 0.121430, 0.120970, 0.120518, 0.120072, 0.119633, 0.119200, 0.118774, 0.118354, 0.117939, 0.117531, 0.117128, 0.116731, 0.116340, 0.115953, 0.115572};

    int n = 100;

    /* calc fit */
    double sumx, sumy, sumx2, sumxy, a, b = 0.0;

    // => log(y) = a * log(x) + b
    // data starts at 10 for x value
    for (int i = 10; i < n; i++) {

        double x = log(i);
        double y = log(data[i]);

        sumx += x;
        sumx2 += x * x;
        sumy += y;
        sumxy += y * x;
    }

    a = (n * sumxy  -  sumx * sumy) / (n * sumx2 - sumx*sumx);
    b = (sumy * sumx2  -  sumx * sumxy) / (n * sumx2 - sumx*sumx);
    /* end calc fit */

    

    /* save data and plot */
    FILE *fp = fopen("data.dat", "w");
    
    if (fp == NULL) {
        ERROR("failed to open data.dat");
    }
    
    for (int i = 10; i < n; i++)
        fprintf(fp, "%d %.16lf\n", i, data[i]);
    
    fclose(fp);

    FILE *GNUpipe = popen(PATH_GNUPLOT, "w");
    
    if (GNUpipe == NULL) {
        ERROR("failed to open gnuplot");
    }
    
    fprintf(GNUpipe, "set term png\n");
    fprintf(GNUpipe, "set logscale xy\n");

    // log(y) = a * log(x) + b
    // => y = x**a + 10**b              
    fprintf(GNUpipe, "Fit(x) = x**%lf * 10**%lf\n", a, b);
    
    fprintf(GNUpipe, "plot [0.1:%d*1.1] 'data.dat' title 'error', ", n);

    fprintf(GNUpipe, "Fit(x) title 'fit'\n");
    
    pclose(GNUpipe);

}

void ERROR(char* error_string) {
    
    fprintf(stderr, error_string);
    
    exit(EXIT_FAILURE);
}

c curve-fitting least-squares
1个回答
0
投票

至少这些问题:

需要归零初始化

// double sumx, sumy, sumx2, sumxy, a, b = 0.0; 
double sumx = 0.0;
double sumy = 0.0;
...

节省时间

启用所有警告:“警告:数组初始值设定项中的多余元素”

初始化器太多

// double data[100] = {  101 initializers };

避免失去精度

// fprintf(GNUpipe, "Fit(x) = x**%lf * 10**%.*le\n", a, b);
fprintf(GNUpipe, "Fit(x) = x**%.16le * 10**%.16le\n", a, b);

错了

n

代码使用

n
。因为,
for (int i = 10; i < n; i++)
,应该是
n-10
.

也许错误的公式?

我觉得不对,需要更深入的审查。

// ??
a = (n * sumxy  -  sumx * sumy) / (n * sumx2 - sumx*sumx);
b = (sumy * sumx2  -  sumx * sumxy) / (n * sumx2 - sumx*sumx);

我期待:ref.

N = n - 10;
double denom = N*sumx2 - sumx * sumx;
a = (sumy * sumx2 - summx * sum xy)/ denom;
b = (N*sumxy * sumx * sumxy)/ denom;
© www.soinside.com 2019 - 2024. All rights reserved.