如何提高使用 Python 和 numpy 提取数据的代码性能

问题描述 投票:-1回答:1

我想补全 DIC(数字图像相关)结果中缺失的点,这些点是由于材料破坏(见照片)而丢失的。

我想在这些缺失的区域中放入点(不带数值)。(原有的点来自源数据)

这段代码是我论文的一部分,我想找出拉伸试验中由于材料破坏而丢失的那些点。数据来自 DIC(数字图像相关)方法,该方法测量试样表面的应变。当试样发生局部破坏时,DIC 软件无法在该区域找到像素排列,最终导致这些点丢失。我有 30 多个案例,每个案例 50 帧,处理全部数据的总计算时间大约是一周。下面以版本1给出的代码(不做任何改动)在我的电脑上运行大约需要 4 分钟。版本2是缩短后的代码:把 begin(MARK1) 到 end(MARK1) 之间的语句注释掉,并取消标有"(version 2)"的注释行的注释,但仍然需要 3 分 45 秒。

输入数据:https://github.com/MarekSawicki/data/blob/master/022_0_29-03-2018_e11_45.csv

import numpy as np
import os
# Work from the directory that holds the measurement file.
os.chdir("D:/Marek/doktorat/Badania_obrobione/test")
# Read the CSV export; every field is parsed as float64.
data = np.genfromtxt('022_0_29-03-2018_e11_45.csv', dtype='float64', delimiter=',')
# Skip the first row, then split the table into point coordinates
# (columns 1 and 2, used as X and Y) and the measured values (column 4).
points = data[1:, 1:3]
values = data[1:, 4]
# Translate the coordinates so that both axes start at zero
# (the raw coordinates may be negative or offset from 0).
points[:, 0] -= min(points[:, 0])
points[:, 1] -= min(points[:, 1])
# Scale factor applied to both coordinate columns.
k_scale = 2
points[:, 0:2] *= k_scale
# Store the values as a column vector.
values = values.reshape(len(data) - 1, 1)
# Sort the points by X (points[:, 0]), ties broken by Y (points[:, 1]):
# each row is viewed as one structured record so that the in-place sort
# moves whole rows of the contiguous copy.
array1 = np.ascontiguousarray(points)
record_dtype = [('', array1.dtype)] * array1.shape[-1]
a_view = array1.view(dtype=record_dtype)
a_view.sort(axis=0)
points_sorted = array1
# Start of processing points.
# a and b are the X and Y limits of the integer grid, respectively.
a = np.int32(np.ceil(points[:, 0].max())) + 1
b = np.int32(np.ceil(points[:, 1].max())) + 1
# Template cluster for one unit cell: the 3x3 nodes (0, 0.5, 1) x (0, 0.5, 1).
array2 = np.array([[m * .5, n * .5] for m in range(3) for n in range(3)])
# Initialization of the working variables.
k = 0  # searching limit
# flags telling which cluster rows are to be deleted
bool_array_del = np.zeros((9, 1), dtype=bool)
# array4 collects the points that meet the search criteria
array4 = np.empty((0, 2))
# array7 is the output container
array7 = np.empty((0, 2))
# main loop of concerned code:
# For every unit cell (i, i2) of the scaled grid, build its 3x3 cluster of
# nodes (step 0.5) and keep only the nodes that were not marked as lying next
# to a measured point.  The kept nodes of all cells end up in array7.
#
# Index offsets marked around a measured point, in the order they are applied.
# The order matters: the first out-of-range index aborts the remaining writes.
neighbour_offsets = ((0, 0), (1, -1), (1, 1), (1, 0), (-1, 1),
                     (-1, -1), (-1, 0), (0, 1), (0, -1))
# Per-cell results are gathered in a list and joined once after the loops
# instead of re-allocating array7 with np.append for every cell.
kept_nodes = [array7]
for i in range(0, a):  # X wise loop, a is the X limit
    for i2 in range(0, b):  # Y wise loop, b is the Y limit
        # cluster of nodes in range (i:i+1, i2:i2+1, step=0.5)
        array3 = np.copy(array2)
        array3[:, 0] += i
        array3[:, 1] += i2
        # container of the matching points (reset for every cell)
        array4 = np.empty((0, 2))
        # Scan the points for those inside the search window
        # [i-0.5, i+1.5] x [i2-0.5, i2+1.5].  points_sorted is ordered by X,
        # so the scan stops at the first point right of the window.  (The
        # former "k" start index was reset before every scan and therefore
        # never shortened it; it has been dropped.)
        for i3 in range(points_sorted.shape[0]):
            valx = points_sorted[i3, 0]
            valy = points_sorted[i3, 1]
            if valx > i + 1.5:
                break
            if np.abs(valx - (i + .5)) <= 1:
                if np.abs(valy - (i2 + .5)) <= 1:
                    array4 = np.append(array4, [[valx, valy]], axis=0)
                    # (version 2) break
        # Post-processing of the selected points: decide which cluster nodes
        # are close enough to a measured point to be deleted.
        if array4.shape[0] != 0:
            # (version 2) pass
            # begin(MARK1)
            # round the selected points to the nearest .5 value
            array5 = np.round(array4 * 2) / 2
            # values outside the cell are moved to the closest cluster value
            array5[:, 0] = np.clip(array5[:, 0], i, i + 1)
            array5[:, 1] = np.clip(array5[:, 1], i2, i2 + 1)
            # subtract the cell origin and double: coordinates -> indices 0..2
            array5[:, 0] -= i
            array5[:, 1] -= i2
            array5 *= 2
            # True - delete this cluster node, False - keep it
            array_bool1 = np.zeros((3, 3), dtype=bool)
            for i5 in range(array5.shape[0]):
                row = int(array5[i5, 0])
                col = int(array5[i5, 1])
                # NOTE(review): an index of -1 wraps around to the opposite
                # edge and the first index of 3 aborts the remaining writes.
                # This is the behaviour the author reports as giving correct
                # results, so it is preserved - confirm that it is intended.
                try:
                    for d_row, d_col in neighbour_offsets:
                        array_bool1[row + d_row, col + d_col] = True
                except IndexError:
                    pass
            # row-major positions of the flagged nodes == rows of array3
            result = np.argwhere(array_bool1.ravel())
            array6 = np.delete(array3, result[:, 0], axis=0)
            # keep what is left of the cluster
            kept_nodes.append(array6)
            # end(MARK1)
        else:
            # no measured point near this cell: keep the full cluster
            kept_nodes.append(array3)
array7 = np.concatenate(kept_nodes, axis=0)

这段代码在版本1下给出了令人满意的结果(图3),在版本2下给出了可以接受的结果(图4)。

enter image description here

enter image description here

我是 Python 和 numpy 的新手。你能告诉我怎样改进才能加快代码的运行速度吗?我曾考虑过改用 pandas。

python pandas numpy
1个回答
1
投票

事实上,Jordan Singer 建议了一个非常方便的解决方案——通过 numba 使用 JIT 编译。运行时间从 4 分钟减少到 4–5 秒。另一方面,我不得不放弃使用 np.append,改用固定长度的零数组,然后再从数组中删除零行。

下面是重写后的代码:

import numpy as np
import matplotlib.pyplot as plt
import os
from numba import jit
import time
# Wall-clock reference for the elapsed-time report printed later in the script.
# NOTE(review): this is taken before the jitted function is first called, so
# the reported time presumably includes the JIT compilation - confirm.
start = time.time()

# creating array4 from previous code changed to function with JIT decorator
@jit(nopython=True)
def Numba_Function_array4(if1, if2, source):
    """Collect the points of *source* inside the search window of one cell.

    A point (x, y) is selected when ``|x - (if1 + 0.5)| <= 1`` and
    ``|y - (if2 + 0.5)| <= 1``.

    Parameters:
        if1, if2: X and Y index of the cell.
        source: 2-D array with X in column 0 and Y in column 1.  The rows
            must be sorted by X in ascending order, because the scan stops
            at the first row whose X exceeds ``if1 + 1.5``.

    Returns:
        A ``(10, 2)`` float array.  The leading rows hold the selected
        points in scan order and the remaining rows are zero padding.  At
        most 10 points are stored; further matches are ignored so the
        fixed-size buffer is never written past its end.
    """
    capacity = 10  # fixed number of rows in the output buffer
    rarray = np.zeros((capacity, 2))
    index = 0  # next free row of rarray
    for i3 in range(source.shape[0]):
        valx = source[i3, 0]
        valy = source[i3, 1]
        # rows are sorted by X: nothing further right can match
        if valx > if1 + 1.5:
            break
        if np.abs(valx - (if1 + .5)) <= 1:
            if np.abs(valy - (if2 + .5)) <= 1:
                if index >= capacity:
                    break
                # element-wise stores keep the body simple to type for numba
                rarray[index, 0] = valx
                rarray[index, 1] = valy
                index += 1
    return rarray

# Work from the directory that holds the measurement file.
os.chdir("D:/Marek/doktorat/Badania_obrobione/test")
cwd = os.getcwd()
# Load the data from the CSV export; every field is parsed as float64.
data = np.genfromtxt('022_0_29-03-2018_e11_45.csv', delimiter=',', dtype='float64')
# Skip the first row, then split the table into point coordinates
# (columns 1 and 2, used as X and Y) and the measured values (column 4).
points = data[1:, 1:3]
values = data[1:, 4]
# Shift the coordinates so that both axes start at zero
# (the raw coordinates may be negative or offset from 0).
points[:, 0] -= min(points[:, 0])
points[:, 1] -= min(points[:, 1])
# Scale factor applied to both coordinate columns.
ks = 2
points[:, 0:2] *= ks
# Store the values as a column vector.
values = np.reshape(values, [len(data) - 1, 1])
# Sort the points by X (points[:, 0]), ties broken by Y (points[:, 1]):
# each row is viewed as one structured record so that the in-place sort
# moves whole rows of the contiguous copy.
array1 = np.ascontiguousarray(points)
a_view = array1.view(dtype=[('', array1.dtype)] * array1.shape[-1])
a_view.sort(axis=0)
Input_points_sorted = array1
# Start of processing points.
# a and b are the X and Y limits of the integer grid, respectively.
a = np.int32(np.ceil(np.max(points[:, 0]))) + 1
b = np.int32(np.ceil(np.max(points[:, 1]))) + 1
# Template cluster for one unit cell (step 0.5): the 3x3 nodes
# (0, 0.5, 1) x (0, 0.5, 1).  Built in one step; the original nested loop had
# lost the indentation of its body and could not be parsed.
array2 = np.array([[m * .5, n * .5] for m in range(3) for n in range(3)])
# array7 is the output container
array7 = np.empty((0, 2))
# main loop of concerned code:
# For every unit cell (i, i2) of the scaled grid, build its 3x3 cluster of
# nodes (step 0.5) and keep only the nodes that were not marked as lying next
# to a measured point.  The kept nodes of all cells end up in array7.
#
# Index offsets marked around a measured point, in the order they are applied.
# The order matters: the first out-of-range index aborts the remaining writes.
neighbour_offsets = ((0, 0), (1, -1), (1, 1), (1, 0), (-1, 1),
                     (-1, -1), (-1, 0), (0, 1), (0, -1))
# Per-cell results are gathered in a list and joined once after the loops
# instead of re-allocating array7 with np.append for every cell.
kept_nodes = [array7]
for i in range(0, a):  # X wise loop, a is the X limit
    for i2 in range(0, b):  # Y wise loop, b is the Y limit
        # cluster of nodes in range (i:i+1, i2:i2+1, step=0.5)
        array3 = np.copy(array2)
        array3[:, 0] += i
        array3[:, 1] += i2
        # the most time consuming part of the code lives in this function
        array4 = Numba_Function_array4(i, i2, Input_points_sorted)
        # Numba_Function_array4 returns a fixed-length array padded with zero
        # rows; drop the padding.  Only rows that are entirely zero are
        # removed, so a real point with X == 0 and Y != 0 is kept (testing
        # column 0 alone discarded it).  A point at exactly (0, 0) still
        # cannot be told apart from padding.
        array4 = array4[array4.any(axis=1)]

        # Post-processing of the selected points: decide which cluster nodes
        # are close enough to a measured point to be deleted.
        if array4.shape[0] != 0:
            # round the selected points to the nearest .5 value
            array5 = np.round(array4 * 2) / 2
            # values outside the cell are moved to the closest cluster value
            array5[:, 0] = np.clip(array5[:, 0], i, i + 1)
            array5[:, 1] = np.clip(array5[:, 1], i2, i2 + 1)
            # subtract the cell origin and double: coordinates -> indices 0..2
            array5[:, 0] -= i
            array5[:, 1] -= i2
            array5 *= 2
            # True - delete this cluster node, False - keep it
            array_bool1 = np.zeros((3, 3), dtype=bool)
            for i5 in range(array5.shape[0]):
                row = int(array5[i5, 0])
                col = int(array5[i5, 1])
                # NOTE(review): an index of -1 wraps around to the opposite
                # edge and the first index of 3 aborts the remaining writes.
                # This existing behaviour is preserved - confirm that it is
                # intended.
                try:
                    for d_row, d_col in neighbour_offsets:
                        array_bool1[row + d_row, col + d_col] = True
                except IndexError:
                    pass
            # row-major positions of the flagged nodes == rows of array3
            result = np.argwhere(array_bool1.ravel())
            array6 = np.delete(array3, result[:, 0], axis=0)
            # keep what is left of the cluster
            kept_nodes.append(array6)
        else:
            # no measured point near this cell: keep the full cluster
            kept_nodes.append(array3)
array7 = np.concatenate(kept_nodes, axis=0)
# Report once, after all cells are processed (these lines used to sit inside
# the outer loop and ran on every X step).
end = time.time()
print("Elapsed (after compilation) = %s" % (end - start))
print("Done!")

再次感谢 Jordan,问题解决了!

© www.soinside.com 2019 - 2024. All rights reserved.