这段代码的目的是:把一个所有数据都挤在同一行里的怪异 .csv 文件,转换成多行的 CSV 文件。
import csv
import re
# Script: read a single-line "L,x,y,x,y,L,..." file and write one "x,y"
# pair per line to the output file, printing progress along the way.
filenmi = "original.csv"
filenmo = "data-out.csv"
# The context managers close both files even if an error occurs part-way.
with open(filenmi, 'r') as infile, open(filenmo, 'w+') as outfile:
    for line in infile:
        print ('read data :',line)
        # Remove every character except digits, '|', '^', ',' and '.'
        # (the '|' and '^' inside the brackets are literal characters).
        line2 = re.sub('[^0-9|^,^.]','',line)
        # Collapse the double comma left where an inner "L" was removed.
        line2 = re.sub(',,',',',line2)
        print ('clean data: ',line2)
        wordlist = line2.split(",")
        # NOTE: removing the leading "L" leaves a leading ',' in line2, so
        # wordlist[0] is an empty string and every pair below is shifted by
        # one field.
        n=(len(wordlist))/2
        print ('num data pairs: ',n)
        i=0
        print ('data paired :')
        while i < n*2 :
            # Take the two fields starting at i; slicing cannot raise when
            # the final pair is incomplete.
            pairlst = wordlist[i:i+2]
            pairstr = ','.join( pairlst )
            print(' ',i/2+1,' ',pairstr)
            pairstr = pairstr + '\n'
            outfile.write( pairstr )
            i=i+2
我希望这段代码能把一个格式混乱的 .txt 文件,例如
L,39,100,50.5,83,L,50.5,83
转换成如下例所示、格式正常的 csv 文件:
39,100
50.5,83
50.5,83
但我实际得到的输出却是这样:
,39
100,50.5
83,50.5
83,
我不知道哪里出了问题,也不知道该如何解决。如果有人能帮忙,那就太好了。
::Data Set::
L,39,100,50.5,83,L,50.5,83,57.5,76,L,57.5,76,67,67.5,L,67,67.5,89,54,L,89,54,100.5,49,L,100.5,49,111.5,45.5,L,111.5,45.5,134,42,L,134,42,152.5,44,L,152.5,44,160,46.5,L,160,46.5,168,52,L,168,52,170,56.5,L,170,56.5,162,64.5,L,162,64.5,152.5,70,L,152.5,70,126,85.5,L,126,85.5,113.5,94,L,113.5,94,98,105.5,L,98,105.5,72.5,132,L,72.5,132,64.5,145,L,64.5,145,57.5,165.5,L,57.5,165.5,57,176,L,57,176,63.5,199.5,L,63.5,199.5,69,209,L,69,209,76,216.5,L,76,216.5,83.5,222,L,83.5,222,90.5,224.5,L,90.5,224.5,98,225.5,L,98,225.5,105.5,225,L,105.5,225,115,223,L,115,223,124.5,220,L,124.5,220,133.5,216.5,L,133.5,216.5,142,212,L,142,212,149,207,L,149,207,156.5,201.5,L,156.5,201.5,163.5,195.5,L,163.5,195.5,172.5,185.5,L,172.5,185.5,175,180.5,L,175,180.5,177,173,L,177,173,177.5,154,L,177.5,154,174.5,142.5,L,174.5,142.5,168.5,133.5,L,168.5,133.5,150,131.5,L,150,131.5,135,136.5,L,135,136.5,120.5,144.5,L,120.5,144.5,110.5,154,L,110.5,154,104,161.5,L,104,161.5,99.5,168.5,L,99.5,168.5,98,173,L,98,173,97.5,176,L,97.5,176,99.5,178,L,99.5,178,105,179.5,L,105,179.5,112.5,179,L,112.5,179,132,175.5,L,132,175.5,140.5,175,L,140.5,175,149.5,175,L,149.5,175,157,176.5,L,157,176.5,169.5,181.5,L,169.5,181.5,174,185.5,L,174,185.5,178,206,L,178,206,176.5,214.5,L,176.5,214.5,161,240.5,L,161,240.5,144.5,251,L,144.5,251,134.5,254,L,134.5,254,111.5,254.5,L,111.5,254.5,98,253,L,98,253,71.5,248,L,71.5,248,56,246,
您的代码之所以失败,是因为执行 line2 = re.sub('[^0-9|^,^.]','',line)
之后,得到的结果是 ,39,100,50.5,83,,50.5,83
(注意开头多出了一个逗号)。
该行使用 re
把所有不是数字、点或逗号的字符替换为空字符串 ''
。这会删除输入中的 L,
但紧跟其后的第二个字符(一个逗号)会被保留下来。
我修复了这个问题,并对生成 csv 行列表的方式稍作修改。
下面的代码可以正常工作。
import csv
import re
filenmi = "original.csv"
filenmo = "data-out.csv"

# Anything that is not a digit, a decimal point or a comma is noise
# (for example the "L" markers and the trailing newline).
NOISE_RE = re.compile(r"[^0-9.,]")


def extract_values(line):
    """Return the numeric fields of *line* as a list of strings.

    Every character other than digits, '.' and ',' is removed, the rest is
    split on ',' and empty fields are discarded.  Discarding empty fields
    copes with the comma left behind by a removed marker and with leading,
    trailing or repeated commas, without assuming where they occur.
    """
    cleaned = NOISE_RE.sub("", line)
    return [field for field in cleaned.split(",") if field]


def pair_up(values):
    """Group *values* into consecutive 2-tuples.

    A trailing value without a partner is left out; callers that need to
    know can check ``len(values) % 2``.
    """
    return list(zip(values[0::2], values[1::2]))


def convert(src, dst):
    """Write the values found in file *src* to *dst*, one "x,y" pair per line.

    Values are paired line by line, and the pairs from every input line are
    written in order.  If a line holds an odd number of values, a warning is
    printed and the unpaired last value is skipped.
    """
    pairs = []
    with open(src, "r") as infile:
        for line in infile:
            values = extract_values(line)
            if len(values) % 2:
                # Best effort: keep the complete pairs, report the leftover.
                print("Data set needs cleanup\n")
            pairs.extend(pair_up(values))
    with open(dst, "w") as outfile:
        outfile.writelines(",".join(pair) + "\n" for pair in pairs)


if __name__ == "__main__":
    convert(filenmi, filenmo)