我正在尝试使用 pandas 从数据记录器的 FTP 服务器实时跟踪 CSV 文件。我正在尝试实现“使用 Pandas 处理 CSV 的实时尾部”的解决方案,但在读取新行时遇到问题。该程序将读取所有可用行,但之后只读取空行。这是打印每个读取行时的输出:
b'+1.000000000E+01,+1.98000E+01,+3.15500E-04,-4.23700E-03,-3.20300E-03,-1.13750E-03,-3.26900E-03,+1.47450E-03,-3.39800E-03,-1.05500E-04,-4.24500E-03,0\r\n'
b'+1.100000000E+01,+1.98000E+01,-2.63000E-04,-4.24300E-03,-3.23350E-03,-4.79000E-04,-3.26000E-03,+1.43450E-03,-3.49800E-03,-6.47500E-04,-4.11750E-03,0\r\n'
b'+1.200000000E+01,+1.98000E+01,-9.49500E-04,-3.80900E-03,-3.22450E-03,+1.14500E-04,-3.26800E-03,+1.26850E-03,-3.63450E-03,-1.32050E-03,-3.59800E-03,0\r\n'
...
b''
b''
b''
b''
代码:
from ftplib import FTP
import os
import pandas as pd
import time
from io import StringIO
from io import BytesIO
ftp = FTP('192.168.10.100')
ftp.login()
def follow(thefile):
global ftp
with BytesIO() as flo:
ftp.retrbinary('RETR ' + thefile, flo.write)
flo.seek(0)
while True:
line = flo.readline()
print(line)
if not line or not line.endswith(b'\n'):
time.sleep(1)
continue
yield line.decode('utf-8')
if __name__ == "__main__":
global df
# set the file we want to log the current line to
log_file = "./current_line"
# check if the last line processed has been saved
if os.path.exists(log_file):
with open(log_file, 'r') as ifile:
# get the last line processed
start_line = int(ifile.read())
else:
# set the last line processed to be the first data row (not the header). If there is no header then set to 0
start_line = 1
# set the file we are reading
myfile = '/sdcard/HIOKI/LR8450/DATA/24-04-12/tmpWvData240412_172748.CSV'
# remove this line if you don't need the header
# flo.seek(0)
# header = pd.read_csv(flo, nrows=0, on_bad_lines='skip', engine="python", encoding='unicode_escape')
# initialize the list to store the lines in
lines = []
# loop through each line in the file
for nline, line in enumerate(follow(myfile)):
# if we have already processed this file
if nline < start_line:
continue
# append to the lines list
lines.append(line)
# check if the we have hit the number of lines we want to handle
if len(lines) == 10:
# read the csv from the lines we have processed
df = pd.read_csv(StringIO(''.join(lines)), header=None, on_bad_lines='skip', engine="python", encoding='unicode_escape')
# update the header. Delete this row if there is no header
df.columns = header.columns
# do something with df
print(df)
# reset the lines list
lines = []
# open the log file and note the line we have processed up to
with open(log_file, 'w') as lfile:
lfile.write(str(nline)) # only write the processed lines when we have actually done something