我正在研究流量检测深度学习模型,我仍在数据预处理中,我的数据位于 .shp 和 .dbf 和 .prj 文件中,现在所有必要的数据都位于 .geojson 文件中我对其进行投影我看到了地图(我们到阿拉斯加)我正在研究,但几何和
shape_leng
列中仍然存在一些缺失值,因为它们相关,这里是非缺失值行的示例:
"Shape_Leng": 7.2826369182000004e-05 }, "geometry": { "type": "LineString", "coordinates": [ [ -85.393274186354574, 31.208875440549207 ], [ -85.393254553255019, 31.208945570581648 ] ] }
这是缺失值行:
"Shape_Leng": 0.0 }, "geometry": null
我想用接近其邻居的值来填充那些 null 和 0.0
请注意,文件中有 6840 个连续的 null 和 0.0 值(其中只有一行,然后在 not null 中),总共有 18354 个缺失值
我尝试了这段代码:
import geopandas as gpd
import pandas as pd
import shapely
from shapely.geometry import LineString
from geopy.distance import geodesic
df = gpd.read_file("geoformat.geojson")
df.reset_index(drop=True, inplace=True)
def interpolate_geometry(row):
prev_index = row.name - 1
next_index = row.name + 1
if pd.notnull(row['geometry']):
return row['geometry']
if isinstance(row['geometry'], shapely.geometry.MultiLineString):
coords = []
for line in row['geometry']:
coords.extend(line.coords)
return LineString(coords)
if isinstance(row['geometry'], shapely.geometry.LineString):
return row['geometry']
prev_coords = df.at[prev_index, 'geometry']
if not isinstance(prev_coords, shapely.geometry.MultiLineString):
prev_coords = df.at[0, 'geometry'] if prev_index < 0 else df.at[prev_index, 'geometry']
if next_index == len(df):
return prev_coords
next_row = df.iloc[next_index]
next_coords = next_row['geometry'].coords[0] if next_row['geometry'] else None
if next_coords is None:
interpolated_coords = prev_coords
else:
if isinstance(prev_coords, shapely.geometry.LineString):
if isinstance(next_coords, shapely.geometry.Point):
next_coords = LineString([next_coords])
elif not isinstance(next_coords, shapely.geometry.LineString):
raise TypeError('next_coords must be a LineString or Point object')
interpolated_coords = LineString([prev_coords, next_coords])
return interpolated_coords
interpolated_geometries = []
for idx, row in df.iterrows():
interpolated_geometries.append(interpolate_geometry(row))
df['geometry'] = interpolated_geometries
def interpolate_shape_leng(row):
if row['Shape_Leng'] != 0.0:
return row['Shape_Leng']
prev_index = row.name - 1
while pd.isnull(df.at[prev_index, 'Shape_Leng']) or df.at[prev_index, 'Shape_Leng'] == 0.0:
prev_index -= 1
next_index = row.name + 1
while pd.isnull(df.at[next_index, 'Shape_Leng']) or df.at[next_index, 'Shape_Leng'] == 0.0:
next_index += 1
prev_coords = df.at[prev_index, 'geometry'].coords[-1]
next_coords = df.at[next_index, 'geometry'].coords[0]
distance = geodesic(prev_coords, next_coords).meters
prev_shape_leng = df.at[prev_index, 'Shape_Leng']
next_shape_leng = df.at[next_index, 'Shape_Leng']
interpolated_shape_leng = prev_shape_leng + (next_shape_leng - prev_shape_leng) * (row.name - prev_index) / (
next_index - prev_index)
return interpolated_shape_leng
df['Shape_Leng'] = df.apply(interpolate_shape_leng, axis=1)
df.to_file("geoformat_imputed.geojson", driver='GeoJSON')
这是我得到的错误:
Traceback (most recent call last):
File "C:\Users\youssef\Desktop\prj.py", line 68, in <module>
interpolated_geometries.append(interpolate_geometry(row))
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\youssef\Desktop\prj.py", line 56, in interpolate_geometry
raise TypeError('next_coords must be a LineString or Point object')
TypeError: next_coords must be a LineString or Point object