我在一台 8 核的 macOS 电脑上运行以下代码。但程序运行时,我发现 Python 只使用了一个核心。我该如何将土地利用数据库划分为若干部分(例如 6 个),在每个核心上分别执行相交(intersection)运算,然后再合并结果?
import os
import geopandas as gpd
from pathlib import Path
from multiprocessing import Pool
import time
# Directory holding the land-use shapefiles that are read as input
land_use_path = "/pathlu/"
# Directory the density shapefiles are read from and results are written to
output_path = "/outpath/"
# Glob pattern for the gridded emission files
grid_file_pattern = "emiss*.shp"

# Wall-clock reference captured when the module is loaded
start_time = time.time()
def process_land_use(selected_snp, land_use_category):
    """Intersect one gridded emissions-density layer with one land-use layer.

    Reads ``emiss_{selected_snp}_density.shp`` from the module-level
    ``output_path`` and ``lu_ll_{land_use_category}.shp`` from the
    module-level ``land_use_path``, overlays the two with
    ``how='intersection'``, adds one ``<pollutant>_emissions`` column per
    pollutant, and writes the result to
    ``{selected_snp}_{land_use_category}_emissions.shp`` in ``output_path``.

    Args:
        selected_snp: Identifier used to build the density file name and
            the output file name (e.g. ``"SNP1"``).
        land_use_category: Identifier used to build the land-use file name
            and the output file name (e.g. ``"ind"``).

    Returns:
        None. The result is written to disk.

    Raises:
        KeyError: If the overlay result lacks the ``area`` column or one of
            the ``<pollutant>_m2`` columns.
    """
    target_crs = "EPSG:4326"
    pollutants = ("CO", "NH3", "NMVOC", "NOx", "SO2", "PMc", "PM2_5")

    # Read the gridded emissions density file
    grid_density_file = os.path.join(
        output_path, f"emiss_{selected_snp}_density.shp"
    )
    gdf_density = gpd.read_file(grid_density_file)

    # Read the land use shapefile
    land_use_file = f"lu_ll_{land_use_category}.shp"
    gdf_land_use = gpd.read_file(os.path.join(land_use_path, land_use_file))

    # A layer without CRS metadata is taken to be in WGS84 already; set_crs
    # only attaches the label, it does not transform coordinates.
    if gdf_land_use.crs is None:
        gdf_land_use = gdf_land_use.set_crs(target_crs)
    # Reproject to WGS84 (EPSG:4326)
    gdf_land_use = gdf_land_use.to_crs(target_crs)

    # overlay() does not reproject its inputs, so bring the density grid into
    # the land-use CRS first. A grid with no CRS metadata is left untouched
    # because there is nothing to reproject from.
    if gdf_density.crs is not None and gdf_density.crs != gdf_land_use.crs:
        gdf_density = gdf_density.to_crs(gdf_land_use.crs)

    # Perform spatial intersection
    intersection = gpd.overlay(gdf_density, gdf_land_use, how="intersection")

    # NOTE(review): "area" is an attribute column carried over from the
    # inputs, not the area of the clipped geometry -- confirm this is the
    # intended multiplier. If both inputs have an "area" column, overlay
    # presumably suffixes them and this lookup will fail; verify.
    area = intersection["area"]

    # Calculate emissions in each land use category
    for pollutant in pollutants:
        intersection[f"{pollutant}_emissions"] = (
            intersection[f"{pollutant}_m2"] * area
        )

    # Save the result
    # NOTE(review): the Shapefile format limits field names to 10 characters,
    # so the "*_emissions" column names will likely be truncated on write --
    # confirm, or consider writing GeoPackage instead.
    output_file = os.path.join(
        output_path, f"{selected_snp}_{land_use_category}_emissions.shp"
    )
    intersection.to_file(output_file)
if __name__ == '__main__':
    start_time = time.time()

    # Distribute gridded emissions over land use for specific SNPs
    snp_distributions = {
        "SNP1": "ind",
    }

    # starmap hands each (SNP, land-use category) pair to one worker as a
    # single task, so the number of busy cores never exceeds the number of
    # entries above: one entry keeps one core busy regardless of pool size.
    # To use more cores, add entries (e.g. one per land-use part file).
    # The pool is sized to the task count (capped at 8) so no idle worker
    # processes are started; max(1, ...) keeps Pool valid for an empty dict.
    n_workers = max(1, min(8, len(snp_distributions)))
    with Pool(n_workers) as pool:
        pool.starmap(process_land_use, snp_distributions.items())

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Execution completed in {elapsed_time / 60:.2f} minutes.")
我没有用 geopandas 并行运行叠加(overlay)操作的经验,但我开发了一个库 geofileops,专门用于加速大文件的处理。默认情况下它会使用所有 CPU 核心。对于某些操作,它在底层使用 geopandas;但叠加操作不使用 geopandas,以便在处理大文件时保持较低的内存占用。
所以如果你不介意尝试另一个库,你可以尝试一下。
用于运行 geofileops.intersection 的通用代码示例:
import geofileops as gfo
if __name__ == "__main__":
    # The two layers to intersect and the file that receives the result.
    # Shapefile is supported, but .gpkg will be faster
    input1_path = "input1.gpkg"
    input2_path = "input2.gpkg"
    output_path = "output.gpkg"

    gfo.intersection(
        input1_path=input1_path,
        input2_path=input2_path,
        output_path=output_path,
    )