Python 中贝叶斯反演的 H 矩阵生成输出问题

问题描述 投票:0回答:0

我正在尝试生成一个 H 矩阵,用于在 Python 环境中对微量气体浓度数据进行贝叶斯反演。我的输出接近于我所需要的,但出于某种原因我目前无法确定——我的最终输出由“lat”、“lon”和“lev”的所有可能组合组成。而不是通过 lat'、'lon' 和 'lev' 为每个相互关联的 NOAA 站点循环(因此大约 15 种组合)。下面是我的代码:

###-> Python script that serves as a workspace to generate H-Matrix for Kalman Smoother Bayesian Inversion, as seen in Bruhwiler et al [2005] - Step #1 of C2H6 Bayesian Inversion:

###-> Key Module Import(s):
import numpy as np
import netCDF4 as nc
import pandas as pd
import xarray as xr
import math
import os
import matplotlib.pyplot as plt

'''### Primary Workspace ###'''
###-> Concentrations Directories + Location(s)
GCHP_OutDir = ('D://CCAR REU/Emissions Inventories - GEOS-CHEM/GEOS-Chem Output Directory/Tagged Tracer C2H6/')      # GCHP C2H6 Parent Output Directory (Laptop)
Dir_2017 = (GCHP_OutDir + '2017/')                                                                                   # GCHP C2H6 2017 Output Directory

###-> Measurements Directories + Location(s)
NOAA_ObsDIR = ('D://CCAR REU/Emissions Inventories - GEOS-CHEM/Observation Data/Observation Data - NOAA/')           # NOAA Observational C2H6 Data Directory (Laptop)
NOAA_Dir2017 = (NOAA_ObsDIR + '2017/')                                                                               # NOAA C2H6 Observations in 2017

###-> Key datasets for analysis
##->  Average Monthly Concentrations by Year
GCHP_2017 = xr.open_mfdataset(Dir_2017 + GCHP_2017 = xr.open_mfdataset(Dir_2017 + 'GEOSChem.SpeciesConcMonthlyAvg.2017????_????z.nc4', data_vars = ['SpeciesConc_C2H6ANTBorNA', 'SpeciesConc_C2H6ANTTemNA', 'SpeciesConc_C2H6ANTTroSA', 'SpeciesConc_C2H6ANTTemSA', 'SpeciesConc_C2H6ANTOth', 'SpeciesConc_C2H6ANTEur', 'SpeciesConc_C2H6ANTBorEuras','SpeciesConc_C2H6ANTTemEuras', 'SpeciesConc_C2H6ANTTroAS', 'SpeciesConc_C2H6ANTNAfr', 'SpeciesConc_C2H6ANTSAfr', 'SpeciesConc_C2H6BBBorNA', 'SpeciesConc_C2H6BBTemNA', 'SpeciesConc_C2H6BBTroSA', 'SpeciesConc_C2H6BBTemSA', 'SpeciesConc_C2H6BBOth', 'SpeciesConc_C2H6BBEur', 'SpeciesConc_C2H6BBBorEuras', 'SpeciesConc_C2H6BBTemEuras', 'SpeciesConc_C2H6BBTroAS', 'SpeciesConc_C2H6BBNAfr', 'SpeciesConc_C2H6BBSAfr', 'SpeciesConc_C2H6OCEUNH', 'SpeciesConc_C2H6OCEMNH', 'SpeciesConc_C2H6OCELNH', 'SpeciesConc_C2H6OCEUSH','SpeciesConc_C2H6OCEMSH', 'SpeciesConc_C2H6OCELSH', 'SpeciesConc_C2H6BIOUNH', 'SpeciesConc_C2H6BIOMNH', 'SpeciesConc_C2H6BIOLNH', 'SpeciesConc_C2H6BIOUSH','SpeciesConc_C2H6BIOMSH', 'SpeciesConc_C2H6BIOLSH', 'SpeciesConc_C2H6GEO'], combine = 'by_coords')    # Monthly AVG of C2H6 Concentration GCHP Output file from 2017/01 to 2017/12

NOAA_Obs2017 = pd.read_csv(NOAA_Dir2017 + 'NOAA_Observations_2017_MAVG.csv', sep = ",", skipinitialspace = True, parse_dates = ['yyyymmdd']) # 2017 NOAA Observation(s)

##->  Hardcoded Variables:
GCHP_TP_17 = 12                                                                # Quantity of months in 1-year simulation period

##->  Extract and count quantity of NOAA observation sites
NOAA_Obs2017 = NOAA_Obs2017.set_index('yyyymmdd')                              # Index by observation time variable ('yyyymmdd')
Site = NOAA_Obs2017['site'].unique()
NUM_OBSS = len(Site)

#->   Size calculation of N 
N_17 = (NUM_OBSS * GCHP_TP_17)

##->  Extract and count quantity of source and subregions for generation of M
#->   Fossil Fuel (FF) + Biofuel (BF), Biomass Burning (BB) - Geographic Regional Partitions; BorNA = boreal North America, TemNA = temperate North America, Eur = Europe, BorEuras = boreal Eurasia, 
#     TemEuras = temperate Eurasia, TroSA = tropical South America, TemSA = temperate South America, NAfr = North Africa, SAfr = South Africa, TroAs = tropical Asia, and Aus = Australia
GCHP_SR_01 = ['SpeciesConc_C2H6ANTBorNA', 'SpeciesConc_C2H6ANTTemNA', 'SpeciesConc_C2H6ANTTroSA', 'SpeciesConc_C2H6ANTTemSA', 'SpeciesConc_C2H6ANTOth', 'SpeciesConc_C2H6ANTEur', 
              'SpeciesConc_C2H6ANTBorEuras','SpeciesConc_C2H6ANTTemEuras', 'SpeciesConc_C2H6ANTTroAS', 'SpeciesConc_C2H6ANTNAfr', 'SpeciesConc_C2H6ANTSAfr', 'SpeciesConc_C2H6BBBorNA', 
              'SpeciesConc_C2H6BBTemNA', 'SpeciesConc_C2H6BBTroSA', 'SpeciesConc_C2H6BBTemSA', 'SpeciesConc_C2H6BBOth', 'SpeciesConc_C2H6BBEur', 'SpeciesConc_C2H6BBBorEuras', 
              'SpeciesConc_C2H6BBTemEuras', 'SpeciesConc_C2H6BBTroAS', 'SpeciesConc_C2H6BBNAfr', 'SpeciesConc_C2H6BBSAfr']
NUM_SR_01 = len(GCHP_SR_01)

#->   Oceanic_C2H6 (OCE), Biogenic_C2H6 (BIO) - Latitudinal Partitions; 90˚N, 60˚N+, 30˚N+, 0˚, 30˚S+, 60˚S+, 90˚S
GCHP_SR_02 = ['SpeciesConc_C2H6OCEUNH', 'SpeciesConc_C2H6OCEMNH', 'SpeciesConc_C2H6OCELNH', 'SpeciesConc_C2H6OCEUSH','SpeciesConc_C2H6OCEMSH', 'SpeciesConc_C2H6OCELSH', 
              'SpeciesConc_C2H6BIOUNH', 'SpeciesConc_C2H6BIOMNH', 'SpeciesConc_C2H6BIOLNH', 'SpeciesConc_C2H6BIOUSH','SpeciesConc_C2H6BIOMSH', 'SpeciesConc_C2H6BIOLSH']
NUM_SR_02 = len(GCHP_SR_02)

#->   Geologic_C2H6 (GEO) - Global Scale Partition; entire 180˚ x 180˚
GCHP_SR_03 = ['SpeciesConc_C2H6GEO']
NUM_SR_03 = len(GCHP_SR_03)

#->   Quantity of months in simulation period and size calculation of M
SUM_GCHP_SR = (NUM_SR_01 + NUM_SR_02 + NUM_SR_03)                              # Sum of all sources partitioned by varied subregions
M_17 = (SUM_GCHP_SR * GCHP_TP_17)                                              # Calculation of M = 420

#->   GEOS-Chem GCHP Data
Latitudes_G = GCHP_2017['lat'].sel(lat = [82.5, 36.5389, 71.3, -40.682, 42.5, 19.516, 45.6, 53.3, 19.5362, 40.1, -64.6, -14.247, -89.98, 72.6,  41], method='nearest')                       
Longitudes_G = GCHP_2017['lon'].sel(lon = [-62.3, 126.3295, -156.6, 144.688, -72.2, -154.811, -90.27, -9.9, -155.5763, -105.5, -64., -170.564, -24.8, -38.4, -124.1], method='nearest')                      
Levels_G1 = GCHP_2017['lev'].sel(lev = [13, 1, 1, 12, 18, 1, 27, 1, 71, 71, 1, 1, 71, 17, 2])
Levels_G2 = GCHP_2017['lev'].sel(lev = [14, 1, 1, 12, 19, 1, 28, 1, 72, 72, 1, 1, 72, 18, 3])

#->   Initialize H-matrix with NaN values
H = np.empty((N_17, M_17))                                                     # Shape of H = N = 180 X M = 420
H[:] = np.nan                                                                  # Temporarily fill H with NaN values

##->  Generation of H-Matrix based on available Emissions at the NOAA Observation Location(s) a.k.a Where Available
#->   Index of NOAA Observation Stations by their lat, long, and elev

for lat, lon, lev in zip(Latitudes_G, Longitudes_G, Levels_G1):
    lat = Latitudes_G[:]
    lon = Longitudes_G[:]
    lev = Levels_G1[:]

    # Fetch GCHP concentrations by source subregions at each NOAA Observation site
    H = GCHP_2017[['SpeciesConc_C2H6ANTBorNA', 'SpeciesConc_C2H6ANTTemNA', 'SpeciesConc_C2H6ANTTroSA', 'SpeciesConc_C2H6ANTTemSA', 'SpeciesConc_C2H6ANTOth', 'SpeciesConc_C2H6ANTEur', 
                   'SpeciesConc_C2H6ANTBorEuras','SpeciesConc_C2H6ANTTemEuras', 'SpeciesConc_C2H6ANTTroAS', 'SpeciesConc_C2H6ANTNAfr', 'SpeciesConc_C2H6ANTSAfr', 'SpeciesConc_C2H6BBBorNA', 
                   'SpeciesConc_C2H6BBTemNA', 'SpeciesConc_C2H6BBTroSA', 'SpeciesConc_C2H6BBTemSA', 'SpeciesConc_C2H6BBOth', 'SpeciesConc_C2H6BBEur', 'SpeciesConc_C2H6BBBorEuras', 
                   'SpeciesConc_C2H6BBTemEuras', 'SpeciesConc_C2H6BBTroAS', 'SpeciesConc_C2H6BBNAfr', 'SpeciesConc_C2H6BBSAfr', 'SpeciesConc_C2H6OCEUNH', 'SpeciesConc_C2H6OCEMNH', 
                   'SpeciesConc_C2H6OCELNH', 'SpeciesConc_C2H6OCEUSH','SpeciesConc_C2H6OCEMSH', 'SpeciesConc_C2H6OCELSH', 'SpeciesConc_C2H6BIOUNH', 'SpeciesConc_C2H6BIOMNH', 
                   'SpeciesConc_C2H6BIOLNH', 'SpeciesConc_C2H6BIOUSH','SpeciesConc_C2H6BIOMSH', 'SpeciesConc_C2H6BIOLSH', 'SpeciesConc_C2H6GEO']].sel(lat=lat, lon=lon, lev=lev)
    
    ##->  Save H-Matrix to netCDF Output File
    H.to_netcdf('D:/CCAR REU/Python Codex/H Matrix Output/' + 'H_Matrix_2017.nc')        # (Laptop)
    
    break

##->  Open H-Matrix netCDF and convert to .CSV File
H_NC = xr.open_dataset('D:/CCAR REU/Python Codex/H Matrix Output/' + 'H_Matrix_2017.nc')       # (Laptop)
H_DF = H_NC.to_dataframe()
H_DD = H_DF.drop_duplicates()
H_DD.to_csv('D:/CCAR REU/Python Codex/H Matrix Output/' + 'H_Matrix_2017.csv')                 # (Laptop)

我的直觉是问题出在 H 代 for 循环中,因为它在整个过程中一直给我带来问题。

再次感谢您付出的时间和努力!

python pandas numpy matrix python-xarray
© www.soinside.com 2019 - 2024. All rights reserved.