Retrieve daily ERA5 data for all pressure levels and multiple time zones

Hi all,

I am struggling with the speed of my API request.

I want to download daily averages from the “ERA5 pressure levels” dataset for temperature and relative humidity, for each pressure level and four US time zones, covering the period 2006-2020.

It seems that I can only pass one variable, one UTC shift, and one pressure level per request.

However, a single request takes more than two minutes, so retrieving all the data I need would take far too long.

Is there any way to speed up the process (e.g. requesting multiple variables, UTC shifts, or pressure levels at once)?

I wrote the following code (based on “Retrieve daily ERA5/ERA5-Land data using the CDS API”):

# Packages
import cdsapi
import requests
import urllib3
urllib3.disable_warnings()

# PATH
PATH = ".../ERA5_pressure_levels/"
 
# Requires:
# 1) the CDS API to be installed and working on your system
# 2) You have agreed to the ERA5 Licence (via the CDS web page)
# 3) Selection of required variables, daily statistic, etc.

# Call API
c = cdsapi.Client(timeout=600)

# Time Zones
UTC =  ["UTC-05", "UTC-06", "UTC-07", "UTC-08"]

# Variables
VAR =  ['temperature', 'relative_humidity']

# Pressure levels
PS = [
            '1', '2', '3',
            '5', '7', '10',
            '20', '30', '50',
            '70', '100', '125',
            '150', '175', '200',
            '225', '250', '300',
            '350', '400', '450',
            '500', '550', '600',
            '650', '700', '750',
            '775', '800', '825',
            '850', '875', '900',
            '925', '950', '975',
            '1000'
        ]

# Years
YEARS =  [
      '2006', '2007', '2008',
      '2009', '2010', '2011',
      '2012', '2013', '2014',
      '2015', '2016', '2017',
      '2018', '2019', '2020',
]

# Months
MONTHS = [
    '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'
    ]

# Area (lat/lon box) requested for each time zone
AREA = {
    "UTC-05": {"lat": [23, 50], "lon": [-91, -65]},
    "UTC-06": {"lat": [24, 51], "lon": [-106, -83]},
    "UTC-07": {"lat": [29, 51], "lon": [-120, -99]},
    "UTC-08": {"lat": [31, 51], "lon": [-126, -113]},
}

# Loop over all parameters (one request per variable, pressure level, time zone and month)
for yr in YEARS:
    for mn in MONTHS:
        for var in VAR:
            for ps in PS:
                for utc in UTC:

                    print('Running: ' + yr + mn + var + ps + utc)

                    result = c.service(
                        "tool.toolbox.orchestrator.workflow",
                        params={
                            "realm": "user-apps",
                            "project": "app-c3s-daily-era5-statistics",
                            "version": "master",
                            "kwargs": {
                                "dataset": "reanalysis-era5-pressure-levels",
                                "product_type": "reanalysis",
                                "variable": var,
                                "pressure_level": ps,
                                "statistic": "daily_mean",
                                "year": yr,
                                "month": mn,
                                "time_zone": utc + ":0",
                                "frequency": "1-hourly",
                                "grid": "0.25/0.25",
                                "area": AREA[utc],
                            },
                            "workflow_name": "application",
                        })

                    # Set name of output file for each month (statistic, variable, year, month)
                    file_name = "download_" + utc + "_" + ps + "_" + var + "_" + yr + mn + ".nc"
                    with open(PATH + 'Filenames.txt', "a") as w:
                        w.write(file_name + '\n')

                    # Download the result to disk
                    location = result[0]['location']
                    res = requests.get(location, stream=True)
                    print("Writing data to " + file_name)
                    with open(PATH + file_name, 'wb') as fh:
                        for r in res.iter_content(chunk_size=1024 * 1024):
                            fh.write(r)

Hi Felix,

At the moment, I think requests are limited to one variable/level/month at a time in order to manage the load on the CDS.

Thanks,

Kevin 

In that case, it seems faster to download hourly ERA5 data on pressure levels for the entire US and calculate the daily averages for the respective time zones myself. I will share my code for others facing a similar issue:

# Packages
import sys
import xarray as xr
import numpy as np
import geopandas as gpd
import pandas as pd
import glob
import dask
from datetime import timedelta

Open the hourly ERA5 pressure-level data for the entire US (load at least two consecutive years, since the time shift requires hours from 1 January of the following year):

d = xr.open_mfdataset("ERA5 pressure level for the entire US")  # path(s) to the hourly ERA5 files

Split the data into the different time-zone coordinate windows (ERA5 latitude is stored from north to south, hence the descending latitude slices):

d_UTC5 = d.sel(longitude=slice(-91, -65), latitude=slice(50, 23), drop=True)
d_UTC6 = d.sel(longitude=slice(-106, -83), latitude=slice(51, 24), drop=True)
d_UTC7 = d.sel(longitude=slice(-120, -99), latitude=slice(51, 29), drop=True)
d_UTC8 = d.sel(longitude=slice(-126, -113), latitude=slice(51, 31), drop=True)

Shift the time coordinate to UTC-5, UTC-6, UTC-7 and UTC-8 for the respective windows:

d_UTC5['time'] = d_UTC5.time.get_index('time') + timedelta(hours=-5)
d_UTC6['time'] = d_UTC6.time.get_index('time') + timedelta(hours=-6)
d_UTC7['time'] = d_UTC7.time.get_index('time') + timedelta(hours=-7)
d_UTC8['time'] = d_UTC8.time.get_index('time') + timedelta(hours=-8)
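
After the shift, each window can be trimmed to the study period so that the daily means below only cover complete local days. A minimal sketch for the UTC-5 window, assuming hourly data through 1 January of the following year were loaded as noted above:

# Keep only complete local days within the study period (2006-2020);
# this drops the partial local days created at either end by the shift
d_UTC5 = d_UTC5.sel(time=slice('2006-01-01', '2020-12-31'))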

Calculate daily averages for the respective time-zone windows and store them in separate netCDF files:

d_UTC5 = d_UTC5.resample(time='1D').mean('time')
d_UTC5.to_netcdf('UTC-5_' + '.nc')
d_UTC6 = d_UTC6.resample(time='1D').mean('time')
d_UTC6.to_netcdf('UTC-6_' + '.nc')
d_UTC7 = d_UTC7.resample(time='1D').mean('time')
d_UTC7.to_netcdf('UTC-7_' + '.nc')
d_UTC8 = d_UTC8.resample(time='1D').mean('time')
d_UTC8.to_netcdf('UTC-8_' + '.nc')
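
For completeness, the hourly pressure-level files opened above can be retrieved with an ordinary CDS API request. The following is only a minimal sketch, one month per request: the bounding box (chosen to cover the four time-zone windows above), the day/time lists and the output file name are illustrative, and the "format" key may be named differently on newer versions of the CDS.

import cdsapi

c = cdsapi.Client()

# All 37 pressure levels, as in the request above
PRESSURE_LEVELS = [
    "1", "2", "3", "5", "7", "10", "20", "30", "50", "70", "100", "125",
    "150", "175", "200", "225", "250", "300", "350", "400", "450", "500",
    "550", "600", "650", "700", "750", "775", "800", "825", "850", "875",
    "900", "925", "950", "975", "1000",
]

# One month of hourly data for both variables on all levels;
# area is [North, West, South, East] and covers all four time-zone windows
c.retrieve(
    "reanalysis-era5-pressure-levels",
    {
        "product_type": "reanalysis",
        "variable": ["temperature", "relative_humidity"],
        "pressure_level": PRESSURE_LEVELS,
        "year": "2006",
        "month": "01",
        "day": ["%02d" % d for d in range(1, 32)],
        "time": ["%02d:00" % h for h in range(24)],
        "area": [51, -126, 23, -65],
        "format": "netcdf",
    },
    "ERA5_US_hourly_2006_01.nc",
)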