Monday, 17 February 2014

Add and calculate mean of multiple rasters

I needed to calculate a mean value of multiple rasters i.e. the average value for each pixel derived from a set of GeoTiff images covering the same area. The gdal_calc.py script can handle a few files but not 38 files as in my case. So here is a script to import a list of files and return a raster of the sum and a raster of the average. It's not elegant at all but it works (well, for me it did).

#!/Library/Frameworks/Python.framework/Versions/Current/bin/python
#from __future__ import division
import sys
import os
import math
import numpy as np
from scipy.stats.stats import nanmean
import scipy.stats as stats
from osgeo import gdal
from osgeo import ogr
from osgeo import osr
import gc
from datetime import datetime
#
# Here we create the "main" function so that the script will run stand-alone or as a library
def main():
    # Enter list of tif files
    FileList = ('Fill','with','list','of','tifs')
    # Enter the location of files
    FileLoc = '/Path/to/folder/containing/tifs''
    # Enter the destination folder (here same as source)
    outfolder = FileLoc
    # Run the functions defined below.
    filnmsum,filnmmean = rasterAverage(FileList,FileLoc,outfolder)
    return 0
#
# Some slighty bloated and messy code because I took this from a function I wrote to
# import data for a kriging routine I am working on.
def DemImport(demfile):
    '''Import a DEM file and return grid plus meta data. Written for kriging. '''
    time_one = datetime.now()
    print '\nImporting ',demfile
    print time_one.strftime('at day:%j %H:%M:%S')
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Open file
    dem = gdal.Open(demfile)
    if dem is None:
        print 'Could not open ',demfile,'\n'
        sys.exit(1)
    # Get coordinate system parameters
    projec = dem.GetProjection()
    transf = dem.GetGeoTransform()
    ul_x = transf[0]
    ul_y = transf[3]
    xres = transf[1]
    yres = transf[5]
    # get image size
    demrows = dem.RasterYSize
    demcols = dem.RasterXSize
    # Calculate corners
    ll_x = ul_x
    ll_y = ul_y + (demrows * yres)
    #
    driveshrt = dem.GetDriver().ShortName
    driver = gdal.GetDriverByName(driveshrt)
    #metadata = driver.GetMetadata()
    # Read the dem band to a matrix called band_1
    demband = dem.GetRasterBand(1)
    # Access data in rastern band as array
    demdata = demband.ReadAsArray(0,0,demcols,demrows)
    # gdal interpolation creates "upside down files, hence this
    Yres=yres
    if Yres/np.fabs(Yres)==-1:
        Yres=-1*Yres
        demdata = np.flipud(demdata)
    #
    # get nodata value
    demnandat = demband.GetNoDataValue()
    # get minimum and maximum value
    demmindat = demband.GetMinimum()
    demmaxdat = demband.GetMaximum()
    if demmindat is None or demmaxdat is None:
            (demmindat,demmaxdat) = demband.ComputeRasterMinMax(1)
    #
    ## Create grid to krig to.
    xstart = int(round(ll_x))
    xstop = xstart + int(demcols)*int(round(xres))
    ystart = int(round(ll_y))
    ystop = ystart + int(demrows)*int(round(Yres))
    demRx = range(xstart,xstop,int(round(xres)))
    demRy = range(ystart,ystop,int(round(Yres)))
    Xg1,Yg1 = np.meshgrid(demRx,demRy)
    # Convert grid to vectors
    Yg=Yg1.reshape((-1,1))
    Xg=Xg1.reshape((-1,1))
    #
    rx = len(demRx)
    ry = len(demRy)
    # Collect metadata to list for return
    demmeta = []
    demmeta.append( ['projection','geotransform','driver','rows','columns','nanvalue','min','max'])# All one line
    demmeta.append(projec)
    demmeta.append(transf)
    demmeta.append(driver)
    demmeta.append(demrows)
    demmeta.append(demcols)
    demmeta.append(demnandat)
    demmeta.append(demmindat)
    demmeta.append(demmaxdat)
    for i in demmeta: print i
    print '\n'
    return demdata,Xg,Yg,Xg1,Yg1,rx,ry,demmeta
#
# This writes to GeoTiff with NoData values set to -9999
def datawrite(outdata,demdata,meta,name,outDir):
    '''Write an array of grid data to a georeferenced raster file'''
    #meta = ['projection','geotransform','driver','rows','columns','nanvalue']
    filnm = os.path.join(outDir,(name + '.tif'))
    datdriver = gdal.GetDriverByName( "GTiff" )
    datout = datdriver.Create(filnm,meta[5],meta[4],1,gdal.GDT_Float32)
    datout.SetGeoTransform(meta[2])
    datout.SetProjection(meta[1])
    nanmask = demdata != meta[6]
    outdata_m = np.flipud(outdata * nanmask)
    outdata_m = np.where(outdata_m==0,-9999,outdata_m)
    datout.GetRasterBand(1).WriteArray(outdata_m)
    datout.GetRasterBand(1).SetNoDataValue(-9999)
    datout.GetRasterBand(1).ComputeStatistics(True)
    return filnm
#
# Here is where the summing occurs
def rasterAverage(flist,flistfolder,outfolder):
    '''Average all rasters together'''
    time_one = datetime.now()
    print time_one.strftime('Start rasterAverage at:%j %H:%M:%S')
    # Get first raster file
    Afile = os.path.join(flistfolder,flist[0])
    Adata,AXg,AYg,AXg1,AYg1,Arx,Ary,Ademmeta = DemImport(Afile)
    # Get second for creating mask to deal with NaN
    Bfile = os.path.join(flistfolder,flist[1])
    Bdata,BXg,BYg,BXg1,BYg1,Brx,Bry,Bdemmeta = DemImport(Bfile)
    # Set NoData values to numpy NaN
    Adata[Adata==Ademmeta[6]]=np.nan
    Bdata[Bdata==Bdemmeta[6]]=np.nan
    # Create starting values with initial mask
    sumdata = np.ma.masked_array(np.nan_to_num(Adata), mask=np.isnan(Adata) & np.isnan(Bdata))
    counter = 1
    print '\n'*3
    print '*'*20
    # Go through list of other raster files
    for B in flist[1:]:
        print 'file ',counter+1,' of ',len(flist)
        # Import files 2 to n
        Bfile = os.path.join(flistfolder,B)
        Bdata,BXg,BYg,BXg1,BYg1,Brx,Bry,Bdemmeta = DemImport(Bfile)
        print B,' data type: ',type(Bdata)
        print 'Rows (A,B): ',Arx,Brx,' Columns (A,B): ',Ary,Bry
        print 'xres (A,B): ',Ademmeta[2][1],Bdemmeta[2][1],' yres (A,B): ',Ademmeta[2][5],Bdemmeta[2][5]
        print 'No data (A): ',Ademmeta[6],' No data (B): ',Bdemmeta[6]
        # Check matching resolution
        if Arx != Brx or Ary != Bry:
            print B,' resolution mismatch with ', Afile
            continue
        elif Ademmeta[4] != Bdemmeta[4] or Ademmeta[5] != Bdemmeta[5]:
            print 'Size mismatch between ',B,' and ',Afile
            continue
        # Add current file to sum
        Bdata[Bdata==Bdemmeta[6]]=np.nan
        BdataMasked  = np.ma.masked_array(np.nan_to_num(Bdata), mask=sumdata.mask)
        sumdata = (sumdata + BdataMasked).filled(np.nan)
        sumdata = np.ma.masked_array(np.nan_to_num(sumdata), mask=np.isnan(sumdata))
        counter = counter + 1
    # Here we create the mean raster
    meandata = sumdata/counter
    #
    # Save the data to file
    sumname = 'file_sum'
    filnmsum = datawrite(sumdata,Adata,Ademmeta,sumname,outfolder)
    meanname = 'file_mean'
    filnmmean = datawrite(meandata,Adata,Ademmeta,meanname,outfolder)
    return filnmsum,filnmmean
#
#
## MAIN - Runs script if it is called as stand-alone
if __name__ == "__main__":
    main()

No comments:

Post a Comment