Monday 17 February 2014

Add and calculate mean of multiple rasters

I needed to calculate a mean value of multiple rasters i.e. the average value for each pixel derived from a set of GeoTiff images covering the same area. The gdal_calc.py script can handle a few files but not 38 files as in my case. So here is a script to import a list of files and return a raster of the sum and a raster of the average. It's not elegant at all but it works (well, for me it did).

#!/Library/Frameworks/Python.framework/Versions/Current/bin/python
#from __future__ import division
import sys
import os
import math
import numpy as np
from scipy.stats.stats import nanmean
import scipy.stats as stats
from osgeo import gdal
from osgeo import ogr
from osgeo import osr
import gc
from datetime import datetime
#
# Here we create the "main" function so that the script will run stand-alone or as a library
def main():
    # Enter list of tif files
    FileList = ('Fill','with','list','of','tifs')
    # Enter the location of files
    FileLoc = '/Path/to/folder/containing/tifs''
    # Enter the destination folder (here same as source)
    outfolder = FileLoc
    # Run the functions defined below.
    filnmsum,filnmmean = rasterAverage(FileList,FileLoc,outfolder)
    return 0
#
# Some slighty bloated and messy code because I took this from a function I wrote to
# import data for a kriging routine I am working on.
def DemImport(demfile):
    '''Import a DEM file and return grid plus meta data. Written for kriging. '''
    time_one = datetime.now()
    print '\nImporting ',demfile
    print time_one.strftime('at day:%j %H:%M:%S')
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Open file
    dem = gdal.Open(demfile)
    if dem is None:
        print 'Could not open ',demfile,'\n'
        sys.exit(1)
    # Get coordinate system parameters
    projec = dem.GetProjection()
    transf = dem.GetGeoTransform()
    ul_x = transf[0]
    ul_y = transf[3]
    xres = transf[1]
    yres = transf[5]
    # get image size
    demrows = dem.RasterYSize
    demcols = dem.RasterXSize
    # Calculate corners
    ll_x = ul_x
    ll_y = ul_y + (demrows * yres)
    #
    driveshrt = dem.GetDriver().ShortName
    driver = gdal.GetDriverByName(driveshrt)
    #metadata = driver.GetMetadata()
    # Read the dem band to a matrix called band_1
    demband = dem.GetRasterBand(1)
    # Access data in rastern band as array
    demdata = demband.ReadAsArray(0,0,demcols,demrows)
    # gdal interpolation creates "upside down files, hence this
    Yres=yres
    if Yres/np.fabs(Yres)==-1:
        Yres=-1*Yres
        demdata = np.flipud(demdata)
    #
    # get nodata value
    demnandat = demband.GetNoDataValue()
    # get minimum and maximum value
    demmindat = demband.GetMinimum()
    demmaxdat = demband.GetMaximum()
    if demmindat is None or demmaxdat is None:
            (demmindat,demmaxdat) = demband.ComputeRasterMinMax(1)
    #
    ## Create grid to krig to.
    xstart = int(round(ll_x))
    xstop = xstart + int(demcols)*int(round(xres))
    ystart = int(round(ll_y))
    ystop = ystart + int(demrows)*int(round(Yres))
    demRx = range(xstart,xstop,int(round(xres)))
    demRy = range(ystart,ystop,int(round(Yres)))
    Xg1,Yg1 = np.meshgrid(demRx,demRy)
    # Convert grid to vectors
    Yg=Yg1.reshape((-1,1))
    Xg=Xg1.reshape((-1,1))
    #
    rx = len(demRx)
    ry = len(demRy)
    # Collect metadata to list for return
    demmeta = []
    demmeta.append( ['projection','geotransform','driver','rows','columns','nanvalue','min','max'])# All one line
    demmeta.append(projec)
    demmeta.append(transf)
    demmeta.append(driver)
    demmeta.append(demrows)
    demmeta.append(demcols)
    demmeta.append(demnandat)
    demmeta.append(demmindat)
    demmeta.append(demmaxdat)
    for i in demmeta: print i
    print '\n'
    return demdata,Xg,Yg,Xg1,Yg1,rx,ry,demmeta
#
# This writes to GeoTiff with NoData values set to -9999
def datawrite(outdata,demdata,meta,name,outDir):
    '''Write an array of grid data to a georeferenced raster file'''
    #meta = ['projection','geotransform','driver','rows','columns','nanvalue']
    filnm = os.path.join(outDir,(name + '.tif'))
    datdriver = gdal.GetDriverByName( "GTiff" )
    datout = datdriver.Create(filnm,meta[5],meta[4],1,gdal.GDT_Float32)
    datout.SetGeoTransform(meta[2])
    datout.SetProjection(meta[1])
    nanmask = demdata != meta[6]
    outdata_m = np.flipud(outdata * nanmask)
    outdata_m = np.where(outdata_m==0,-9999,outdata_m)
    datout.GetRasterBand(1).WriteArray(outdata_m)
    datout.GetRasterBand(1).SetNoDataValue(-9999)
    datout.GetRasterBand(1).ComputeStatistics(True)
    return filnm
#
# Here is where the summing occurs
def rasterAverage(flist,flistfolder,outfolder):
    '''Average all rasters together'''
    time_one = datetime.now()
    print time_one.strftime('Start rasterAverage at:%j %H:%M:%S')
    # Get first raster file
    Afile = os.path.join(flistfolder,flist[0])
    Adata,AXg,AYg,AXg1,AYg1,Arx,Ary,Ademmeta = DemImport(Afile)
    # Get second for creating mask to deal with NaN
    Bfile = os.path.join(flistfolder,flist[1])
    Bdata,BXg,BYg,BXg1,BYg1,Brx,Bry,Bdemmeta = DemImport(Bfile)
    # Set NoData values to numpy NaN
    Adata[Adata==Ademmeta[6]]=np.nan
    Bdata[Bdata==Bdemmeta[6]]=np.nan
    # Create starting values with initial mask
    sumdata = np.ma.masked_array(np.nan_to_num(Adata), mask=np.isnan(Adata) & np.isnan(Bdata))
    counter = 1
    print '\n'*3
    print '*'*20
    # Go through list of other raster files
    for B in flist[1:]:
        print 'file ',counter+1,' of ',len(flist)
        # Import files 2 to n
        Bfile = os.path.join(flistfolder,B)
        Bdata,BXg,BYg,BXg1,BYg1,Brx,Bry,Bdemmeta = DemImport(Bfile)
        print B,' data type: ',type(Bdata)
        print 'Rows (A,B): ',Arx,Brx,' Columns (A,B): ',Ary,Bry
        print 'xres (A,B): ',Ademmeta[2][1],Bdemmeta[2][1],' yres (A,B): ',Ademmeta[2][5],Bdemmeta[2][5]
        print 'No data (A): ',Ademmeta[6],' No data (B): ',Bdemmeta[6]
        # Check matching resolution
        if Arx != Brx or Ary != Bry:
            print B,' resolution mismatch with ', Afile
            continue
        elif Ademmeta[4] != Bdemmeta[4] or Ademmeta[5] != Bdemmeta[5]:
            print 'Size mismatch between ',B,' and ',Afile
            continue
        # Add current file to sum
        Bdata[Bdata==Bdemmeta[6]]=np.nan
        BdataMasked  = np.ma.masked_array(np.nan_to_num(Bdata), mask=sumdata.mask)
        sumdata = (sumdata + BdataMasked).filled(np.nan)
        sumdata = np.ma.masked_array(np.nan_to_num(sumdata), mask=np.isnan(sumdata))
        counter = counter + 1
    # Here we create the mean raster
    meandata = sumdata/counter
    #
    # Save the data to file
    sumname = 'file_sum'
    filnmsum = datawrite(sumdata,Adata,Ademmeta,sumname,outfolder)
    meanname = 'file_mean'
    filnmmean = datawrite(meandata,Adata,Ademmeta,meanname,outfolder)
    return filnmsum,filnmmean
#
#
## MAIN - Runs script if it is called as stand-alone
if __name__ == "__main__":
    main()

Wednesday 5 February 2014

Bash script for gdal

This is just a quick post on bash scripting gdal. This isn't very fancy and you can do more, much more but lets start with something (reasonably) basic.

I have recently been working in ArcGIS (it's sad but true) and the result of this work in a catalogue of ESRI raster files. These aren't simple GeoTiffs, that would make life too easy. These are those messy folders of adf suffixed files. Furthermore the files could only be created with a rectangular extent and I need the clipped, i.e. filled with "NoData" outside of my study area.

An ESRI raster file in all its glory
Above is the sort of thing ArcGIS thinks is good practice. OK, so the collection of files that is one single raster is stored in its own folder and in this case that is stored in another folder of the same name. So I want to loop through the list of folders on the left, in each folder go to the sub-folder and do some gdal stuff on the hdr.adf file.

#!/bin/sh
#
# Start for loop by setting g equal to each value in the list returned by the "ls" command
for g in $(ls -d */)
do
  # ls returns the folder names with a forward slash which we must remove
  f=$(echo ${g} | sed 's/\///')
  # Talk to user
  echo "Processing $f file..."
  # Here is the gdal bit
  gdalwarp -dstnodata -9999 -te 647821.820 7535500.320 651626.820 7537760.320 -tr 10 10 -q -cutline ../../Whole.shp -crop_to_cutline -of GTiff ${f}/${f}/${f}hdr.adf ${f}/${f}.tif
done

Here I get a list of all the folders using ls -d /* and pass the results to a loop. I then remove the forward slash appended to each folder name by ls using sed and pass that to gdal.
gdalwarp then does the rest of the work: set no data value to -9999, set bounding box coordinates, set pixel size, use cutline from a shape file (this wouldn't honour the bounding box and pixel size on its own), specify GeoTiff as output format and finally specify input and output file names.
The result:

The new GeoTiff file


Simple. Now for the 50+ other files.