I needed to calculate a mean value of multiple rasters i.e. the average value for each pixel derived from a set of GeoTiff images covering the same area. The gdal_calc.py script can handle a few files but not 38 files as in my case. So here is a script to import a list of files and return a raster of the sum and a raster of the average. It's not elegant at all but it works (well, for me it did).
#!/Library/Frameworks/Python.framework/Versions/Current/bin/python
#from __future__ import division
import sys
import os
import math
import numpy as np
from scipy.stats.stats import nanmean
import scipy.stats as stats
from osgeo import gdal
from osgeo import ogr
from osgeo import osr
import gc
from datetime import datetime
#
# Here we create the "main" function so that the script will run stand-alone or as a library
def main():
# Enter list of tif files
FileList = ('Fill','with','list','of','tifs')
# Enter the location of files
FileLoc = '/Path/to/folder/containing/tifs''
# Enter the destination folder (here same as source)
outfolder = FileLoc
# Run the functions defined below.
filnmsum,filnmmean = rasterAverage(FileList,FileLoc,outfolder)
return 0
#
# Some slighty bloated and messy code because I took this from a function I wrote to
# import data for a kriging routine I am working on.
def DemImport(demfile):
'''Import a DEM file and return grid plus meta data. Written for kriging. '''
time_one = datetime.now()
print '\nImporting ',demfile
print time_one.strftime('at day:%j %H:%M:%S')
# register all of the GDAL drivers
gdal.AllRegister()
# Open file
dem = gdal.Open(demfile)
if dem is None:
print 'Could not open ',demfile,'\n'
sys.exit(1)
# Get coordinate system parameters
projec = dem.GetProjection()
transf = dem.GetGeoTransform()
ul_x = transf[0]
ul_y = transf[3]
xres = transf[1]
yres = transf[5]
# get image size
demrows = dem.RasterYSize
demcols = dem.RasterXSize
# Calculate corners
ll_x = ul_x
ll_y = ul_y + (demrows * yres)
#
driveshrt = dem.GetDriver().ShortName
driver = gdal.GetDriverByName(driveshrt)
#metadata = driver.GetMetadata()
# Read the dem band to a matrix called band_1
demband = dem.GetRasterBand(1)
# Access data in rastern band as array
demdata = demband.ReadAsArray(0,0,demcols,demrows)
# gdal interpolation creates "upside down files, hence this
Yres=yres
if Yres/np.fabs(Yres)==-1:
Yres=-1*Yres
demdata = np.flipud(demdata)
#
# get nodata value
demnandat = demband.GetNoDataValue()
# get minimum and maximum value
demmindat = demband.GetMinimum()
demmaxdat = demband.GetMaximum()
if demmindat is None or demmaxdat is None:
(demmindat,demmaxdat) = demband.ComputeRasterMinMax(1)
#
## Create grid to krig to.
xstart = int(round(ll_x))
xstop = xstart + int(demcols)*int(round(xres))
ystart = int(round(ll_y))
ystop = ystart + int(demrows)*int(round(Yres))
demRx = range(xstart,xstop,int(round(xres)))
demRy = range(ystart,ystop,int(round(Yres)))
Xg1,Yg1 = np.meshgrid(demRx,demRy)
# Convert grid to vectors
Yg=Yg1.reshape((-1,1))
Xg=Xg1.reshape((-1,1))
#
rx = len(demRx)
ry = len(demRy)
# Collect metadata to list for return
demmeta = []
demmeta.append( ['projection','geotransform','driver','rows','columns','nanvalue','min','max'])# All one line
demmeta.append(projec)
demmeta.append(transf)
demmeta.append(driver)
demmeta.append(demrows)
demmeta.append(demcols)
demmeta.append(demnandat)
demmeta.append(demmindat)
demmeta.append(demmaxdat)
for i in demmeta: print i
print '\n'
return demdata,Xg,Yg,Xg1,Yg1,rx,ry,demmeta
#
# This writes to GeoTiff with NoData values set to -9999
def datawrite(outdata,demdata,meta,name,outDir):
'''Write an array of grid data to a georeferenced raster file'''
#meta = ['projection','geotransform','driver','rows','columns','nanvalue']
filnm = os.path.join(outDir,(name + '.tif'))
datdriver = gdal.GetDriverByName( "GTiff" )
datout = datdriver.Create(filnm,meta[5],meta[4],1,gdal.GDT_Float32)
datout.SetGeoTransform(meta[2])
datout.SetProjection(meta[1])
nanmask = demdata != meta[6]
outdata_m = np.flipud(outdata * nanmask)
outdata_m = np.where(outdata_m==0,-9999,outdata_m)
datout.GetRasterBand(1).WriteArray(outdata_m)
datout.GetRasterBand(1).SetNoDataValue(-9999)
datout.GetRasterBand(1).ComputeStatistics(True)
return filnm
#
# Here is where the summing occurs
def rasterAverage(flist,flistfolder,outfolder):
'''Average all rasters together'''
time_one = datetime.now()
print time_one.strftime('Start rasterAverage at:%j %H:%M:%S')
# Get first raster file
Afile = os.path.join(flistfolder,flist[0])
Adata,AXg,AYg,AXg1,AYg1,Arx,Ary,Ademmeta = DemImport(Afile)
# Get second for creating mask to deal with NaN
Bfile = os.path.join(flistfolder,flist[1])
Bdata,BXg,BYg,BXg1,BYg1,Brx,Bry,Bdemmeta = DemImport(Bfile)
# Set NoData values to numpy NaN
Adata[Adata==Ademmeta[6]]=np.nan
Bdata[Bdata==Bdemmeta[6]]=np.nan
# Create starting values with initial mask
sumdata = np.ma.masked_array(np.nan_to_num(Adata), mask=np.isnan(Adata) & np.isnan(Bdata))
counter = 1
print '\n'*3
print '*'*20
# Go through list of other raster files
for B in flist[1:]:
print 'file ',counter+1,' of ',len(flist)
# Import files 2 to n
Bfile = os.path.join(flistfolder,B)
Bdata,BXg,BYg,BXg1,BYg1,Brx,Bry,Bdemmeta = DemImport(Bfile)
print B,' data type: ',type(Bdata)
print 'Rows (A,B): ',Arx,Brx,' Columns (A,B): ',Ary,Bry
print 'xres (A,B): ',Ademmeta[2][1],Bdemmeta[2][1],' yres (A,B): ',Ademmeta[2][5],Bdemmeta[2][5]
print 'No data (A): ',Ademmeta[6],' No data (B): ',Bdemmeta[6]
# Check matching resolution
if Arx != Brx or Ary != Bry:
print B,' resolution mismatch with ', Afile
continue
elif Ademmeta[4] != Bdemmeta[4] or Ademmeta[5] != Bdemmeta[5]:
print 'Size mismatch between ',B,' and ',Afile
continue
# Add current file to sum
Bdata[Bdata==Bdemmeta[6]]=np.nan
BdataMasked = np.ma.masked_array(np.nan_to_num(Bdata), mask=sumdata.mask)
sumdata = (sumdata + BdataMasked).filled(np.nan)
sumdata = np.ma.masked_array(np.nan_to_num(sumdata), mask=np.isnan(sumdata))
counter = counter + 1
# Here we create the mean raster
meandata = sumdata/counter
#
# Save the data to file
sumname = 'file_sum'
filnmsum = datawrite(sumdata,Adata,Ademmeta,sumname,outfolder)
meanname = 'file_mean'
filnmmean = datawrite(meandata,Adata,Ademmeta,meanname,outfolder)
return filnmsum,filnmmean
#
#
## MAIN - Runs script if it is called as stand-alone
if __name__ == "__main__":
main()
Monday, 17 February 2014
Wednesday, 5 February 2014
Bash script for gdal
This is just a quick post on bash scripting gdal. This isn't very fancy and you can do more, much more but lets start with something (reasonably) basic.
I have recently been working in ArcGIS (it's sad but true) and the result of this work in a catalogue of ESRI raster files. These aren't simple GeoTiffs, that would make life too easy. These are those messy folders of adf suffixed files. Furthermore the files could only be created with a rectangular extent and I need the clipped, i.e. filled with "NoData" outside of my study area.
Above is the sort of thing ArcGIS thinks is good practice. OK, so the collection of files that is one single raster is stored in its own folder and in this case that is stored in another folder of the same name. So I want to loop through the list of folders on the left, in each folder go to the sub-folder and do some gdal stuff on the hdr.adf file.
#!/bin/sh
#
# Start for loop by setting g equal to each value in the list returned by the "ls" command
for g in $(ls -d */)
do
# ls returns the folder names with a forward slash which we must remove
f=$(echo ${g} | sed 's/\///')
# Talk to user
echo "Processing $f file..."
# Here is the gdal bit
gdalwarp -dstnodata -9999 -te 647821.820 7535500.320 651626.820 7537760.320 -tr 10 10 -q -cutline ../../Whole.shp -crop_to_cutline -of GTiff ${f}/${f}/${f}hdr.adf ${f}/${f}.tif
done
Here I get a list of all the folders using ls -d /* and pass the results to a loop. I then remove the forward slash appended to each folder name by ls using sed and pass that to gdal.
gdalwarp then does the rest of the work: set no data value to -9999, set bounding box coordinates, set pixel size, use cutline from a shape file (this wouldn't honour the bounding box and pixel size on its own), specify GeoTiff as output format and finally specify input and output file names.
The result:
Simple. Now for the 50+ other files.
I have recently been working in ArcGIS (it's sad but true) and the result of this work in a catalogue of ESRI raster files. These aren't simple GeoTiffs, that would make life too easy. These are those messy folders of adf suffixed files. Furthermore the files could only be created with a rectangular extent and I need the clipped, i.e. filled with "NoData" outside of my study area.
An ESRI raster file in all its glory |
#!/bin/sh
#
# Start for loop by setting g equal to each value in the list returned by the "ls" command
for g in $(ls -d */)
do
# ls returns the folder names with a forward slash which we must remove
f=$(echo ${g} | sed 's/\///')
# Talk to user
echo "Processing $f file..."
# Here is the gdal bit
gdalwarp -dstnodata -9999 -te 647821.820 7535500.320 651626.820 7537760.320 -tr 10 10 -q -cutline ../../Whole.shp -crop_to_cutline -of GTiff ${f}/${f}/${f}hdr.adf ${f}/${f}.tif
done
Here I get a list of all the folders using ls -d /* and pass the results to a loop. I then remove the forward slash appended to each folder name by ls using sed and pass that to gdal.
gdalwarp then does the rest of the work: set no data value to -9999, set bounding box coordinates, set pixel size, use cutline from a shape file (this wouldn't honour the bounding box and pixel size on its own), specify GeoTiff as output format and finally specify input and output file names.
The result:
The new GeoTiff file |
Simple. Now for the 50+ other files.
Subscribe to:
Posts (Atom)