Wednesday, 26 March 2014

Filtering rasters

I recently needed/wanted to write a filter function to use with GDAL in Python. What I wanted was something that could be passed a kernel such as spatial average (blur) or edge detection (actually it was this I was after mostly). So this is the code I wrote for it.
Lets start with the selection of a kernel (filter). Firstly, if I know which one I want I can pass the name to the selection function, otherwise the function sets the choice to nothing to begin with.
def kernelChoice(ans = ''):
Then there is a dictionary containing all the kernels as Numpy arrays (see code below). After which the function asks for the choice if none has been passed to it already and returns the array and its name. Note also that Numpy has been imported as np (import numpy as np).
def kernelChoice(ans = ''):  
        '''Returns a kernel to use with mustard as well as the name of the kernel.
        To use: kernel, ans = kernelChoice(ans = '') '''
        # Create dictionary of kernels
        kernelDict ={
        'Average':np.array([[0.111,0.111,0.111],[0.111,0.111,0.111],[0.111,0.111,0.111]]),
        'HighPass1':np.array([[-0.7,-1.0,-0.7],[-1.0,6.8,-1.0],[-0.7,-1.0,-0.7]]),
        'HighPass2':np.array([[-1.0,-1.0,-1.0],[-1.0,9.0,-1.0],[-1.0,-1.0,-1.0]]),
        'PrewittX':np.array([[-1.0,0.0,1.0],[-1.0,0.0,1.0],[-1.0,0.0,1.0]]),
        'PrewittY':np.array([[1.0,1.0,1.0],[0.0,0.0,0.0],[-1.0,-1.0,-1.0]]),
        'SobelX':np.array([[-1.0,0.0,1.0],[-2.0,0.0,2.0],[-1.0,0.0,1.0]]),
        'SobelY':np.array([[1.0,2.0,1.0],[0.0,0.0,0.0],[-1.0,-2.0,-1.0]])
        }
        # Create list of kernel names
        KerList = kernelDict.keys()
        KerList.sort()
        # Get choice from user
        while ans not in KerList:
            print KerList
            ans = raw_input('Enter name of kernel: ')
        kernel = kernelDict[ans]
        print ans,':\n', kernel
    return kernel, ans
The next step then is to open the raster file, read in the data and then run the kernel over that. This function calls upon rasterImport which we will also define as function further down, but is similar to previous raster importers I have messed around with before. Getting the code for that sorted out should be a priority because as it stands now it is pretty messy and requires a lot of manipulation. Well, back to this code.
The mustard function sets NoData values in the imported data grid to Numpy nan. Then there is a whole load of getting shapes and sizes in order. After which an array, based on the kernel is created that contains relative locations to the central pixel; left and up are negative, right and down are positive (remember that coordinates here start at the upper left).
Then we loop starting top left going right then down to the next row, left to right each time. Then at last the filtered raster is returned.
def mustard(file,kernel):
    '''Run a kernel (window,matrix) over a single layer raster. Uses rasterImport and rasterExport.
    Kernel must be numpy array of odd number dimensions.
    To use: outdata = mustard(file,kernel) '''
    # Import data file
    data, meta, metadata = rasterImport(file)
    # Set no data values to numpy nan
    data[data==meta['dats'][0]]=np.nan
    # Get size of indata array
    inrows =  meta['dimension']['rows']
    incols =  meta['dimension']['cols']
    # Get size of kernel
    krows = np.shape(kernel)[0]
    kcols = np.shape(kernel)[1]
    # Check kernel is smaller than data grid.
    if krows >= inrows or kcols >= incols: sys.exit('Bad kernel. Too large.')
    # Check kernel has a central pixel
    if krows % 2 == 0: sys.exit('Bad kernel. Even number rows.')
    if kcols % 2 == 0: sys.exit('Bad kernel. Even number columns.')
    # Get central pixel location in kernel
    kmidrow = int(krows)/2
    kmidcol = int(kcols)/2
    # Create relative extent of kernel
    rowminext = -1*((krows-1)/2)
    rowmaxext = (krows-1)/2
    rowrange = range(rowminext,rowmaxext+1,1)
    colminext = -1*((kcols-1)/2)
    colmaxext = (kcols-1)/2
    colrange = range(colminext,colmaxext+1,1)
    # Set initial starting location of kernel on grid
    dati = kmidrow
    datj = kmidcol
    # Get number of rows to run kernel over
    gridrows = range(inrows + rowminext*2)
    # Get number of columns to run kernel over
    gridcols = range(incols + colminext*2)
    # Create output array filled with nan
    outdata = np.empty((inrows,incols))
    outdata[:] = np.nan
    # Start loop
    for row in gridrows:
        datj = kmidcol
        rowvec = np.ones((1,krows))*dati + rowrange
        for col in gridcols:
            if np.isnan(data[dati,datj]):
                datj = datj + 1
                outdata[dati,datj] = np.nan
            else:
                colvec = np.ones((1,kcols))*datj + colrange
                extract = np.empty((krows,kcols))
                for i in range(krows):
                    for j in range(kcols):
                        extract[i,j] = data[int(rowvec[0,i]),int(colvec[0,j])]
                pixval = np.nansum(extract * kernel)
                outdata[dati,datj] = pixval
                datj = datj + 1
        dati = dati + 1
    return outdata
To finish off with here are the other functions that make this work as well as a few lines to get everything running. Don’t forget to import Numpy as well as os to get this all working.
def namer(pathstring):
    '''Get name and extension of a file
    To use: name,ext,path,namefull = namer(pathstring) '''
    ## Called by: kriging
    namefull = os.path.basename(pathstring)
    path = os.path.dirname(pathstring)
    ext = namefull.split('.')[1]
    name = namefull.split('.')[0]
    return name,ext,path,namefull
#
def rasterImport(file):
    '''Import a raster file and return grid plus meta data.
    To use: data, meta, metadata = rasterImport(file)'''
    time_one = datetime.now()
    print '\nImporting ',file
    #print time_one.strftime('at day:%j %H:%M:%S')
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Open file
    raster = gdal.Open(file)
    if raster is None:
        print 'Could not open ',file,'\n'
        sys.exit(1)
    # Get coordinate system parameters
    projec = raster.GetProjection()
    srs=osr.SpatialReference(wkt=projec)
    transf = raster.GetGeoTransform()
    ul_x = transf[0]
    ul_y = transf[3]
    xres = transf[1]
    yres = transf[5]
    # get image size
    rows = raster.RasterYSize
    cols = raster.RasterXSize
    dims = {'xres':xres,'yres':yres,'rows':rows,'cols':cols}
    # Calculate corners
    ll_x = ul_x
    ll_y = ul_y + (rows * yres)
    ur_x = ul_x + (cols * xres)
    ur_y = ul_y
    lr_x = ur_x
    lr_y = ll_y
    corners = {'ll':(ll_x,ll_y),'ul':(ul_x,ul_y),'ur':(ur_x,ur_y),'lr':(lr_x,lr_y)}
    #
    driveshrt = raster.GetDriver().ShortName
    driver = gdal.GetDriverByName(driveshrt)
    metadata = driver.GetMetadata()
    metakeys = metadata.keys()
    # Read the file band to a matrix called band_1
    band = raster.GetRasterBand(1)
    # Access data in rastern band as array
    data = band.ReadAsArray(0,0,cols,rows)
    #
    # gdal interpolation creates "upside down files, hence this
    Yres=yres
    if Yres/np.fabs(Yres)==-1:
        Yres=-1*Yres
        data = np.flipud(data)
    #
    # get nodata value
    nandat = band.GetNoDataValue()
    # get minimum and maximum value
    mindat = band.GetMinimum()
    maxdat = band.GetMaximum()
    if mindat is None or maxdat is None:
            (mindat,maxdat) = band.ComputeRasterMinMax(1)
    dats = [nandat,mindat,maxdat]
    sumvals = data[np.where(np.logical_not(data == nandat))]
    sumval = sumvals.sum()
    numvals = len(sumvals)
    avval = sumval/numvals
    volvals = sumval*(math.fabs((transf[1]*transf[5])))
    meta = {'transform':transf,'projection':projec,'corners':corners,'dimension':dims,'dats':dats,'sumval':sumval,'numvals':numvals,'avval':avval,'volvals':volvals}
    if srs.IsProjected:
        print 'projcs 1: ',srs.GetAttrValue('projcs')
        print 'projcs 2: ',srs.GetAuthorityCode('projcs')
        print 'projcs 3: ',srs.GetAuthorityName('projcs')
    print 'geogcs 1:: ',srs.GetAttrValue('geogcs')
    print 'Sum of pixel values = ',sumval,' in range ',mindat,' to ',maxdat
    print 'From ',numvals,' of ',rows,' X ',cols,' = ',rows*cols,' pixels, or ',((1.0*numvals)/(rows*cols))*100,'%'
    print 'Average then is ',avval
    print 'Pixels size, x:',transf[1],' y:',transf[5]
    print '"Volume" is then the sum * 1 pixel area: ',volvals
    return data, meta, metadata
#
def rasterExport(outdata,meta,filnm):
    '''Export array as single layer GeoTiff. Uses metadata from rasterImport and numpy nan as mask.
    To use: rasterExport(outdata,meta,filnm)'''
    inrows =  meta['dimension']['rows']
    incols =  meta['dimension']['cols']
    datdriver = gdal.GetDriverByName( "GTiff" )
    datout = datdriver.Create(filnm,incols,inrows,1,gdal.GDT_Float32)
    datout.SetGeoTransform(meta['transform'])
    datout.SetProjection(meta['projection'])
    outdata_m = np.flipud(outdata)
    outdata_m = np.where(np.isnan(outdata_m),meta['dats'][0],outdata_m)
    datout.GetRasterBand(1).WriteArray(outdata_m)
    datout.GetRasterBand(1).SetNoDataValue(meta['dats'][0])
    datout.GetRasterBand(1).ComputeStatistics(True)
    datout = None
    return 0
#
# Here is a little starter, just set the variable 'file' first
file = './file1.tif'
name,ext,path,namefull = AM.namer(file)
filnm = os.path.join(path,(name + ans + '.tif'))
outdata,meta = mustard(file,kernel)
rasterExport(outdata,meta,filnm)

No comments:

Post a Comment