0

I am trying to train a Random Forest classifier in Google Earth Engine. My data is unbalanced (one class has roughly three times as many points as the other), so I want to balance the classes. I tried to give different weights to my classes, but it doesn't work.

Any suggestions?

Here is the code I am using:

// Train a Random Forest classifier on the labelled features in `table4`.
// `table4` is an imported FeatureCollection whose features carry the class
// label in the 'arboles' property and the predictors in the 'b*' properties.

// Split the features into training and validation sets using a random column.
var polygons = table4;
var sample = polygons.randomColumn();
var trainingsample = sample.filter('random <= 0.8');
var validationsample = sample.filter('random > 0.8');

// Keep only the training features that have a non-null value for every
// property present on the first training feature.
var NoNull = trainingsample.filter(
    ee.Filter.notNull(trainingsample.first().propertyNames()));

print('Training sample', trainingsample);
print('Validation sample', validationsample);

// Remember to change the bands used here according to the kind of RF being run.
var bands = ['b1', 'b3', 'b4', 'b6', 'b7', 'b8', 'b9', 'b10', 'b11'];

Map.addLayer(trainingsample, {color: 'black'});
Map.addLayer(validationsample, {color: 'white'});

// NOTE: ee.Classifier.train() has no `weights` argument, so per-class weights
// cannot be supplied this way; passing one makes the call fail. To balance the
// classes, subsample the larger class before training instead.
var RFclassifier = ee.Classifier.smileRandomForest({
  numberOfTrees: 1000,
  variablesPerSplit: 3,
  minLeafPopulation: 1,
  bagFraction: 0.5
}).train({
  features: NoNull,
  classProperty: 'arboles',
  // Pass the list of band names itself, not the literal string 'bands'.
  inputProperties: bands
});

print('Results of trained classifier', RFclassifier.explain());

I am new to using Google Earth Engine.

1 Answer 1

1

The only way to make them balanced is to discard some. Get a frequencyHistogram of the classes, then compute the fraction of each class needed to bring it down to the same number of features as the smallest class, then filter each class down to that subset using a randomColumn.

// Example: balance a point sample by randomly thinning every class down to
// (approximately) the size of the least frequent class.

// Region from which the random sample points are drawn.
var geometry = ee.Geometry.Polygon(
    [13.8, 41.0, 13.8, 40.75, 14.5, 40.75, 14.5, 41.0], null, false);

// Get some random points with landcover classes.
// The class codes are divided by 100 and floored to an integer before sampling.
var corine = ee.ImageCollection("COPERNICUS/CORINE/V20/100m")
    .filterDate('2017', '2018')
    .first();
var landcoverImage = corine.divide(100).floor().int();
var randomPoints = ee.FeatureCollection.randomPoints(geometry, 1000);
var pts = landcoverImage.sampleRegions(randomPoints);

// Compute a histogram: class value -> number of sampled points.
var histogramResult = pts.reduceColumns(
    ee.Reducer.frequencyHistogram(), ["landcover"]);
var hist = ee.Dictionary(histogramResult.get('histogram'));
var values = hist.values();
print(values);

// Subset to balance number of features.
var classes = hist.keys();
var min = ee.Number(values.reduce(ee.Reducer.min()));

// Returns the points of class `c`, keeping each one only when its random
// value falls below min / count(c).
function subsampleClass(c) {
  var ratio = min.divide(hist.getNumber(c));
  // Histogram keys are strings, so parse the key back to a number to filter.
  var classPoints = pts.filter(ee.Filter.eq('landcover', ee.Number.parse(c)));
  return classPoints
      .randomColumn()
      .filter(ee.Filter.lt('random', ratio));
}

var subset = classes.map(subsampleClass);
var balanced = ee.FeatureCollection(subset).flatten();

// Print the per-class counts of the balanced collection as a check.
print(balanced.reduceColumns(ee.Reducer.frequencyHistogram(), ["landcover"]));

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.