"""Spark job that filters an airports text file and saves two fields per match.

Reads ``in/airports.text``, keeps the lines whose field 3 equals
``"United States"`` (including the double quotes), and writes fields 1 and 2
of each kept line to ``out/airports_in_usa.text``.
"""
from pyspark import SparkContext, SparkConf


def splitComma(line):
    """Return fields 1 and 2 of a delimited line, joined as ``"<f1>, <f2>"``.

    Going by the caller's variable names these are presumably the airport
    name and city -- TODO confirm against the input file layout.

    ``Utils`` is resolved as a module-level global at call time; it is bound
    by the ``__main__`` block below, after the archive containing ``commons``
    has been registered with Spark.
    """
    splits = Utils.COMMA_DELIMITER.split(line)
    return "{}, {}".format(splits[1], splits[2])


if __name__ == "__main__":
    conf = SparkConf().setAppName("airports").setMaster("local[2]")
    sc = SparkContext(conf=conf)
    try:
        # Register the zipped helper package with Spark *before* importing it,
        # so that the tasks run by this context can import ``commons`` as well.
        # NOTE: the path below is a placeholder -- point it at the real archive.
        sc.addPyFile('.../pathto commons.zip')
        # Must stay a module-level (global) import: splitComma looks up
        # ``Utils`` as a global name.
        from commons import Utils

        airports = sc.textFile("in/airports.text")
        airportsInUSA = airports.filter(
            lambda line: Utils.COMMA_DELIMITER.split(line)[3] == "\"United States\""
        )

        airportsNameAndCityNames = airportsInUSA.map(splitComma)
        airportsNameAndCityNames.saveAsTextFile("out/airports_in_usa.text")
    finally:
        # Always release the Spark context, even if the job fails part-way.
        sc.stop()

# Yes, it only accepts the ones from Spark. You can zip the required files
# (Utils, numpy, etc.) and specify the --py-files parameter in spark-submit:
#
#   spark-submit --py-files rdd/file.zip rdd/AirportsInUsaSolution.py