This is a robust solution that handles every combination of nulls a column can contain: the all-null columns are found first and then dropped. It may look lengthy and cumbersome, but it is robust. A single loop identifies the null columns, using the fact that a column's min and max are both null exactly when the column contains only nulls (aggregations ignore null values). No memory-intensive function such as collect() is used, so nothing is pulled to the driver and the solution stays efficient.
from pyspark.sql import functions as F

# Sample data: column "d" contains only nulls and should be dropped.
rows = [(None, 18, None, None), (1, None, None, None), (1, 9, 4.0, None), (None, 0, 0.0, None)]
schema = "a: int, b: int, c: float, d: int"
df = spark.createDataFrame(data=rows, schema=schema)

def get_null_column_names(df):
    # A column is all-null when both its min and max are null,
    # because the aggregations skip null values.
    column_names = []
    for col_name in df.columns:
        min_ = df.select(F.min(col_name)).first()[0]
        max_ = df.select(F.max(col_name)).first()[0]
        if min_ is None and max_ is None:
            column_names.append(col_name)
    return column_names

null_columns = get_null_column_names(df)

def drop_column(null_columns, df):
    for column_ in null_columns:
        df = df.drop(column_)
    return df

df = drop_column(null_columns, df)
df.show()
Output:

+----+----+----+
|   a|   b|   c|
+----+----+----+
|null|  18|null|
|   1|null|null|
|   1|   9| 4.0|
|null|   0| 0.0|
+----+----+----+
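If the two aggregation jobs per column become noticeable on very wide DataFrames, the same check can be collapsed into a single pass. The sketch below is one possible variant, not part of the original answer; it assumes the original four-column df created above (before the drop) and relies on the fact that F.count over a column ignores nulls, so an all-null column has a non-null count of zero.

from pyspark.sql import functions as F

# Count the non-null values of every column in one aggregation job.
non_null_counts = df.agg(
    *[F.count(F.col(c)).alias(c) for c in df.columns]
).first().asDict()

# Columns whose non-null count is zero contain only nulls; drop them all at once.
all_null_columns = [name for name, cnt in non_null_counts.items() if cnt == 0]
df = df.drop(*all_null_columns)
df.show()

Either way the data never leaves the executors; the single-pass version simply trades the per-column min/max jobs for one wider aggregation.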