I have a dataframe with the following schema:

I am trying to fetch all the data from this dataframe. I use the df.collect() method to iterate through the entire dataframe and then pull the values out of the columns one by one. But it seems that it is not iterating through the entire tree and is only pulling the initial parent row.
def _flat_record(node, project_id, parent_id, shared):
    """Build one 12-column output record from ``node``.

    ``project_id``, ``parent_id`` and ``shared`` are passed in because the
    caller decides their fallback values; every other column is read
    directly from ``node``.
    """
    return [
        project_id,
        node["id"],
        node["name"],
        node["order"],
        node["pid"],
        node["created_date"],
        node["last_modified_date"],
        parent_id,
        node["description"],
        node["recursive"],
        # Only the first element of the first link entry is exported.
        node["links"][0][0],
        shared,
    ]


def _append_descendants(children, shared, data):
    """Append a record for every node under ``children``, depth-first."""
    for child in children:
        # NOTE(review): assumes nodes support ``in`` on field names, as a dict
        # or a pyspark Row does -- confirm against the actual schema.
        project_id = child["project_id"] if "project_id" in child else "null"
        data.append(
            _flat_record(child, project_id, str(child["parent_id"]), shared)
        )
        # The deepest level of the tree may not carry a "children" field.
        nested = child["children"] if "children" in child else None
        if nested:
            _append_descendants(nested, shared, data)


def parseCol(landing_df, data):
    """Flatten each collected row and all of its descendants into ``data``.

    Args:
        landing_df: Object whose ``collect()`` returns the top-level rows.
        data: List that receives one 12-element list per visited node.

    Returns:
        The same ``data`` list, extended in place.

    A top-level row with no children is written with the literal ``"null"``
    as its parent id. A row with one or more children is written with
    ``str(parent_id)``, followed by every descendant. Descendants take the
    top-level row's ``shared`` value, or ``"null"`` when the row has no such
    field; the top-level row itself is always written with ``"null"``.
    """
    for row in landing_df.collect():
        # A null ``children`` value is treated the same as an empty list.
        children = row["children"] or []
        if not children:
            data.append(_flat_record(row, row["project_id"], "null", "null"))
            continue

        # Any non-empty child list takes this path, including a single child.
        data.append(
            _flat_record(row, row["project_id"], str(row["parent_id"]), "null")
        )
        shared = row["shared"] if "shared" in row else "null"
        _append_descendants(children, str(shared), data)
    return data


# Can someone suggest a better way to do this?