I am having issues splitting the contents of a DataFrame column using Spark 1.4. The DataFrame was created by reading a nested, complex JSON file. I used df.explode but keep getting an error message. The JSON file format, along with the Spark commands I ran, is as follows:
scala> val df = sqlContext.read.json("/Users/xx/target/statsfile.json") scala> df.show() +--------------------+--------------------+ | mi| neid| +--------------------+--------------------+ |[900,["pmEs","pmS...|[SubNetwork=ONRM_...| |[900,["pmIcmpInEr...|[SubNetwork=ONRM_...| |[900,pmUnsuccessf...|[SubNetwork=ONRM_...| |[900,["pmBwErrBlo...|[SubNetwork=ONRM_...| |[900,["pmSctpStat...|[SubNetwork=ONRM_...| |[900,["pmLinkInSe...|[SubNetwork=ONRM_...| |[900,["pmGrFc","p...|[SubNetwork=ONRM_...| |[900,["pmReceived...|[SubNetwork=ONRM_...| |[900,["pmIvIma","...|[SubNetwork=ONRM_...| |[900,["pmEs","pmS...|[SubNetwork=ONRM_...| |[900,["pmEs","pmS...|[SubNetwork=ONRM_...| |[900,["pmExisOrig...|[SubNetwork=ONRM_...| |[900,["pmHDelayVa...|[SubNetwork=ONRM_...| |[900,["pmReceived...|[SubNetwork=ONRM_...| |[900,["pmReceived...|[SubNetwork=ONRM_...| |[900,["pmAverageR...|[SubNetwork=ONRM_...| |[900,["pmDchFrame...|[SubNetwork=ONRM_...| |[900,["pmReceived...|[SubNetwork=ONRM_...| |[900,["pmNegative...|[SubNetwork=ONRM_...| |[900,["pmUsedTbsQ...|[SubNetwork=ONRM_...| +--------------------+--------------------+ scala> df.printSchema() root |-- mi: struct (nullable = true) | |-- gp: long (nullable = true) | |-- mt: string (nullable = true) | |-- mts: string (nullable = true) | |-- mv: string (nullable = true) |-- neid: struct (nullable = true) | |-- nedn: string (nullable = true) | |-- nesw: string (nullable = true) | |-- neun: string (nullable = true) scala> val df1=df.select("mi.mv").show() +--------------------+ | mv| +--------------------+ |[{"r":[0,0,0],"mo...| |{"r":[0,4,0,4],"m...| |{"r":5,"moid":"Ma...| |[{"r":[2147483647...| |{"r":[225,1112986...| |[{"r":[83250,0,0,...| |[{"r":[1,2,529982...| |[{"r":[26998564,0...| |[{"r":[0,0,0,0,0,...| |[{"r":[0,0,0],"mo...| |[{"r":[0,0,0],"mo...| |{"r":[0,0,0,0,0,0...| |{"r":[0,0,1],"moi...| |{"r":[4587,4587],...| |[{"r":[180,180],"...| |[{"r":["0,0,0,0,0...| |{"r":[0,35101,0,0...| |[{"r":["0,0,0,0,0...| |[{"r":[0,1558],"m...| 
|[{"r":["7484,4870...| +--------------------+ scala> df1.explode("mv","mvnew")(mv: String => mv.split(",")) <console>:1: error: ')' expected but '(' found. df1.explode("mv","mvnew")(mv: String => mv.split(",")) ^ <console>:1: error: ';' expected but ')' found. df1.explode("mv","mvnew")(mv: String => mv.split(","))
Am I doing something wrong? I need to extract the data under mi.mv into separate columns so I can apply some transformations.