This might help you get started: I converted your Databricks link to Python, using a one-row example for you to explore.
# One-row example: multiply the nested field items.books.fees by 1.01 using
# SQL named_struct(), rebuilding the struct while leaving every other field
# (metadata, items.paper) untouched.
#
# Explicit imports instead of `from ... import *` so it is clear where each
# name comes from.
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from pyspark.sql.types import DoubleType, IntegerType, StringType, StructType

# Databricks notebooks predefine `spark`; create the session explicitly so
# this also runs as a standalone pyspark script (getOrCreate reuses an
# existing session if one is already active).
spark = SparkSession.builder.appName("nested-struct-example").getOrCreate()

# Two-level nested schema:
#   metadata { eventid, hostname, timestamp }
#   items    { books { fees }, paper { pages } }
schema = (
    StructType()
    .add(
        "metadata",
        StructType()
        .add("eventid", IntegerType(), True)
        .add("hostname", StringType(), True)
        .add("timestamp", StringType(), True),
    )
    .add(
        "items",
        StructType()
        .add("books", StructType().add("fees", DoubleType(), True))
        .add("paper", StructType().add("pages", IntegerType(), True)),
    )
)

# Single row of dummy data shaped to match the schema above.
nested_row = [
    {
        "metadata": {
            "eventid": 9,
            "hostname": "999.999.999",
            "timestamp": "9999-99-99 99:99:99",
        },
        "items": {
            "books": {"fees": 99.99},
            "paper": {"pages": 9999},
        },
    }
]

df = spark.createDataFrame(nested_row, schema)
df.printSchema()

# Show the original values first (this produces the 99.99 table below; the
# original paste showed this table but was missing the call that made it).
df.show(truncate=False)

# Rebuild the nested struct with named_struct(), bumping books.fees by 1%
# and passing metadata and items.paper through unchanged, then flatten the
# wrapper back into the two top-level columns.
df.selectExpr(
    """
    named_struct(
        'metadata', metadata,
        'items', named_struct(
            'books', named_struct('fees', items.books.fees * 1.01),
            'paper', items.paper
        )
    ) as named_struct
    """
).select(
    col("named_struct.metadata"), col("named_struct.items")
).show(truncate=False)

# Expected output:
#
# root
#  |-- metadata: struct (nullable = true)
#  |    |-- eventid: integer (nullable = true)
#  |    |-- hostname: string (nullable = true)
#  |    |-- timestamp: string (nullable = true)
#  |-- items: struct (nullable = true)
#  |    |-- books: struct (nullable = true)
#  |    |    |-- fees: double (nullable = true)
#  |    |-- paper: struct (nullable = true)
#  |    |    |-- pages: integer (nullable = true)
#
# +-------------------------------------+-----------------+
# |metadata                             |items            |
# +-------------------------------------+-----------------+
# |[9, 999.999.999, 9999-99-99 99:99:99]|[[99.99], [9999]]|
# +-------------------------------------+-----------------+
#
# +-------------------------------------+------------------------------+
# |metadata                             |items                         |
# +-------------------------------------+------------------------------+
# |[9, 999.999.999, 9999-99-99 99:99:99]|[[100.98989999999999], [9999]]|
# +-------------------------------------+------------------------------+