# Promote the first row of a headerless CSV read to the DataFrame's column
# names.
#
# NOTE(review): assumes `spark` is an already-created SparkSession provided by
# the surrounding environment (e.g. a notebook or pyspark shell) - confirm.
from functools import reduce
from operator import and_

# Read the CSV with the first row treated as data, so the columns get the
# default positional names (_c0, _c1, ...).
df = (
    spark.read.format("csv")
    .option("header", "false")
    .load("/path/to/csvfile")
)

# Extract the first row to use as the header.
new_header = df.first()
if new_header is None:
    # first() returns None for an empty DataFrame; fail with a clear message
    # instead of a TypeError when the row is indexed below.
    raise ValueError(
        "CSV input is empty; there is no first row to use as a header"
    )

# Build a predicate that is true only for rows equal to the header row in
# EVERY column. Comparing _c0 alone would also discard data rows that merely
# share the header's first value, and a plain `!=` evaluates to NULL for null
# cells, which makes filter() silently drop those rows. eqNullSafe always
# yields true/false, so rows containing nulls are kept.
# Any later row that is identical to the header row in all columns (e.g. a
# repeated header line) is removed as well.
is_header_row = reduce(
    and_,
    (df[col].eqNullSafe(new_header[col]) for col in df.columns),
)

# Create a new DataFrame without the header row.
df_without_first_row = df.filter(~is_header_row)

# Rename columns to the values from the first row. An empty header cell is
# read as None, which is not a usable column name, so keep the positional
# name (_cN) for that column.
new_column_names = [
    new_header[col] if new_header[col] is not None else col
    for col in df.columns
]
df_with_new_header = df_without_first_row.toDF(*new_column_names)

df_with_new_header.show()