def haversine_spark(df, col_lat1, col_lon1, col_lat2, col_lon2, col_name="distance"):
    """Add a haversine (great-circle) distance column to a Spark DataFrame.

    The distance is computed between the points (col_lat1, col_lon1) and
    (col_lat2, col_lon2). Coordinates are passed through ``F.radians``, so
    they are expected in degrees. The result is scaled by 6371 (the Earth's
    mean radius in kilometres), so the output is in kilometres.

    Args:
        df: Input DataFrame.
        col_lat1: Latitude of the first point, as a Column expression or a
            column name.
        col_lon1: Longitude of the first point (Column or column name).
        col_lat2: Latitude of the second point (Column or column name).
        col_lon2: Longitude of the second point (Column or column name).
        col_name: Name of the output column. If a column with this name
            already exists, ``withColumn`` replaces it.

    Returns:
        A new DataFrame with ``col_name`` added. No other column is added,
        modified or removed.
    """
    # Accept plain column names as well as Column expressions; arithmetic
    # such as ``lat2 - lat1`` only works on Column objects.
    lat1, lon1, lat2, lon2 = (
        F.col(c) if isinstance(c, str) else c
        for c in (col_lat1, col_lon1, col_lat2, col_lon2)
    )

    earth_radius_km = 6371

    half_dlat = F.radians(lat2 - lat1) / 2
    half_dlon = F.radians(lon2 - lon1) / 2

    # Haversine term. It is kept as an in-memory Column expression rather
    # than a temporary DataFrame column, so an existing column (e.g. one
    # already named "a") can never be overwritten or dropped by accident.
    a = (
        F.pow(F.sin(half_dlat), 2)
        + F.cos(F.radians(lat1)) * F.cos(F.radians(lat2)) * F.pow(F.sin(half_dlon), 2)
    )

    # Central angle between the two points: 2 * atan2(sqrt(a), sqrt(1 - a)).
    central_angle = F.atan2(F.sqrt(a), F.sqrt(1 - a)) * 2

    return df.withColumn(col_name, central_angle * earth_radius_km)