# Load the dataset at the given path into `data` via the ambient `sqlContext`.
# NOTE(review): the ".parquet" suffix suggests a Parquet file, and `data` is
# presumably a Spark DataFrame — confirm against the Spark version in use.
data = sqlContext.load("/home/rxin/ints.parquet")

# Group rows by column "a", then request column "a" itself together with the
# average of column "num" for each group, and collect the result.
# The collected value is intentionally not bound to a name (as in the
# original); the statement is run only for its evaluation.
data.groupBy("a").agg(col("a"), avg("num")).collect()