1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# Lookup table mapping each customer segment name to its two-letter region code.
# NOTE(review): 'Average Family ' keeps its trailing space and 'JH' appears twice
# ('Successful hedonists' and 'Farmers') — preserved as-is from the source data.
segment_to_region = [
    ('Family with grown ups', 'PN'),
    ('Driven Growers', 'GJ'),
    ('Conservative families', 'DD'),
    ('Cruising Seniors', 'DL'),
    ('Average Family ', 'MN'),
    ('Living well', 'KA'),
    ('Successful hedonists', 'JH'),
    ('Retired and Religious', 'AX'),
    ('Career Loners', 'HY'),
    ('Farmers', 'JH'),
]

# Explicit schema: both columns are plain strings.
region_schema = (
    StructType()
    .add("Customer_main_type", "string")
    .add("Region Code", "string")
)

region_data = spark.createDataFrame(segment_to_region, schema=region_schema)

# Attach the region code to each row of `df` (defined earlier in the session)
# by matching on the shared segment column, then count customers per region.
new_df = df.join(region_data, on='Customer_main_type')
new_df.groupby("Region Code").count().show()
|