/*************************************
Joining datasets in Pig
Employee..Salary = 1..many
Displaying most recent salary
Without using any join optimizations
**************************************/

-- ---------------------------------------------------------------------------
-- Step 1: employees.
-- Read the comma-delimited employee file; every column is loaded as chararray.
-- ---------------------------------------------------------------------------
employeesRaw = LOAD '/user/akhanolk/joinProject/data/employees_active/part-e'
    USING PigStorage(',')
    AS (
        empNo:chararray,
        dOB:chararray,
        lName:chararray,
        fName:chararray,
        gender:chararray,
        hireDate:chararray,
        deptNo:chararray
    );

-- Keep only the columns that reach the output (dOB and hireDate are dropped);
-- note fName is placed ahead of lName here.
employees = FOREACH employeesRaw GENERATE
    empNo,
    fName,
    lName,
    gender,
    deptNo;

-- ---------------------------------------------------------------------------
-- Step 2: salaries.
-- Read the comma-delimited salary history (many rows per employee).
-- ---------------------------------------------------------------------------
salaryHistory = LOAD '/user/akhanolk/joinProject/data/salaries_history/part-sh'
    USING PigStorage(',')
    AS (
        empNo:chararray,
        salary:long,
        fromDate:chararray,
        toDate:chararray
    );

-- Retain only rows whose toDate equals '9999-01-01'.
-- NOTE(review): presumably this is the open-ended sentinel that marks the
-- current (most recent) salary record -- confirm against the data set.
openEndedSalaries = FILTER salaryHistory BY toDate == '9999-01-01';

-- Only the join key and the salary amount are needed downstream.
currentSalaries = FOREACH openEndedSalaries GENERATE
    empNo,
    salary;

-- ---------------------------------------------------------------------------
-- Step 3: join and store.
-- Default inner join on empNo, with no USING clause (no join optimization).
-- ---------------------------------------------------------------------------
employeeSalaries = JOIN employees BY empNo, currentSalaries BY empNo;

-- Both join inputs carry empNo, so fields are disambiguated with relation::field.
result = FOREACH employeeSalaries GENERATE
    employees::empNo,
    employees::fName,
    employees::lName,
    employees::gender,
    employees::deptNo,
    currentSalaries::salary;

-- No USING clause on STORE, so Pig's default storage function writes the output.
STORE result INTO '/user/akhanolk/joinProject/output/pig-RSJ';

