Forked from michael-erasmus/transform_actions_taken.pig
Last active
August 29, 2015 14:19
-
-
Save narayana1208/64374a0c8ef4ad5fef52 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * transform_actions_taken.pig
 *
 * Reads the records under $OUTPUT_PATH/extract-actions-taken, rewrites the
 * three date-like columns through Python UDFs, expands the `value` bag into
 * ten scope columns, and stores the result gzip-compressed under
 * $OUTPUT_PATH/transform-actions-taken.
 *
 * Parameters:
 *   OUTPUT_PATH - base path for both the input and the output; it is
 *                 substituted into the script text, so it must be supplied
 *                 when the script is launched.
 */

-- Two UDF modules are registered from files that share a name but live in
-- different directories and run under different engines: `actions_taken`
-- through Jython, `actions_taken1` through streaming_python.
REGISTER '../udfs/jython/actions_taken.py' USING jython AS actions_taken;
REGISTER '../udfs/python/actions_taken.py' USING streaming_python AS actions_taken1;

-- Every scalar column is loaded as chararray; `value` is a bag of tuples
-- whose inner schema is left unspecified.
raw = load '$OUTPUT_PATH/extract-actions-taken'
    using PigStorage()
    as (
        user_id:chararray,
        visitor_id:chararray,
        client_id:chararray,
        last_modified:chararray,
        user_joined_at:chararray,
        date:chararray,
        value:bag{t:tuple()},
        extra_data:chararray
    );

-- Each UDF result is aliased explicitly. `transformed` below projects
-- `date` and `user_joined_at` by name, so without the aliases those names
-- would resolve only if the UDFs' own declared output schemas happened to
-- use them.
-- NOTE(review): what transform_date / transform_joined_at / pull_out_scopes
-- return is defined in the UDF files, not here -- confirm against them.
with_scopes = foreach raw generate
    user_id,
    visitor_id,
    client_id,
    actions_taken1.transform_date(last_modified) as last_modified,
    actions_taken1.transform_date(date) as date,
    actions_taken1.transform_joined_at(user_joined_at) as user_joined_at,
    actions_taken.pull_out_scopes(value) as scopes,
    extra_data;

-- Positions $0..$9 of `scopes` are projected and flattened into the columns
-- scope1..scope10, so pull_out_scopes is expected to return at least ten
-- positional fields (assumption -- verify in the Jython UDF).
transformed = foreach with_scopes generate
    user_id,
    visitor_id,
    client_id,
    last_modified,
    user_joined_at,
    date,
    flatten(scopes.$0) as scope1,
    flatten(scopes.$1) as scope2,
    flatten(scopes.$2) as scope3,
    flatten(scopes.$3) as scope4,
    flatten(scopes.$4) as scope5,
    flatten(scopes.$5) as scope6,
    flatten(scopes.$6) as scope7,
    flatten(scopes.$7) as scope8,
    flatten(scopes.$8) as scope9,
    flatten(scopes.$9) as scope10,
    extra_data;

-- Use gzip compression
set output.compression.enabled true;
set output.compression.codec org.apache.hadoop.io.compress.GzipCodec;

-- Remove any output left by a previous run before writing the new result.
rmf $OUTPUT_PATH/transform-actions-taken;

store transformed into '$OUTPUT_PATH/transform-actions-taken' using PigStorage();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment