Last active
April 20, 2020 16:30
-
-
Save userid/fecee8535dcebf3197b7347c123a653c to your computer and use it in GitHub Desktop.
Revisions
-
userid revised this gist
Apr 20, 2020 . 1 changed file with 90 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,90 @@ #!/usr/bin/perl -w ## ## ## use 5.012; $| = 1; use POSIX qw(strftime); chdir '/home/odps/ods_ac_serverconfig/'; my $date = strftime("%Y%m%d", localtime(time()-86400)); my $sql = "tunnel download cbods.ods_ac_serverconfig/requestdate='$date' -fd '|' data/serverconfig/serverconfig.$date.txt;"; say $sql; say qx{ /opt/odps/bin/odpscmd -e "$sql"}; my $downloadfile = "data/serverconfig/serverconfig.$date.txt"; die("file download error") unless -s $downloadfile; ###_---- 开始解析 open my $f_user, "+>", "huge/user.csv"; open my $f_device, "+>", "huge/device.csv"; open my $f_udevice, "+>", "huge/udevice.csv"; open my $f_used, "+>", "huge/used.csv"; open my $f_contains, "+>", "huge/contains.csv"; sub get_uuid_time($){ use POSIX qw(strftime); my $uuid= shift || 'ef802820-46b3-11e2-bf3a-47ef6b3e28e2'; $uuid =~ s/-//g; my $timelow = hex substr( $uuid, 2 * 0, 2 * 4 ); my $timemid = hex substr( $uuid, 2 * 4, 2 * 2 ); my $version = hex substr( $uuid, 2 * 6, 1 ); my $timehi = hex substr( $uuid, 2 * 6 + 1, 2 * 2 - 1 ); my $time = ( $timehi * ( 2**16 ) + $timemid ) * ( 2**32 ) + $timelow; my $epoc = int( $time / 10000000 ) - 12219292800; #return strftime( "%Y%m%d", localtime($epoc) ) ; return strftime( "%Y-%m-%d %H:%M:%S", localtime($epoc) ); } sub process($){ my $l = shift; chomp $l; my @vals = split '\\|', $l ; return if scalar @vals <5; my $userid = $vals[0]; my $type = $vals[1]; my $model = $vals[2]; $model=~s/,/_/; my $mac = $vals[3]; my $uuid = $vals[-1] || ''; return if $userid eq ''; return if $userid == 0; my $date = get_uuid_time($uuid); #say "$userid $model $mac $uuid $date"; print $f_user "$userid\n"; print $f_device "$mac,$model,$type\n"; print $f_udevice "$uuid,$date,$type\n"; print $f_used "$userid,$uuid\n"; print $f_contains "$uuid,$mac\n"; } my $n = 0; open my $fd,"<", $downloadfile; while(<$fd>){ process($_) if ( $_ ne ''); say "processed $n lines." if $n++ % 1000 == 0; } close($fd); close($f_user); close $f_device; close $f_udevice; close $f_user; close $f_contains; die("解析文件出错" ) unless -s "huge/user.csv"; #__----- 开始导入hugegraph say qx {/home/dev/hugegraph-loader-0.7.0/bin/hugegraph-loader -g hugegraph -f huge/s.json -s huge/schema.groovy -h 10.10.14.33 --max-parse-errors 10000 --max-insert-errors 10000}; -
userid created this gist
Apr 20, 2020 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,37 @@ #!/usr/bin/perl -w ## ## ## use 5.012; $| = 1; use POSIX qw(strftime); chdir '/home/odps/'; my $date = strftime("%Y%m%d", localtime(time()-86400*1)); my $partition = strftime("%Y-%m-%d", localtime(time()-86400*1)); my $sql = "tunnel download cbmds.mds_user_attribute_original_big_data_table/requestdate=${date} -fd '\x01' ./data/user_attribute_${date}.txt "; say $sql; say "=== 开始下载odps ==="; say qx{ /opt/odps/bin/odpscmd -e "$sql"}; my $downloadfile = "./data/user_attribute_${date}.txt"; die("file download error") unless -s $downloadfile; #__----- 开始导入 # say "=== 开始导入clickhouse ==="; say qx{ cat $downloadfile |sed -e "s/['\\"{}\\r\\t]//g" -e 's/\$/\\x01${partition}/' | clickhouse-client --format_csv_delimiter='\x01' --query="INSERT INTO user_attribute FORMAT CSV" --input_format_allow_errors_num=10000000}; #__----- 开始清理旧数据 my $old_date = strftime("%Y%m%d", localtime(time()-86400*9)); my $old_partition = strftime("%Y-%m-%d", localtime(time()-86400*9)); say "=== drop table partition '$old_partition' ==="; qx{rm ./data/user_attribute_${old_date}.txt }; say qx{ clickhouse-client --query=" alter table user_attribute drop partition '$old_partition' "}; say "=== Done ===\n,", `date`;