Created
February 25, 2012 19:27
-
-
Save dkcreatto/1910203 to your computer and use it in GitHub Desktop.
Converts BlackBerry Messenger CSV files to XML, which can be imported into Excel or any other software that supports it.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| /* | |
| *BBM to XML version 1.0 | |
| * | |
| * This script will take CSV file generated by | |
| * Blackberry Messenger and convert it to well | |
| * formatted xml document which can be imported | |
| * into any spreadsheet program such as MS Excel, | |
| * OpenOffice Calc etc. | |
| * | |
| * This script came into existence due to my | |
| * frustration in backing up chat history which | |
| * was (luckily) stored as csv files in memory | |
| * card when BBM is properly configured. | |
| * | |
| * | |
| * This script is made to be used in php in command | |
| * line in windows. Although, it can be also used in | |
| * any other os due to php's portable code structure. | |
| * | |
| * To configure php to be used as command line utility | |
| * follow this URI: http://www.youtube.com/watch?v=AEyzKm2pTBw | |
| * | |
| */ | |
// Start time of the run; reported in the on-screen summary and in the log file.
$stime = new DateTime();

// The first argument is mandatory: either a BBM CSV file or a folder of them.
if (!isset($argv[1])) {
    die("Please provide path to BBM CSV file or the folder containing them.");
}

// Input directory: the argument itself when it is a folder, otherwise the
// folder that contains the given file.
$inputIsDir = is_dir($argv[1]);
$inputDir = $inputIsDir ? $argv[1] : dirname($argv[1]);
echo "\n\nInput Directory path ".$inputDir;

// Output directory: the second argument when it is an existing folder,
// otherwise the input directory.
$outputDir = (isset($argv[2]) && is_dir($argv[2])) ? $argv[2] : $inputDir;
echo "\n\nOutput Directory path ".$outputDir;

// $files is an array of CSV paths in folder mode and a single path string in
// single-file mode.
if ($inputIsDir) {
    // A forward slash works on every platform, Windows included. The previous
    // backslash was read by glob() as an escape of '*' on non-Windows systems,
    // so no file ever matched there.
    $files = glob(rtrim($argv[1], "/\\")."/*.csv");
    if ($files === false) {
        $files = array(); // glob() reports errors with FALSE; treat as "nothing found"
    }
} else {
    $files = $argv[1];
}

// Output file name: the base name of the second argument when it names an
// .xml file (and is not a folder), otherwise a generated time-stamped name.
$bb_xml_file_name = (isset($argv[2]) && !is_dir($argv[2]) && pathinfo($argv[2], PATHINFO_EXTENSION) == "xml")
    ? pathinfo($argv[2], PATHINFO_BASENAME)
    : "BBMExport-dk_".date("dmYHi").".xml";

// "w" truncates an existing export so repeated runs do not duplicate records.
$bb_xml_file = fopen("$outputDir/$bb_xml_file_name", "w");
if ($bb_xml_file === false) {
    die("\nCould not open $outputDir/$bb_xml_file_name for writing.");
}

// XML declaration followed by the opening root element.
$bb_xml_data = '<?xml version="1.0" encoding="UTF-8" ?>' . "\n";
$bb_xml_data .= "<bb_history>\n";
fwrite($bb_xml_file, $bb_xml_data);

$sf = array(); // names of files that were converted successfully
$ef = array(); // error entries for files that could not be converted

// Experimental date limit, currently not used. The value must be a quoted
// string: the bare expression 1/1/2012 is an arithmetic division, not a date.
$dateLimit = date_create('1/1/2012');

// Basic information to let the user know what is happening.
echo "\n\n\nI'm working. Please wait.. ;)\nProcessing ";
echo is_array($files) ? count($files)." files.\n" : "1 file.\n";
echo "\n\n****************************************\n\n";

// Both modes go through the same loop so that the outcome of every file,
// including the single-file case, ends up in $sf or $ef.
foreach ((is_array($files) ? $files : array($files)) as $fname) {
    if (!is_array($files) && pathinfo($fname, PATHINFO_EXTENSION) != "csv") {
        // Only the single-file mode needs this check; glob() already filters by extension.
        $ef[] = $fname." is not a csv file";
    } elseif (!is_file($fname)) {
        $ef[] = $fname." is not a file";
    } elseif (!(filesize($fname) > 0)) {
        $ef[] = $fname." is zero sized";
    } else {
        // bb_routine() returns array(status, entry); status 1 means success.
        $bb_r = bb_routine($fname, $bb_xml_file);
        if ($bb_r[0] == 1) {
            $sf[] = $bb_r[1];
        } else {
            $ef[] = $bb_r[1];
        }
    }
}
| //MAIN processing function which carries out all the extractions | |
/**
 * Converts one BBM CSV history file into <msg> elements and appends them to
 * the already opened XML output file.
 *
 * The first line of the CSV is discarded. Every following line is classified
 * with new_bbm_message(): a line that starts a record closes the previous
 * record and opens a new one; any other line is a continuation of the current
 * message text. The record still open at end of file is flushed as well, so
 * the last message of a file is no longer dropped.
 *
 * The name of the file being processed is printed to standard output.
 *
 * @param string   $file_name   Path of the CSV file to read.
 * @param resource $bb_xml_file Writable handle of the XML output file.
 * @return array array(1, $file_name) when data was written to the output,
 *               array(0, <entry>) otherwise (unreadable file, no records found,
 *               or a failed write).
 */
function bb_routine($file_name, $bb_xml_file) {
    // Report an unreadable file to the caller instead of terminating the whole
    // run, which would leave the XML output without its closing root tag.
    if (!is_readable($file_name)) {
        return array(0, $file_name." could not be read");
    }
    $handle = fopen($file_name, "r");
    if ($handle === false) {
        return array(0, $file_name." could not be read");
    }

    echo pathinfo($file_name, PATHINFO_BASENAME)."\n"; // progress output

    fgets($handle); // the first line of the file is discarded

    $bb_xml_data = '';   // XML collected for this file only
    $bb_msg_data = null; // "date,sender,receiver" prefix of the open record, null while none is open
    $bb_msg_text = '';   // message text of the open record, possibly spanning several lines

    // Compare against FALSE explicitly: a final line consisting of "0" is
    // falsy and would otherwise end the loop early.
    while (($input_line = fgets($handle)) !== false) {
        if (new_bbm_message($input_line)) {
            if ($bb_msg_data !== null) {
                // The previous record is complete now that a new one begins.
                $bb_xml_data .= bb_build_msg_xml($bb_msg_data, $bb_msg_text);
            }
            // The first 43 bytes hold Date,SendersPIN,ReceiversPIN; byte 43 is
            // the separating comma and the text starts at byte 44. Byte-wise
            // substr() is used because this prefix is fixed width.
            $bb_msg_data = (string) substr($input_line, 0, 43);
            $bb_msg_text = (string) substr($input_line, 44);
        } elseif ($bb_msg_data !== null) {
            // Another line of text belonging to the open message.
            $bb_msg_text .= $input_line;
        }
    }
    fclose($handle);

    // Flush the record that was still open when the file ended.
    if ($bb_msg_data !== null) {
        $bb_xml_data .= bb_build_msg_xml($bb_msg_data, $bb_msg_text);
    }

    // fwrite() yields 0 or FALSE when nothing was written, which is reported
    // as a failure for this file.
    $written = fwrite($bb_xml_file, $bb_xml_data);
    return $written ? array(1, $file_name) : array(0, $file_name);
}

/**
 * Builds the <msg> element for one record.
 *
 * @param string $bb_msg_data CSV prefix of the record: date stamp, sender, receiver.
 * @param string $bb_msg_text Raw message text including its trailing line break.
 * @return string The XML fragment, or an empty string when the date stamp
 *                cannot be parsed.
 */
function bb_build_msg_xml($bb_msg_data, $bb_msg_text) {
    // Delimiter, enclosure and escape are the documented defaults, spelled out.
    $fields   = str_getcsv($bb_msg_data, ',', '"', '\\');
    $stamp    = isset($fields[0]) ? $fields[0] : '';
    $sender   = isset($fields[1]) ? $fields[1] : '';
    $receiver = isset($fields[2]) ? $fields[2] : '';

    // Characters 8..17 of the stamp are read as epoch seconds.
    $dF = date_create("@".substr($stamp, 8, 10));
    if ($dF === false) {
        return '';
    }
    date_timezone_set($dF, timezone_open('Asia/Calcutta'));
    $dateField = "<dateField><date>".date_format($dF, "d-m-Y")."</date><time>".date_format($dF, "H:i:s")."</time></dateField>\n";

    // The PINs are element content, so markup characters are escaped. They are
    // converted from ISO-8859-1 first, matching the conversion safe_xml()
    // applies to the message text.
    $sPIN = "<sender>".htmlspecialchars(mb_convert_encoding($sender, 'UTF-8', 'ISO-8859-1'), ENT_NOQUOTES, 'UTF-8')."</sender>\n";
    $rPIN = "<reciever>".htmlspecialchars(mb_convert_encoding($receiver, 'UTF-8', 'ISO-8859-1'), ENT_NOQUOTES, 'UTF-8')."</reciever>\n";

    // Remove exactly one trailing line break, whether CRLF or LF.
    $text = preg_replace('/\r?\n\z/', '', $bb_msg_text);
    // safe_xml() already returns UTF-8, so the record is not encoded a second time.
    $text = safe_xml($text);
    // "]]>" would end the CDATA section early; split it across two sections.
    $text = str_replace(']]>', ']]]]><![CDATA[>', $text);
    $msgText = '<msgText><![CDATA[ '.$text.' ]]></msgText>'."\n";

    return "<msg>\n".$dateField.$sPIN.$rPIN.$msgText."</msg>\n";
}
| //BB routine ends here | |
| //Function to check if current line is starting of a new record or new line in message. Checks if first 21 characters are combination of YYYYMMDD+Epoch(milliseconds) | |
/**
 * Tells whether a CSV line is the start of a new BBM record.
 *
 * A record line is longer than 21 characters and begins with a YYYYMMDD date
 * followed by an epoch time stamp. Characters 8..17 are read as epoch seconds;
 * the line is accepted only when those seconds, rendered as a calendar date in
 * the given time zone, equal the first eight characters of the line.
 *
 * @param string $cln One line read from the CSV file.
 * @param string $tz  Time zone identifier used to render the epoch seconds as
 *                    a date. Defaults to the zone the script has always used.
 * @return bool TRUE when the line starts a new record, FALSE otherwise.
 */
function new_bbm_message($cln, $tz = 'Asia/Calcutta') {
    static $zones = array(); // one time zone object per identifier, reused on every call

    if (strlen($cln) <= 21) {
        return FALSE;
    }

    // Only plain digits are handed to date_create(), so arbitrary text is
    // never interpreted as a relative date expression.
    $epoch = substr($cln, 8, 10);
    if (strlen($epoch) !== 10 || strspn($epoch, '0123456789') !== 10) {
        return FALSE;
    }

    if (!isset($zones[$tz])) {
        $zones[$tz] = timezone_open($tz);
    }
    $moment = date_create("@".$epoch);
    if ($moment === false || $zones[$tz] === false) {
        return FALSE;
    }
    date_timezone_set($moment, $zones[$tz]);

    return substr($cln, 0, 8) === date_format($moment, "Ymd");
}
// Every input file has been handled by now: append the closing root tag and
// release the handle of the XML output file.
$bb_closing_tag = "</bb_history>";
fwrite($bb_xml_file, $bb_closing_tag);
fclose($bb_xml_file);
| // Escaping ANSI control characters | |
/**
 * Replaces ASCII control characters with spaces and converts the text to UTF-8.
 *
 * Every byte from 0x00 to 0x1F, except line feed (0x0A) and carriage return
 * (0x0D), is replaced by a single space. The result is then converted from
 * ISO-8859-1 to UTF-8.
 *
 * @param string $stri Raw text, treated as ISO-8859-1 bytes.
 * @return string The cleaned text encoded as UTF-8.
 */
function safe_xml($stri) {
    static $charcode = null; // translation table, built on the first call only

    if ($charcode === null) {
        $charcode = array();
        for ($i = 0; $i < 32; $i++) {
            if ($i != 10 && $i != 13) {
                $charcode[chr($i)] = chr(32);
            }
        }
    }

    $newstri = strtr($stri, $charcode);
    // Same conversion utf8_encode() performed, without relying on that
    // deprecated function.
    return mb_convert_encoding($newstri, 'UTF-8', 'ISO-8859-1');
}
// $files is an array in folder mode and a single path string in single-file
// mode; count() must only be applied to the array form.
$total_files = is_array($files) ? count($files) : 1;

echo "\nFile operation done...\n\nOutput file: $bb_xml_file_name\nTotal files processed: ". $total_files ."\nSuccess on: ". count($sf) ." files.\nError occured on following: ". count($ef) ." files\n";
foreach ($ef as $errfile) {
    echo $errfile."\n";
}

// Elapsed time between the start of the script and this point.
$etime = new DateTime();
$totaltime = date_diff($stime, $etime);
echo "\n\n****************************************\n\n";

// The timing lines are identical on screen and in the log, so build them once.
$timing = "\nStarted on: ".$stime->format("r")."\nEnded on: ".$etime->format("r")."\nTotal time taken: ".$totaltime->format('%I:%S minutes');

// The log is written next to the XML output, named after the current time.
$logfile = date("YdmHis").".log";
$logdata = "Input directory: $inputDir\nOutput Directory: $outputDir\nXML File name: $bb_xml_file_name\n\n".$timing."\nSusscessful completed files:\n".implode("\n", $sf)."\n\nFiles with errors: \n".implode("\n", $ef);
if (file_put_contents("$outputDir/$logfile", $logdata, FILE_APPEND) === false) {
    // Tell the user rather than pointing at a log file that does not exist.
    echo "\nCould not write log file $outputDir/$logfile";
}

echo $timing."\nLog file: $logfile";
die();
| ?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment