Skip to content

Instantly share code, notes, and snippets.

@dkcreatto
Created February 25, 2012 19:27
Show Gist options
  • Select an option

  • Save dkcreatto/1910203 to your computer and use it in GitHub Desktop.

Select an option

Save dkcreatto/1910203 to your computer and use it in GitHub Desktop.
Converts BlackBerry Messenger CSV files to XML, which can be imported into Excel or any other software that supports it.
<?php
/*
*BBM to XML version 1.0
*
* This script takes the CSV files generated by
* BlackBerry Messenger and converts them into a
* well-formed XML document which can be imported
* into any spreadsheet program such as MS Excel,
* OpenOffice Calc etc.
*
* This script came into existence due to my
* frustration in backing up chat history which
* was (luckily) stored as csv files in memory
* card when BBM is properly configured.
*
*
* This script is made to be used in php in command
* line in windows. Although, it can be also used in
* any other os due to php's portable code structure.
*
* To configure php to be used as command line utility
* follow this URI: http://www.youtube.com/watch?v=AEyzKm2pTBw
*
*/
// ---------------------------------------------------------------------------
// Script entry: resolve input/output locations from the command line, open the
// XML output file, write its header and convert every CSV file found.
//
// Usage: php <script> <csv file | directory of csv files> [output directory | name.xml]
//   - argv[1] is required. A directory means "all *.csv inside it"; anything
//     else is treated as a single file and its parent directory is the input dir.
//   - argv[2] is optional. If it is an existing directory it becomes the output
//     directory; otherwise the input directory is used. If it is not a directory
//     and ends in ".xml", only its base name is used as the output file name.
// ---------------------------------------------------------------------------
$stime = new DateTime(); //Registers starting time of script. To be used in log.
if (!isset($argv[1])) {
    die("Please provide path to BBM CSV file or the folder containing them.");
}
$input_is_dir = is_dir($argv[1]);
//Input directory: the argument itself when it is a directory, else the parent directory of the given file
$inputDir = $input_is_dir ? $argv[1] : dirname($argv[1]);
echo "\n\nInput Directory path ".$inputDir;
//Output directory: second argument when it is an existing directory, else the input directory
$outputDir = (isset($argv[2]) && is_dir($argv[2])) ? $argv[2] : $inputDir;
echo "\n\nOutput Directory path ".$outputDir;
//Always keep $files as an array of paths so it can be counted and iterated uniformly.
if ($input_is_dir) {
    //A forward slash is accepted on every platform; the previous literal backslash
    //escaped the "*" wildcard in glob() on non-Windows systems, so nothing matched.
    $files = glob(rtrim($argv[1], "/\\")."/*.csv");
    if ($files === false) { //glob() signals an error with FALSE
        $files = array();
    }
} else {
    $files = array($argv[1]);
}
$bb_xml_file_name = (isset($argv[2]) && !is_dir($argv[2]) && pathinfo("$argv[2]",PATHINFO_EXTENSION) == "xml") //Second argument names an xml file
    ? pathinfo("$argv[2]",PATHINFO_BASENAME) //Use only its file name; the directory part is ignored
    : "BBMExport-dk_".date("dmYHi").".xml"; //Otherwise generate a timestamped name
//"w" truncates an existing export so repeated runs do not duplicate records.
$bb_xml_file = fopen("$outputDir/$bb_xml_file_name", "w");
if ($bb_xml_file === false) {
    die("\nCould not open $outputDir/$bb_xml_file_name for writing.");
}
$bb_xml_data ='<?xml version="1.0" encoding="UTF-8" ?>' . "\n"; //XML declaration
$bb_xml_data .="<bb_history>\n"; //Root element; closed at the end of the script
fwrite($bb_xml_file,$bb_xml_data); //Writes XML header to xml file
$sf = array(); //Successfully processed files, reported in the summary and the log
$ef = array(); //Failed files (or failure messages), reported in the summary and the log
$dateLimit = date_create("2012-01-01"); //Experimental date limit, currently not used. (Was the arithmetic expression 1/1/2012.)
//Basic information to let user know what's happening
echo "\n\n\nI'm working. Please wait.. ;)\nProcessing ";
echo $input_is_dir ? count($files)." files.\n" : "1 file.\n";
echo "\n\n****************************************\n\n";
foreach ($files as $fname) {
    if (!is_file($fname)) {
        //Avoids filesize() warnings on paths that do not exist
        $ef[] = $fname." does not exist or is not a file";
    } elseif (!$input_is_dir && pathinfo($fname,PATHINFO_EXTENSION) != "csv") {
        //Only a directly named file needs this check; glob() already filtered on *.csv
        $ef[] = $fname." is not a csv file";
    } elseif (filesize($fname) > 0) {
        //bb_routine() returns array(status, file name); status 1 means data was written
        $bb_r = bb_routine($fname,$bb_xml_file);
        if ($bb_r[0] == 1) {
            $sf[] = $bb_r[1];
        } else {
            $ef[] = $bb_r[1];
        }
    } else {
        $ef[] = $fname." is zero sized";
    }
}
//MAIN processing function which carries out all the extractions
/**
 * Converts one BBM CSV history file into <msg> elements and appends them to
 * the already opened XML output file.
 *
 * The first line of the file is discarded as a header. Every following line
 * that new_bbm_message() recognises starts a new record: its first 43
 * characters are taken as the metadata (date stamp, sender PIN, receiver PIN)
 * and everything from offset 44 on as the beginning of the text. Lines that
 * are not recognised are appended to the text of the current record.
 *
 * @param string   $file_name   Path of the CSV file to read.
 * @param resource $bb_xml_file Writable handle of the XML output file.
 * @return array array(1, $file_name) when message data was written,
 *               array(0, $file_name) when the file could not be read, held no
 *               messages, or the write failed.
 */
function bb_routine($file_name,$bb_xml_file) {
    $handle = is_readable($file_name) ? fopen($file_name,"r") : false;
    if ($handle === false) {
        //Report and let the caller carry on; terminating here would leave the
        //XML file without its closing root tag.
        echo "Something went wrong. Error for $file_name.\n";
        return array(0, $file_name);
    }
    echo pathinfo($file_name, PATHINFO_BASENAME)."\n"; //Prints current file name being processed to screen
    fgets($handle); //First line is discarded

    $bb_xml_data = ""; //XML collected for this file only
    $bb_msg_data = null; //Metadata of the record being collected; null until the first record starts
    $bb_msg_text = ''; //Text of the record being collected
    //Compare against FALSE explicitly so a line that evaluates as falsy (e.g. a final "0") does not end the loop early.
    while (($input_line = fgets($handle)) !== false) {
        if (new_bbm_message($input_line)) {
            //A new record starts, so the previous one (if any) is complete.
            if ($bb_msg_data !== null) {
                $bb_xml_data .= bb_message_to_xml($bb_msg_data, $bb_msg_text);
            }
            $bb_msg_data = mb_substr($input_line,0,43); //Date,SendersPIN,ReceiversPIN up to the ',' after receivers pin
            $bb_msg_text = (string) mb_substr($input_line,44);
        } elseif ($bb_msg_data !== null) {
            //Another line of text of the current message.
            $bb_msg_text .= $input_line;
        }
    }
    fclose($handle);
    //The final record has no following record to trigger its output, so emit it here.
    //(Previously this happened only for files with exactly one record, dropping the
    //last message of every longer file.)
    if ($bb_msg_data !== null) {
        $bb_xml_data .= bb_message_to_xml($bb_msg_data, $bb_msg_text);
    }
    //fwrite() yields 0 for an empty string and FALSE on failure; both count as an error for this file.
    $written = fwrite($bb_xml_file,$bb_xml_data);
    return array($written ? 1 : 0, $file_name);
}

/**
 * Builds the <msg> element for one record collected by bb_routine().
 *
 * @param string $bb_msg_data Metadata part of the record (CSV: stamp, sender, receiver).
 * @param string $bb_msg_text Raw message text, possibly spanning several lines.
 * @return string The XML fragment, or '' when the time stamp cannot be parsed.
 */
function bb_message_to_xml($bb_msg_data, $bb_msg_text) {
    //Enclosure and escape are passed explicitly (they are the historical defaults).
    $msg_data = str_getcsv($bb_msg_data, ',', '"', '\\');
    //Characters 8..17 of the stamp are used as a Unix time stamp in seconds.
    $dF = date_create("@".mb_substr((string) $msg_data[0],8,10));
    if ($dF === false) {
        return '';
    }
    date_timezone_set($dF, timezone_open('Asia/Calcutta'));
    $dateField = "<dateField><date>".date_format($dF,"d-m-Y")."</date><time>".date_format($dF,"H:i:s")."</time></dateField>\n";
    //PINs are element content, so the XML markup characters are escaped.
    $xml_escape = array('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
    $sPIN = "<sender>".strtr(isset($msg_data[1]) ? $msg_data[1] : '', $xml_escape)."</sender>\n";
    $rPIN = "<reciever>".strtr(isset($msg_data[2]) ? $msg_data[2] : '', $xml_escape)."</reciever>\n";
    //Remove exactly one trailing line terminator (CRLF or LF) instead of blindly
    //cutting two characters, which ate real text on LF-only or unterminated lines.
    $text = preg_replace('/\r?\n\z/', '', $bb_msg_text);
    //safe_xml() already returns UTF-8, so the fragment is not encoded a second time.
    //A literal "]]>" would end the CDATA section early; split it across two sections.
    $text = str_replace(']]>', ']]]]><![CDATA[>', safe_xml($text));
    return "<msg>\n".$dateField.$sPIN.$rPIN.'<msgText><![CDATA[ '.$text.' ]]></msgText>'."\n"."</msg>\n";
}
//BB routine ends here
/**
 * Tells whether a line starts a new record rather than continuing a message.
 *
 * A record line must be longer than 21 characters and begin with 21 digits:
 * an 8 digit date (YYYYMMDD) followed by 13 digits of which the first 10 are
 * read as a Unix time stamp in seconds. The line is accepted only when that
 * time stamp, expressed in the Asia/Calcutta time zone, falls on the same
 * date as the 8 digit prefix.
 *
 * @param string $cln The line to inspect.
 * @return bool TRUE when the line starts a new record, FALSE otherwise.
 */
function new_bbm_message($cln) {
    //Check all 21 prefix characters up front; relying on date_create() alone
    //left the last three unchecked and accepted whatever its parser tolerates.
    if (strlen($cln) <= 21 || !preg_match('/^\d{21}/', $cln)) {
        return FALSE;
    }
    //The prefix is plain ASCII digits here, so byte offsets are safe.
    $stamp = date_create("@".substr($cln,8,10));
    if ($stamp === false) {
        return FALSE;
    }
    date_timezone_set($stamp, timezone_open('Asia/Calcutta'));
    return substr($cln,0,8) === date_format($stamp,"Ymd");
}
// Finish the XML document: write the closing tag of the <bb_history> root
// element that was written with the header, then release the file handle.
// No newline is written after the closing tag.
fwrite($bb_xml_file,"</bb_history>"); //Ends XML file with ending root tag.
fclose($bb_xml_file); //Closes the XML file handle.
/**
 * Makes message text safe to embed in the XML output.
 *
 * Every byte below 0x20 except line feed (10) and carriage return (13) is
 * replaced by a space (this includes the tab character), then the result is
 * converted from ISO-8859-1 to UTF-8.
 *
 * @param string $stri Raw text, treated as ISO-8859-1.
 * @return string The cleaned text encoded as UTF-8.
 */
function safe_xml($stri) {
    static $charcode = null; //Translation table, built once and reused on later calls
    if ($charcode === null) {
        $charcode = array();
        for ($i = 0; $i < 32; $i++) {
            if ($i != 10 && $i != 13) {
                $charcode[chr($i)] = chr(32);
            }
        }
    }
    $newstri = strtr($stri, $charcode);
    //utf8_encode() is deprecated as of PHP 8.2; mb_convert_encoding() from
    //ISO-8859-1 performs the same conversion. The old call is kept only as a
    //fallback for installations without the mbstring extension.
    if (function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($newstri, 'UTF-8', 'ISO-8859-1');
    }
    return utf8_encode($newstri);
}
// ---------------------------------------------------------------------------
// Final report: print a summary to the screen, list the failed files and write
// the same information to a log file in the output directory.
// ---------------------------------------------------------------------------
//$files may be an array of paths or a single path string; count() on a string
//raises a TypeError on PHP 8, so the total is derived explicitly.
$total_files = is_array($files) ? count($files) : (is_string($files) ? 1 : 0);
echo "\nFile operation done...\n\nOutput file: $bb_xml_file_name\nTotal files processed: ". $total_files ."\nSuccess on: ". count($sf) ." files.\nError occured on following: ". count($ef) ." files\n";
foreach ($ef as $errfile) { echo $errfile."\n";}
$etime = new DateTime();
$totaltime = date_diff($stime,$etime); //Elapsed time since $stime was taken at script start
echo "\n\n****************************************\n\n";
$logfile = date("YdmHis").".log"; //Log name is built from the current time (year, day, month, hour, minute, second)
$logdata = "Input directory: $inputDir\nOutput Directory: $outputDir\nXML File name: $bb_xml_file_name\n\n\nStarted on: ".$stime->format("r")."\nEnded on: ".$etime->format("r")."\nTotal time taken: ".$totaltime->format('%I:%S minutes')."\nSusscessful completed files:\n".implode("\n",$sf)."\n\nFiles with errors: \n".implode("\n",$ef);
//FILE_APPEND keeps existing content should a log with the same name already exist.
if (file_put_contents("$outputDir/$logfile",$logdata,FILE_APPEND) === false) {
    echo "\nCould not write log file $outputDir/$logfile";
}
echo "\nStarted on: ".$stime->format("r")."\nEnded on: ".$etime->format("r")."\nTotal time taken: ".$totaltime->format('%I:%S minutes')."\nLog file: $logfile";
die();
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment