Created
February 19, 2018 10:54
-
-
Save juanparati/97db4c7c0f93689337f542dff1aea31a to your computer and use it in GitHub Desktop.
Revisions
-
juanparati created this gist
Feb 19, 2018 .There are no files selected for viewing
<?php

/**
 * Class Model_CSVReader
 *
 * Parse and read CSV files as a stream keeping a low memory footprint.
 *
 * Typical usage: construct the reader, optionally call set_mapfield() to map
 * header names (or column indexes) to field keys, then either iterate with
 * readLine() or load everything with read().
 */
class Model_CSVReader
{
    /**
     * @var null|resource CSV file pointer
     */
    protected $fp = null;

    /**
     * @var string CSV charset encoding
     * @link http://php.net/manual/en/function.mb-convert-encoding.php
     */
    protected $charset;

    /**
     * Default decimal separator
     *
     * @var string
     */
    protected $decimal_sep;

    /**
     * @var string CSV column delimiter
     * @link http://php.net/manual/en/function.fgetcsv.php
     */
    protected $delimiter;

    /**
     * @var string CSV field enclosure character
     * @link http://php.net/manual/en/function.fgetcsv.php
     */
    protected $closure_char;

    /**
     * Field key -> column index map.
     * An entry is false when the requested header name was not found.
     *
     * @var array
     */
    protected $fieldmap = [];

    /**
     * Field properties (Only when field map is used)
     *
     * @var array
     */
    protected $field_props = [];

    /**
     * Model_CSVReader constructor.
     *
     * @param string $file         Path of the CSV file to open for reading
     * @param string $delimiter    Column delimiter
     * @param string $closure_char Field enclosure character ('"' is used when empty)
     * @param string $charset      Charset of the CSV file
     * @param string $decimal_sep  Decimal separator handed to Sanitize::check_currency()
     * @throws Exception When the file cannot be opened
     */
    public function __construct($file, $delimiter = ';', $closure_char = '"', $charset = 'UTF-8', $decimal_sep = ',')
    {
        // NOTE(review): this ini setting is reported as deprecated since PHP 8.1; it is kept
        // so that files with CR-only line endings keep being read the same way.
        ini_set('auto_detect_line_endings', true);

        $this->fp = fopen($file, "r");

        if (!$this->fp) {
            Log::instance()->add(Log::ERROR, 'Unable to read CSV file: ' . $file);
            throw new Exception('Unable to read CSV file: ' . $file);
        }

        $this->delimiter    = $delimiter;
        $this->closure_char = empty($closure_char) ? '"' : $closure_char;
        $this->charset      = $charset;
        $this->decimal_sep  = $decimal_sep;
    }

    /**
     * Release the file pointer when the reader is destroyed.
     */
    public function __destruct()
    {
        if (is_resource($this->fp)) {
            fclose($this->fp);
        }

        $this->fp = null;
    }

    /**
     * Set the field mapping (Used with CSV that have header columns)
     *
     * Each entry of $fields is keyed by the output field name. Entries that are
     * false or have no 'column' key are not mapped. An integer 'column' is used
     * directly as the column index; any other value is looked up in the header row.
     *
     * @param array    $fields     Field definitions
     * @param int|bool $header_row Line of the header row, or false to read the
     *                             header from the current pointer position
     * @return bool False when the header line is empty or has less than 2 columns
     */
    public function set_mapfield($fields, $header_row = 0)
    {
        // Reset fieldmap and properties
        $this->fieldmap    = [];
        $this->field_props = [];

        // Reset pointer position
        if ($header_row !== false) {
            $this->seekLine($header_row);
        }

        $columns = $this->fetchColumns();

        // Ignore empty header lines and lines with less than 2 columns
        if (empty($columns) || count($columns) < 2) {
            return false;
        }

        // Encode columns
        $columns = array_map([$this, 'encode'], $columns);

        // Map fields
        foreach ($fields as $k => $field) {
            if ($field === false || !isset($field['column'])) {
                continue;
            }

            // A strict search avoids loose matches between different header names.
            // A header that is not found is stored as false and skipped by readLine().
            $this->fieldmap[$k] = is_int($field['column'])
                ? $field['column']
                : array_search($field['column'], $columns, true);
        }

        $this->field_props = $fields;

        return true;
    }

    /**
     * Read entire data from the CSV and return it structured according to the map fields.
     * It is not recommended to use this function for big CSV files.
     *
     * Lines reported as empty by readLine() are skipped.
     *
     * @param int $header_row Line where reading starts
     * @return array
     */
    public function read($header_row = 1)
    {
        $this->seekLine($header_row);

        $records = [];

        // Compare against false explicitly: a row that maps to an empty array
        // must not end the loop before the end of the file.
        while (($row = $this->readLine()) !== false) {
            // readLine() returns true for empty lines, which are not records
            if ($row === true) {
                continue;
            }

            $records[] = $row;
        }

        return $records;
    }

    /**
     * Read the CSV file line by line
     *
     * Without a field map the result is an array holding the raw column list as
     * its only element. With a field map the result is keyed by field name; the
     * 'exclude' key is set to true when a value matches the field exclusion list.
     *
     * @return array|bool False when no more lines can be read, true for lines
     *                    with a single column (treated as empty lines)
     */
    public function readLine()
    {
        $columns = $this->fetchColumns();

        if (!$columns) {
            return false;
        }

        // Detect empty lines
        if (count($columns) === 1) {
            return true;
        }

        if (empty($this->fieldmap)) {
            return [$columns];
        }

        $frow = [];

        foreach ($this->fieldmap as $k => $columnmap) {
            // Skip headers that were not found (false would otherwise be cast to
            // column 0) and columns missing from this line
            if ($columnmap === false || !isset($columns[$columnmap])) {
                continue;
            }

            $value = $this->applyFieldProps($k, $columns[$columnmap]);

            // Apply exclusion list
            if (!empty($this->field_props[$k]['exclude'])
                && Arr::expression_found($this->field_props[$k]['exclude'], $value)
            ) {
                $frow['exclude'] = true;
            }

            // Convert decimal values
            $currency = Sanitize::check_currency($value, $this->decimal_sep);

            // Save value or string
            $frow[$k] = $currency === false ? $this->encode($value) : $currency;
        }

        // Set static values
        foreach ($this->field_props as $k => $props) {
            if (is_array($props) && array_key_exists('static_value', $props)) {
                $frow[$k] = $props['static_value'];
            }
        }

        return $frow;
    }

    /**
     * Seek the file pointer to an specific line
     *
     * Lines are counted with fgets(), starting at 0 from the beginning of the file.
     *
     * @param int $line
     * @return bool False when the end of the file is reached before the line
     */
    public function seekLine($line)
    {
        // Reset file pointer position
        rewind($this->fp);

        $current = 0;

        do {
            if ($line === $current) {
                return true;
            }

            $current++;
        } while (fgets($this->fp) !== false);

        return false;
    }

    /**
     * Encode a text to UTF-8
     *
     * Non-string values (for example values already cast to int or float) are
     * returned untouched so that they keep their type.
     *
     * @param mixed $text
     * @return mixed
     */
    protected function encode($text)
    {
        if (!is_string($text) || strcasecmp((string) $this->charset, 'UTF-8') === 0) {
            return $text;
        }

        return mb_convert_encoding($text, 'UTF-8', $this->charset);
    }

    /**
     * Read the next CSV record from the file pointer.
     *
     * The escape character is passed explicitly (it equals the historical
     * default) instead of relying on the implicit default value.
     *
     * @return array|false|null Result of fgetcsv()
     */
    protected function fetchColumns()
    {
        return fgetcsv($this->fp, 0, $this->delimiter, $this->closure_char, '\\');
    }

    /**
     * Apply the 'remove', 'replace', 'segment' and 'cast' properties of a field to a value.
     *
     * @param string|int $k     Field key
     * @param mixed      $value Raw column value
     * @return mixed Transformed value
     */
    protected function applyFieldProps($k, $value)
    {
        $props = isset($this->field_props[$k]) ? $this->field_props[$k] : [];

        // Remove characters
        if (!empty($props['remove'])) {
            $value = str_replace($props['remove'], '', $value);
        }

        // Replace characters
        if (!empty($props['replace'])) {
            foreach ($props['replace'] as $search_str => $replace_str) {
                $value = str_replace($search_str, $replace_str, $value);
            }
        }

        // Extract word segments
        if (isset($props['segment']) && is_int($props['segment'])) {
            $segments = explode(' ', $value);

            // isset() instead of empty() so that a "0" segment is not dropped
            $value = isset($segments[$props['segment']]) ? $segments[$props['segment']] : '';
        }

        // Cast
        if (!empty($props['cast'])) {
            switch ($props['cast']) {
                case 'int':
                case 'integer':
                    $value = (int) $value;
                    break;

                case 'float':
                    $value = (float) $value;
                    break;

                case 'string':
                    $value = (string) $value;
                    break;
            }
        }

        return $value;
    }
}