Skip to content

Instantly share code, notes, and snippets.

@juanparati
Created February 19, 2018 10:54
Show Gist options
  • Select an option

  • Save juanparati/97db4c7c0f93689337f542dff1aea31a to your computer and use it in GitHub Desktop.

Select an option

Save juanparati/97db4c7c0f93689337f542dff1aea31a to your computer and use it in GitHub Desktop.

Revisions

  1. juanparati created this gist Feb 19, 2018.
    301 changes: 301 additions & 0 deletions csvreader.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,301 @@
    <?php

    /**
     * Class Model_CSVReader
     *
     * Parse and read CSV files as a stream keeping a low memory footprint.
     *
     * Rows can be returned raw, or restructured through a field map installed
     * with set_mapfield(). Each entry of the field map is keyed by the output
     * field name and may contain:
     *
     *  - 'column'       int column index, or the header name to look up
     *  - 'remove'       string(s) stripped from the value (str_replace search)
     *  - 'replace'      array of search => replacement pairs
     *  - 'segment'      int index of the space-separated word to keep
     *  - 'cast'         'int' | 'integer' | 'float' | 'string'
     *  - 'exclude'      expression list handed to Arr::expression_found();
     *                   a match sets $row['exclude'] = true
     *  - 'static_value' constant value written to the field on every row
     */
    class Model_CSVReader
    {

        /**
         * @var null|resource CSV file pointer
         */
        protected $fp = null;

        /**
         * @var string CSV charset encoding
         * @link http://php.net/manual/en/function.mb-convert-encoding.php
         */
        protected $charset;

        /**
         * Default decimal separator
         *
         * @var string
         */
        protected $decimal_sep;

        /**
         * @var string CSV column delimiter
         * @link http://php.net/manual/en/function.fgetcsv.php
         */
        protected $delimiter;

        /**
         * @var string CSV field enclosure character
         * @link http://php.net/manual/en/function.fgetcsv.php
         */
        protected $closure_char;

        /**
         * Output field name -> column index map
         *
         * @var array
         */
        protected $fieldmap = [];

        /**
         * Field properties (Only when field map is used)
         *
         * @var array
         */
        protected $field_props = [];


        /**
         * Model_CSVReader constructor.
         *
         * @param string $file          Path of the CSV file to open for reading
         * @param string $delimiter     Column delimiter
         * @param string $closure_char  Field enclosure character ('"' is used when empty)
         * @param string $charset       Charset of the CSV file
         * @param string $decimal_sep   Decimal separator passed to Sanitize::check_currency()
         * @throws Exception When the file cannot be opened
         */
        public function __construct($file, $delimiter = ';', $closure_char = '"', $charset = 'UTF-8', $decimal_sep = ',')
        {
            // Kept for files with CR-only line endings. Note that this setting
            // is deprecated since PHP 8.1.
            ini_set('auto_detect_line_endings', true);

            $this->fp = fopen($file, "r");

            if ($this->fp === false)
            {
                Log::instance()->add(Log::ERROR, 'Unable to read CSV file: ' . $file);

                throw new Exception('Unable to read CSV file: ' . $file);
            }

            $this->delimiter    = $delimiter;
            $this->closure_char = empty($closure_char) ? '"' : $closure_char;
            $this->charset      = $charset;
            $this->decimal_sep  = $decimal_sep;
        }


        /**
         * Release the file pointer when the reader is destroyed.
         */
        public function __destruct()
        {
            if (is_resource($this->fp))
            {
                fclose($this->fp);
            }

            $this->fp = null;
        }


        /**
         * Set the field mapping (Used with CSV that have header columns)
         *
         * Fields whose 'column' is a header name that does not exist in the
         * header row are left unmapped, so they never appear in the rows
         * returned by readLine() (unless they define a 'static_value').
         *
         * @param array    $fields      Field definitions keyed by output field name
         * @param int|bool $header_row  Line of the header row, or false to read the
         *                              header from the current pointer position
         * @return bool False when the header line is empty or has less than 2 columns
         */
        public function set_mapfield($fields, $header_row = 0)
        {
            // Reset fieldmap and properties
            $this->fieldmap    = [];
            $this->field_props = [];

            // Reset pointer position
            if ($header_row !== false)
            {
                $this->seekLine($header_row);
            }

            $columns = $this->fetchRow();

            // Ignore empty header lines and lines with less than 2 columns
            if (empty($columns) || count($columns) < 2)
            {
                return false;
            }

            // Encode columns
            $columns = array_map([$this, 'encode'], $columns);

            // Map fields
            foreach ($fields as $k => $field)
            {
                if ($field === false || !isset($field['column']))
                {
                    continue;
                }

                $column = $field['column'];

                if (is_int($column))
                {
                    $this->fieldmap[$k] = $column;
                    continue;
                }

                $index = is_scalar($column) ? array_search((string) $column, $columns, true) : false;

                // A missing header must not be stored: false used as an array
                // key is cast to 0 and would silently read the first column.
                if ($index !== false)
                {
                    $this->fieldmap[$k] = $index;
                }
            }

            $this->field_props = $fields;

            return true;
        }


        /**
         * Read entire data from the CSV and return it structured according to the map fields.
         * It is not recommended to use this function for big CSV files.
         *
         * Blank lines and rows that produce no data are skipped.
         *
         * @param int $header_row Line where reading starts
         * @return array
         */
        public function read($header_row = 1)
        {
            $this->seekLine($header_row);

            $records = [];

            // Only false marks the end of the file. readLine() returns true
            // for blank lines, and may return an empty array for a row that
            // has none of the mapped columns.
            while (($row = $this->readLine()) !== false)
            {
                if (!is_array($row) || empty($row))
                {
                    continue;
                }

                $records[] = $row;
            }

            return $records;
        }


        /**
         * Read the CSV file line by line
         *
         * Without a field map the raw columns are returned wrapped in a
         * single-element array. With a field map the row is returned keyed by
         * field name after applying, in this order: remove, replace, segment,
         * cast, exclude and the decimal conversion.
         *
         * @return array|bool Row data, true for a blank/single column line,
         *                    false at the end of the file
         */
        public function readLine()
        {
            $columns = $this->fetchRow();

            if (!$columns)
            {
                return false;
            }

            // Detect empty lines
            if (count($columns) === 1)
            {
                return true;
            }

            if (empty($this->fieldmap))
            {
                return [$columns];
            }

            $frow = [];

            foreach ($this->fieldmap as $k => $columnmap)
            {
                if (!isset($columns[$columnmap]))
                {
                    continue;
                }

                $props = (isset($this->field_props[$k]) && is_array($this->field_props[$k]))
                    ? $this->field_props[$k]
                    : [];

                $value = $columns[$columnmap];

                // Remove characters
                if (!empty($props['remove']))
                {
                    $value = str_replace($props['remove'], '', $value);
                }

                // Replace characters
                if (!empty($props['replace']))
                {
                    foreach ($props['replace'] as $search_str => $replace_str)
                    {
                        $value = str_replace($search_str, $replace_str, $value);
                    }
                }

                // Extract word segments
                if (isset($props['segment']) && is_int($props['segment']))
                {
                    $segments = explode(' ', $value);

                    // isset() instead of empty(): the word "0" is a valid segment
                    $value = isset($segments[$props['segment']]) ? $segments[$props['segment']] : '';
                }

                // Cast
                if (!empty($props['cast']))
                {
                    switch ($props['cast'])
                    {
                        case 'int':
                        case 'integer':
                            $value = (int) $value;
                            break;

                        case 'float':
                            $value = (float) $value;
                            break;

                        case 'string':
                            $value = (string) $value;
                            break;
                    }
                }

                // Apply exclusion list
                if (!empty($props['exclude']) && Arr::expression_found($props['exclude'], $value))
                {
                    $frow['exclude'] = true;
                }

                // Convert decimal values (a false result means no conversion was made)
                $currency = Sanitize::check_currency($value, $this->decimal_sep);

                // Save value or string
                $frow[$k] = $currency === false ? $this->encode($value) : $currency;
            }

            // Set static values
            foreach ($this->field_props as $k => $props)
            {
                if (is_array($props) && array_key_exists('static_value', $props))
                {
                    $frow[$k] = $props['static_value'];
                }
            }

            return $frow;
        }


        /**
         * Seek the file pointer to an specific line
         *
         * @param int $line Zero based line number (numeric strings are accepted)
         * @return bool False when the file has fewer lines; the pointer is then
         *              left at the end of the file
         */
        public function seekLine($line)
        {
            // The comparison below is strict, so a numeric string such as "1"
            // would never match and the whole file would be skipped.
            if (is_numeric($line))
            {
                $line = (int) $line;
            }

            // Reset file pointer position
            rewind($this->fp);

            $current = 0;

            do
            {
                if ($line === $current)
                {
                    return true;
                }

                $current++;

            } while (fgets($this->fp) !== false);

            return false;
        }


        /**
         * Read and parse the next CSV row from the file pointer.
         *
         * The escape character is passed explicitly (same value as the
         * historical default) so parsing does not depend on the default.
         *
         * @return array|false|null Raw fgetcsv() result
         */
        protected function fetchRow()
        {
            return fgetcsv($this->fp, 0, $this->delimiter, $this->closure_char, '\\');
        }


        /**
         * Encode a text to UTF-8
         *
         * Values that are not text (for example already casted integers or
         * floats) are returned unchanged, whatever the configured charset is.
         *
         * @param mixed $text
         * @return mixed
         */
        protected function encode($text)
        {
            if ($this->charset === 'UTF-8')
            {
                return $text;
            }

            if (!is_string($text) && !is_array($text))
            {
                return $text;
            }

            return mb_convert_encoding($text, 'UTF-8', $this->charset);
        }

    }