congpeijun · December 6, 2012 05:52
diff --git a/ical-RFC-2445-4.1.php b/ical-RFC-2445-4.1.php
 <?php
 // Make the mb_* string functions used below operate on UTF-8 text.
 mb_internal_encoding("UTF-8");
 // Sample property value for the demo: it contains HTML tags, line breaks and
 // non-ASCII characters, and is passed to ical_split() further down.
 $desc = <<<TEXT
 <p>Lines of text SHOULD NOT be longer than 75 octets, (och hör på den) excluding the line break. Long content lines SHOULD be split into a multiple line representations using a line "folding" technique.</p>
   
   That is, a long line can be split between any two characters by inserting a CRLF
   immediately followed by a single linear white space character (i.e.,
   SPACE, <b>US-ASCII</b> decimal 32 or HTAB, US-ASCII decimal 9). Any sequence
   of CRLF followed immediately by a single linear white space character
   is ignored (i.e., removed) when processing the content type.
 TEXT;

 /**
  * Fold a property value into physical lines of at most 75 octets each.
  *
  * The value is first normalised: surrounding whitespace is trimmed, HTML
  * tags are stripped, and newlines / whitespace runs are collapsed to single
  * spaces. It is then cut into chunks measured in bytes, never splitting a
  * multibyte character (the mbstring internal encoding is used).
  *
  * The first chunk leaves room for $preamble; every later chunk leaves room
  * for the single tab that introduces a continuation line.
  *
  * @param string $preamble Text that precedes the value on the first line
  *                         (e.g. "DESCRIPTION:"). Only its byte length is
  *                         used; it is not included in the return value.
  * @param string $value    Raw property value.
  *
  * @return string The chunks joined with "\n\t". If the preamble leaves no
  *                room for even one character, the first chunk is empty and
  *                the value starts on the first continuation line.
  */
 function ical_split($preamble, $value) {
   $value = trim($value);
   $value = strip_tags($value);
   $value = preg_replace('/\n+/', ' ', $value);
   $value = preg_replace('/\s{2,}/', ' ', $value);

   $limit = 75;
   // Octets still available on the current line. Clamped at zero so an
   // over-long preamble cannot produce a negative budget (which used to make
   // the loop below spin forever).
   $space = max(0, $limit - strlen($preamble));

   $lines = array();
   while (strlen($value) > $space) {
     // mb_strcut() cuts by BYTES but backs up to a character boundary, so the
     // chunk is the longest prefix that fits without breaking a character.
     $line = $space > 0 ? mb_strcut($value, 0, $space) : '';
     if ($line === '' && $space >= 4) {
       // No character fit although the budget covers any UTF-8 character:
       // the input is malformed. Cut raw bytes so the loop still advances.
       $line = substr($value, 0, $space);
     }
     $lines[] = $line;
     $value = substr($value, strlen($line));
     $space = $limit - 1; // Continuation lines start with one tab.
   }
   // Compare against '' rather than empty(): a remainder of "0" is real data.
   if ($value !== '') {
     $lines[] = $value;
   }

   // implode() with the separator first; the reversed order is gone in PHP 8.
   return implode("\n\t", $lines);
 }

 // Fold the sample description and emit it behind its property name.
 $split = ical_split('DESCRIPTION:', $desc);
 echo 'DESCRIPTION:' . $split;

 // Break the emitted text back into physical lines so each can be measured.
 $lines = explode("\n", 'DESCRIPTION:' . $split);

 // Report the byte length of every physical line.
 echo "\n\nTests\n";
 foreach ($lines as $i => $line) {
   echo "Line {$i}: ", strlen($line), " octets\n";
 }
diff --git a/output.txt b/output.txt
 DESCRIPTION:Lines of text SHOULD NOT be longer than 75 octets, (och hör p
 	å den) excluding the line break. Long content lines SHOULD be split into 
 	a multiple line representations using a line "folding" technique. That is,
 	 a long line can be split between any two characters by inserting a CRLF i
 	mmediately followed by a single linear white space character (i.e., SPACE,
 	 US-ASCII decimal 32 or HTAB, US-ASCII decimal 9). Any sequence of CRLF fo
 	llowed immediately by a single linear white space character is ignored (i.
 	e., removed) when processing the content type.

 Tests
 Line 0: 74 octets
 Line 1: 75 octets
 Line 2: 75 octets
 Line 3: 75 octets
 Line 4: 75 octets
 Line 5: 75 octets
 Line 6: 75 octets
 Line 7: 47 octets
	<?php
	// Make the mb_* string functions used below operate on UTF-8 text.
	mb_internal_encoding("UTF-8");
	// Sample property value for the demo: it contains HTML tags, line breaks and
	// non-ASCII characters, and is passed to ical_split() further down.
	$desc = <<<TEXT
	<p>Lines of text SHOULD NOT be longer than 75 octets, (och hör på den) excluding the line break. Long content lines SHOULD be split into a multiple line representations using a line "folding" technique.</p>

	That is, a long line can be split between any two characters by inserting a CRLF
	immediately followed by a single linear white space character (i.e.,
	SPACE, <b>US-ASCII</b> decimal 32 or HTAB, US-ASCII decimal 9). Any sequence
	of CRLF followed immediately by a single linear white space character
	is ignored (i.e., removed) when processing the content type.
	TEXT;

	/**
	 * Fold a property value into physical lines of at most 75 octets each.
	 *
	 * The value is first normalised: surrounding whitespace is trimmed, HTML
	 * tags are stripped, and newlines / whitespace runs are collapsed to single
	 * spaces. It is then cut into chunks measured in bytes, never splitting a
	 * multibyte character (the mbstring internal encoding is used).
	 *
	 * The first chunk leaves room for $preamble; every later chunk leaves room
	 * for the single tab that introduces a continuation line.
	 *
	 * @param string $preamble Text that precedes the value on the first line
	 *                         (e.g. "DESCRIPTION:"). Only its byte length is
	 *                         used; it is not included in the return value.
	 * @param string $value    Raw property value.
	 *
	 * @return string The chunks joined with "\n\t". If the preamble leaves no
	 *                room for even one character, the first chunk is empty and
	 *                the value starts on the first continuation line.
	 */
	function ical_split($preamble, $value) {
	  $value = trim($value);
	  $value = strip_tags($value);
	  $value = preg_replace('/\n+/', ' ', $value);
	  $value = preg_replace('/\s{2,}/', ' ', $value);

	  $limit = 75;
	  // Octets still available on the current line. Clamped at zero so an
	  // over-long preamble cannot produce a negative budget (which used to make
	  // the loop below spin forever).
	  $space = max(0, $limit - strlen($preamble));

	  $lines = array();
	  while (strlen($value) > $space) {
	    // mb_strcut() cuts by BYTES but backs up to a character boundary, so the
	    // chunk is the longest prefix that fits without breaking a character.
	    $line = $space > 0 ? mb_strcut($value, 0, $space) : '';
	    if ($line === '' && $space >= 4) {
	      // No character fit although the budget covers any UTF-8 character:
	      // the input is malformed. Cut raw bytes so the loop still advances.
	      $line = substr($value, 0, $space);
	    }
	    $lines[] = $line;
	    $value = substr($value, strlen($line));
	    $space = $limit - 1; // Continuation lines start with one tab.
	  }
	  // Compare against '' rather than empty(): a remainder of "0" is real data.
	  if ($value !== '') {
	    $lines[] = $value;
	  }

	  // implode() with the separator first; the reversed order is gone in PHP 8.
	  return implode("\n\t", $lines);
	}

	// Fold the sample description and emit it behind its property name.
	$split = ical_split('DESCRIPTION:', $desc);
	echo 'DESCRIPTION:' . $split;

	// Break the emitted text back into physical lines so each can be measured.
	$lines = explode("\n", 'DESCRIPTION:' . $split);

	// Report the byte length of every physical line.
	echo "\n\nTests\n";
	foreach ($lines as $i => $line) {
	  echo "Line {$i}: ", strlen($line), " octets\n";
	}
	DESCRIPTION:Lines of text SHOULD NOT be longer than 75 octets, (och hör p
	å den) excluding the line break. Long content lines SHOULD be split into
	a multiple line representations using a line "folding" technique. That is,
	a long line can be split between any two characters by inserting a CRLF i
	mmediately followed by a single linear white space character (i.e., SPACE,
	US-ASCII decimal 32 or HTAB, US-ASCII decimal 9). Any sequence of CRLF fo
	llowed immediately by a single linear white space character is ignored (i.
	e., removed) when processing the content type.

	Tests
	Line 0: 74 octets
	Line 1: 75 octets
	Line 2: 75 octets
	Line 3: 75 octets
	Line 4: 75 octets
	Line 5: 75 octets
	Line 6: 75 octets
	Line 7: 47 octets