hugowetterberg · March 9, 2025 14:48 · Oct 6, 2022 · Oct 6, 2022 · Mar 19, 2009
diff --git a/output.txt b/output.txt
@@ -1,18 +1,29 @@
-DESCRIPTION:<p>Lines of text SHOULD NOT be longer than 75 octets, (och h
-	r på den) excluding the line break. Long content lines SHOULD be split in
-	o a multiple line representations using a line "folding" technique.</p> Th
-	t is, a long line can be split between any two characters by inserting a C
-	LF immediately followed by a single linear white space character (i.e., SP
-	CE, <b>US-ASCII</b> decimal 32 or HTAB, US-ASCII decimal 9). Any sequence 
-	f CRLF followed immediately by a single linear white space character is ig
-	ored (i.e., removed) when processing the content type.
+DESCRIPTION:Lines of text SHOULD NOT be longer than 75 octets, (och hör 
+	å den) excluding the line break. Long content lines SHOULD be split into 
+	 multiple line representations using a line "folding" technique. That is, 
+	 long line can be split between any two characters by inserting a CRLF imm
+	diately followed by a single linear white space character (i.e., SPACE, US
+	ASCII decimal 32 or HTAB, US-ASCII decimal 9). Any sequence of CRLF follow
+	d immediately by a single linear white space character is ignored (i.e., r
+	moved) when processing the content type.
 
 Tests
-Line 0: 72 octets
+Line 0: 73 octets
 Line 1: 75 octets
 Line 2: 75 octets
 Line 3: 75 octets
 Line 4: 75 octets
 Line 5: 75 octets
 Line 6: 75 octets
-Line 7: 55 octets
+Line 7: 41 octets
+
+Alt desc output:
+X-ALT-DESC:<p>Lines of text SHOULD NOT be longer than 75 octets, (och hö
+	 på den) excluding the line break. Long content lines SHOULD be split int
+	 a multiple line representations using a line "folding" technique.</p> Tha
+	 is, a long line can be split between any two characters by inserting a CR
+	F immediately followed by a single linear white space character (i.e., SPA
+	E, <b>US-ASCII</b> decimal 32 or HTAB, US-ASCII decimal 9). Any sequence o
+	 CRLF followed immediately by a single linear white space character is ign
+	red (i.e., removed) when processing the content type.
+
diff --git a/ical-RFC-2445-4.1.php b/ical-RFC-2445-4.1.php
@@ -10,46 +10,51 @@
    is ignored (i.e., removed) when processing the content type.
 TEXT;
 
-function ical_split($preamble, $value) {
-  $value = trim($value);
-  $value = strip_tags($value);
-  $value = preg_replace('/\n+/', ' ', $value);
-  $value = preg_replace('/\s{2,}/', ' ', $value);
-
-  $preamble_len = strlen($preamble);
-
-  $lines = array();
-  while (strlen($value)>(75-$preamble_len)) {
-    $space = (75-$preamble_len);
-    $mbcc = $space;
-    while ($mbcc) {
-      $line = mb_substr($value, 0, $mbcc);
-      $oct = strlen($line);
-      if ($oct > $space) {
-        $mbcc -= $oct-$space;
-      }
-      else {
+/**
+ * Apply folding compliant with RFC 5545: "$preamble:$value" is cut into chunks that are joined with CRLF + TAB
+ * See https://www.rfc-editor.org/rfc/rfc5545#section-3.1
+ * NOTE(review): each chunk takes $chunkSize - 1 octets but $offset advances by $chunkSize, so the octet between two chunks is never output
+ * @param   string  $preamble   The property name, e.g. DESCRIPTION (a ':' is appended here; the calls in this script pass a name that already ends in ':')
+ * @param   string  $value      The value for the property, e.g. a very long string
+ * @param   bool    $strip_tags Strip HTML tags from the value
+ *
+ * @return  string              Returns the folded string without the property name
+ */
+function ical_split($preamble, $value, $strip_tags=true)
+{
+    $value = trim($value);
+    $value = preg_replace('/[\r\n]+/', ' ', $value); // every run of line breaks becomes one space
+    $value = preg_replace('/\s{2,}/', ' ', $value); // collapse remaining whitespace runs to one space
+
+    if ($strip_tags) {
+        $value = strip_tags($value);
+    }
+
+    $value = $preamble . ':' . $value; // fold name and value together so the name counts toward the first line
+    $offset = 0;
+    $chunkSize = 75;
+    $lines = [];
+    while ($line = mb_strcut($value, $offset, $chunkSize - 1)) { // octet-based cut that keeps multi-byte characters whole; NOTE(review): truthiness test, so a "0" chunk also ends the loop
         $lines[] = $line;
-        $preamble_len = 1; // Still take the tab into account
-        $value = mb_substr($value, $mbcc);
-        break;
-      }
+        $offset += $chunkSize; // NOTE(review): one more than the $chunkSize - 1 octets just taken; strlen($line) would keep the chunks contiguous
     }
-  }
-  if (!empty($value)) {
-    $lines[] = $value;
-  }
 
-  return join($lines, "\n\t");
+    return substr(join("\r\n\t", $lines), strlen($preamble) + 1); // drop the leading "$preamble:" again; the caller prints the property name itself
 }
 
 $split = ical_split('DESCRIPTION:', $desc);
 print 'DESCRIPTION:' . $split;
 
 // Test results
-$lines = preg_split('/\n/', 'DESCRIPTION:' . $split);
+$lines = preg_split('/\r\n/', 'DESCRIPTION:' . $split); // ical_split() now joins chunks with CRLF + TAB, so split on CRLF
 
 print "\n\nTests\n";
 foreach ($lines as $i => $line) {
   print "Line {$i}: " . strlen($line) . " octets\n";
-}
+}
+
+print "\nAlt desc output:\n";
+
+$split = ical_split('X-ALT-DESC:', $desc, false); // third argument false: the HTML tags in $desc are kept
+print 'X-ALT-DESC:' . $split;
+print "\n\n";
diff --git a/output.txt b/output.txt
@@ -1,18 +1,18 @@
-DESCRIPTION:Lines of text SHOULD NOT be longer than 75 octets, (och hör p
-	å den) excluding the line break. Long content lines SHOULD be split into 
-	a multiple line representations using a line "folding" technique. That is,
-	 a long line can be split between any two characters by inserting a CRLF i
-	mmediately followed by a single linear white space character (i.e., SPACE,
-	 US-ASCII decimal 32 or HTAB, US-ASCII decimal 9). Any sequence of CRLF fo
-	llowed immediately by a single linear white space character is ignored (i.
-	e., removed) when processing the content type.
-
-Tests
-Line 0: 74 octets
-Line 1: 75 octets
-Line 2: 75 octets
-Line 3: 75 octets
-Line 4: 75 octets
-Line 5: 75 octets
-Line 6: 75 octets
-Line 7: 47 octets
+DESCRIPTION:<p>Lines of text SHOULD NOT be longer than 75 octets, (och h
+	r på den) excluding the line break. Long content lines SHOULD be split in
+	o a multiple line representations using a line "folding" technique.</p> Th
+	t is, a long line can be split between any two characters by inserting a C
+	LF immediately followed by a single linear white space character (i.e., SP
+	CE, <b>US-ASCII</b> decimal 32 or HTAB, US-ASCII decimal 9). Any sequence 
+	f CRLF followed immediately by a single linear white space character is ig
+	ored (i.e., removed) when processing the content type.
+
+Tests
+Line 0: 72 octets
+Line 1: 75 octets
+Line 2: 75 octets
+Line 3: 75 octets
+Line 4: 75 octets
+Line 5: 75 octets
+Line 6: 75 octets
+Line 7: 55 octets
diff --git a/ical-RFC-2445-4.1.php b/ical-RFC-2445-4.1.php
@@ -0,0 +1,55 @@
+<?php
+mb_internal_encoding("UTF-8");
+$desc = <<<TEXT
+<p>Lines of text SHOULD NOT be longer than 75 octets, (och hör på den) excluding the line break. Long content lines SHOULD be split into a multiple line representations using a line "folding" technique.</p>
+   
+   That is, a long line can be split between any two characters by inserting a CRLF
+   immediately followed by a single linear white space character (i.e.,
+   SPACE, <b>US-ASCII</b> decimal 32 or HTAB, US-ASCII decimal 9). Any sequence
+   of CRLF followed immediately by a single linear white space character
+   is ignored (i.e., removed) when processing the content type.
+TEXT;
+
+function ical_split($preamble, $value) {
+  $value = trim($value);
+  $value = strip_tags($value);
+  $value = preg_replace('/\n+/', ' ', $value);
+  $value = preg_replace('/\s{2,}/', ' ', $value);
+
+  $preamble_len = strlen($preamble);
+
+  $lines = array();
+  while (strlen($value)>(75-$preamble_len)) {
+    $space = (75-$preamble_len);
+    $mbcc = $space;
+    while ($mbcc) {
+      $line = mb_substr($value, 0, $mbcc);
+      $oct = strlen($line);
+      if ($oct > $space) {
+        $mbcc -= $oct-$space;
+      }
+      else {
+        $lines[] = $line;
+        $preamble_len = 1; // Still take the tab into account
+        $value = mb_substr($value, $mbcc);
+        break;
+      }
+    }
+  }
+  if (!empty($value)) {
+    $lines[] = $value;
+  }
+
+  return join($lines, "\n\t");
+}
+
+$split = ical_split('DESCRIPTION:', $desc);
+print 'DESCRIPTION:' . $split;
+
+// Test results
+$lines = preg_split('/\n/', 'DESCRIPTION:' . $split);
+
+print "\n\nTests\n";
+foreach ($lines as $i => $line) {
+  print "Line {$i}: " . strlen($line) . " octets\n";
+}
diff --git a/output.txt b/output.txt
@@ -0,0 +1,18 @@
+DESCRIPTION:Lines of text SHOULD NOT be longer than 75 octets, (och hör p
+	å den) excluding the line break. Long content lines SHOULD be split into 
+	a multiple line representations using a line "folding" technique. That is,
+	 a long line can be split between any two characters by inserting a CRLF i
+	mmediately followed by a single linear white space character (i.e., SPACE,
+	 US-ASCII decimal 32 or HTAB, US-ASCII decimal 9). Any sequence of CRLF fo
+	llowed immediately by a single linear white space character is ignored (i.
+	e., removed) when processing the content type.
+
+Tests
+Line 0: 74 octets
+Line 1: 75 octets
+Line 2: 75 octets
+Line 3: 75 octets
+Line 4: 75 octets
+Line 5: 75 octets
+Line 6: 75 octets
+Line 7: 47 octets
No results found