// One-off clean-up of legacy article HTML.
//
// For every Article, `content` is rebuilt from `old_content`: inline styling,
// classes, <font>/<span>/<o:p> wrappers and stray whitespace are stripped, and
// line breaks are normalised around block-level tags. `old_content` is only
// read, never written, so the script can be re-run without compounding earlier
// passes.
//
// NOTE(review): several patterns below were reconstructed from a copy of this
// script in which HTML-looking fragments had been stripped out; verify them
// against a sample of real articles before running in production.
Article::all()
    ->each(function (Article $article) {
        $html = $article->old_content;

        // Temporarily rename style attributes that contain a "width:"
        // declaration so the blanket style removal below leaves them alone.
        $html = Str::replaceMatches(
            '/style=(\"[^\"]*width:[^\"]*\")/',
            static fn (array $matches): string => 'keepwidth_123=' . $matches[1],
            $html,
        );

        // Remove every remaining inline style attribute.
        $html = Str::replaceMatches(
            '/style=\"[^\"]*\"/',
            '',
            $html,
        );

        // Restore the width-carrying style attributes marked above.
        $html = Str::replaceMatches(
            '/keepwidth_123=(\"[^\"]*width:[^\"]*\")/',
            static fn (array $matches): string => 'style=' . $matches[1],
            $html,
        );

        // Remove inline class attributes.
        $html = Str::replaceMatches(
            '/class=\"[^\"]*\"/',
            '',
            $html,
        );

        // Remove opening <font ...> tags (inline font declarations).
        $html = Str::replaceMatches(
            '/<font[^>]*>/',
            '',
            $html,
        );

        // Attribute removal leaves whitespace behind ("<p >"); close it up.
        $html = Str::replaceMatches(
            '/\s+>/',
            '>',
            $html,
        );

        // Remove non-breaking spaces and leftover word-processor markup.
        // Both the "&nbsp;" entity and the raw UTF-8 bytes of U+00A0 are
        // matched; the byte form avoids the /u modifier, which would make the
        // whole replacement fail on articles containing invalid UTF-8.
        // NOTE(review): the original nbsp pattern was not recoverable -
        // confirm that removing (rather than replacing with a space) is wanted.
        $html = Str::replaceMatches(
            [
                '/&nbsp;|\xC2\xA0/',
                '/<o:p>/',
                '/<\/o:p>/',
                '/<\/font>/',
                '/<span[^>]*>/',
                '/<\/span>/',
            ],
            '',
            $html,
        );

        // Collapse existing newlines to single spaces; line breaks are all
        // over the place in many of the articles and are re-added below.
        $html = Str::replaceMatches(
            '/[\n\r]+/',
            ' ',
            $html,
        );

        // Put a blank line between a closing </p> and whatever tag follows.
        $html = Str::replaceMatches(
            '/<\/p>?[\s]*</',
            "</p>\n\n<",
            $html,
        );

        // Put a blank line between a closing header tag and the next tag,
        // keeping the header level that was matched.
        $html = Str::replaceMatches(
            '/<\/h(\d)>?[\s]*</',
            static fn (array $matches): string => "</h{$matches[1]}>\n\n<",
            $html,
        );

        // Put a line break between a <br> and the next tag. The matched tag is
        // re-emitted unchanged so "<br>", "<br/>" and "<br />" keep their form.
        // NOTE(review): reconstructed step - confirm the accepted <br> spellings
        // and the number of newlines wanted here.
        $html = Str::replaceMatches(
            '/(<br\s*\/?>)[\s]*</',
            static fn (array $matches): string => $matches[1] . "\n<",
            $html,
        );

        // Remove empty paragraphs that start on their own line.
        $html = Str::replaceMatches(
            '/[\r\n]<p>\s*<\/p>/',
            '',
            $html,
        );

        // Remove whitespace directly before a closing </p> tag.
        $html = Str::replaceMatches(
            '/[\s\n\r]*<\/p>/',
            '</p>',
            $html,
        );

        // Remove any remaining span tags; they are only used for inline
        // formatting.
        $html = Str::replaceMatches(
            [
                '/<span>/',
                '/<\/span>/',
            ],
            '',
            $html,
        );

        $article->content = $html;

        // Debug switch: uncomment to reset content to the untouched original.
        // $article->content = $article->old_content;

        $article->save();
    });