curl("http://www.example.com/", [
    'cookies'  => 'fruit=apple; colour=red',
    'headers'  => [
        'Authorization: Bearer AbCdEfGhIjKlMnOpQ',
        'Content-Type: application/json',
    ],
    'post'     => [ 'firstname' => 'Xavi', 'lastname' => 'Esteve' ],
    'userpass' => 'admin:password',
]);

echo $ws->regex(
    "#This domain is established to be used for (.*?) examples in documents#mi",
    $html['content']
)[0][1];
*/

/**
 * Small cURL wrapper for fetching pages plus a regex helper for scraping them.
 */
class WebScrap
{
    /**
     * User-Agent strings; one is picked at random when the caller gives none.
     *
     * @var string[]
     */
    private static $userAgents = [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11) AppleWebKit/601.1.56 (KHTML, like Gecko) Version/9.0 Safari/601.1.56',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
        // Kept on one line: a line break inside a User-Agent value would corrupt the request header.
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13',
        'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)',
    ];

    /**
     * Perform an HTTP request with cURL and return the transfer info plus the
     * split response.
     *
     * Recognised keys of $custom (all optional):
     *   'cookies'    => 'name1=content1; name2=content2;'
     *   'headers'    => ['Authorization: Bearer AbCdEfGhIjKlMnOpQ', 'Content-Type: application/json']
     *   'post'       => ['firstname' => 'Xavi', 'lastname' => 'Esteve']
     *   'user_agent' => '...'  (when the key is absent a random one from the list is used)
     *   'userpass'   => 'clark:kent'
     *
     * @param string $url    URL to request.
     * @param array  $custom Request customisation, see above.
     *
     * @return array The curl_getinfo() array extended with:
     *               'errno'   => curl_errno() of the transfer,
     *               'errmsg'  => curl_error() of the transfer,
     *               'headers' => raw response header text,
     *               'content' => trimmed response body,
     *               'cookies' => "name=value" pairs from Set-Cookie headers joined with "; ".
     */
    public function curl( $url, $custom = [] )
    {
        $ch = curl_init( $url );
        curl_setopt_array( $ch, $this->buildOptions( $custom ) );

        $rough_content = curl_exec( $ch );
        $err           = curl_errno( $ch );
        $errmsg        = curl_error( $ch );
        $header        = curl_getinfo( $ch );
        curl_close( $ch );

        // curl_exec() returns false on failure; treat that as an empty response.
        if ( ! is_string( $rough_content ) ) {
            $rough_content = '';
        }
        if ( ! is_array( $header ) ) {
            $header = [];
        }

        // CURLOPT_HEADER prepends the response headers to the body; header_size
        // says where the body starts, so slice there instead of string-replacing
        // (which would also strip any later occurrence of the same text).
        $header_size    = isset( $header['header_size'] ) ? (int) $header['header_size'] : 0;
        $header_content = (string) substr( $rough_content, 0, $header_size );
        $body_content   = trim( (string) substr( $rough_content, $header_size ) );

        $header['errno']   = $err;
        $header['errmsg']  = $errmsg;
        $header['headers'] = $header_content;
        $header['content'] = $body_content;
        $header['cookies'] = $this->extractCookies( $header_content );

        return $header;
    }

    /**
     * Run preg_match_all() in PREG_SET_ORDER mode.
     *
     * Regex flags: http://php.net/manual/en/reference.pcre.pattern.modifiers.php
     *
     * @param string $regex  Full PCRE pattern including delimiters and modifiers.
     * @param string $string Subject to search.
     *
     * @return array One entry per match, each holding the full match followed by its groups.
     */
    public function regex( $regex, $string )
    {
        preg_match_all(
            $regex,
            $string,
            $matches,
            PREG_SET_ORDER // formats data into an array of items
        );

        return $matches;
    }

    /**
     * Build the cURL option array for a request.
     *
     * See http://php.net/manual/en/function.curl-setopt.php
     *
     * @param array $custom Same structure as the $custom argument of curl().
     *
     * @return array Options suitable for curl_setopt_array().
     */
    private function buildOptions( $custom )
    {
        $options = [
            CURLOPT_RETURNTRANSFER => true,  // return web page
            CURLOPT_HEADER         => true,  // return headers in addition to content
            CURLOPT_FOLLOWLOCATION => true,  // follow redirects
            CURLOPT_ENCODING       => "",    // handle all encodings
            CURLOPT_AUTOREFERER    => true,  // set referer on redirect
            CURLOPT_CONNECTTIMEOUT => 120,   // timeout on connect
            CURLOPT_TIMEOUT        => 120,   // timeout on response
            CURLOPT_MAXREDIRS      => 10,    // stop after 10 redirects
            CURLINFO_HEADER_OUT    => true,
            // NOTE(review): certificate verification is disabled, which leaves HTTPS
            // requests open to interception. Kept as-is for backward compatibility.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_HTTP_VERSION   => CURL_HTTP_VERSION_1_1,
            CURLOPT_USERAGENT      => array_key_exists( 'user_agent', $custom )
                ? $custom['user_agent']
                : self::$userAgents[ array_rand( self::$userAgents ) ],
        ];

        // Only send a Cookie header when the caller supplied one.
        if ( array_key_exists( 'cookies', $custom ) ) {
            $options[ CURLOPT_COOKIE ] = $custom['cookies'];
        }

        // Headers
        if ( array_key_exists( 'headers', $custom ) && is_array( $custom['headers'] ) ) {
            $options[ CURLOPT_HTTPHEADER ] = $custom['headers'];
        }

        // Post data. An array passed to CURLOPT_POSTFIELDS is sent by cURL as
        // multipart/form-data; it is NOT converted to JSON.
        if ( array_key_exists( 'post', $custom ) && is_array( $custom['post'] ) ) {
            $options[ CURLOPT_POST ]       = true;
            $options[ CURLOPT_POSTFIELDS ] = $custom['post'];
        }

        if ( array_key_exists( 'userpass', $custom ) ) {
            $options[ CURLOPT_USERPWD ] = $custom['userpass'];
        }

        return $options;
    }

    /**
     * Collect the "name=value" part of every Set-Cookie response header.
     *
     * @param string $header_content Raw response header text.
     *
     * @return string Cookie pairs joined with "; ", or '' when there are none.
     */
    private function extractCookies( $header_content )
    {
        // Header names are case-insensitive, and the value must stop at the end of
        // the line so a cookie without attributes does not swallow the next header.
        $found = preg_match_all(
            '#^Set-Cookie:[ \t]*(?<cookie>[^=;\r\n]+=[^;\r\n]+)#mi',
            (string) $header_content,
            $matches
        );

        if ( ! $found ) {
            return '';
        }

        return implode( "; ", $matches['cookie'] );
    }
}