라라벨 크롤링: Laravel Guzzle Client, Crawler, DOMDocument

2022. 5. 18. 15:27 · PHP/Laravel8.X

728x90
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;

use Illuminate\Support\Facades\Http;
use DOMDocument;


class CurlController extends Controller
{
    /**
     * Sample action demonstrating two ways to fetch a web page and list its links.
     *
     * Both examples below are commented out, so as written this method executes
     * no statements. Uncomment exactly one of them to try it:
     *   1. GuzzleHttp\Client to download the page + Symfony DomCrawler to query it.
     *   2. Laravel's Http facade to download the page + DOMDocument to parse it.
     *
     * @return void Nothing is returned while both examples stay commented out
     *              (example 2, if enabled, returns the string 'fail' when the
     *              response is not successful).
     */
    public function index()
    {
        /*
        // 1. Using Client and Crawler
        $url = 'http://ujsstudio.com';
        $client = new Client();
        $res = $client->get($url);
        // Take the response body and cast it to a string to get the raw HTML.
        $res = $res->getBody();
        $html = (string)$res;
        
        // Print the href of every <a> found under the element with id "primary".
        // The callback only echoes and returns nothing, and $nodeValues is not
        // used afterwards.
        $crawler = new Crawler($html);
        $nodeValues = $crawler->filter('#primary a')->each(function(Crawler $node, $i){
            echo $node->attr('href') . PHP_EOL;
        });
        */

        
        /*
        // 2. Using Http and DOMDocument
        // NOTE(review): 'verify' => false presumably turns off TLS certificate
        // verification for this request - confirm, and avoid in production.
        $response = Http::withOptions([
            'verify'    => false,
        ])->get('http://ujsstudio.com');

        if ($response->successful()) {
            $html = (string)$response->body();
            $dom = new DOMDocument;
            // Collect libxml parse errors internally instead of emitting PHP
            // warnings while loading the fetched HTML.
            libxml_use_internal_errors(true);
            // libxml_clear_errors();
            $dom->loadHTML($html);
            $links = $dom->getElementsByTagName('a');
            
            // Print the text content of each <a> element, one per line in HTML.
            foreach ($links as $k => $v) {
                echo $v->nodeValue;
                echo '<br>';
                // Alternative: print the link target instead of its text.
                // echo $v->getAttribute('href');
            }

        } else {
            return 'fail';
        }
        */
        
    }
}

 

이 게시글은

유지성 선생님과

스택오버플로우 선생님의 도움을 받아 작성하였습니다.

 

http://blog.ujsstudio.com/2021/06/11/php%EB%A5%BC-%EC%9D%B4%EC%9A%A9%ED%95%9C-%EC%9B%B9%ED%81%AC%EB%A1%A4%EB%A7%81/

 

https://stackoverflow.com/questions/54103215/how-to-extract-a-tag-using-guzzlehttp-on-laravel

 

 

728x90
반응형