⚝
One Hat Cyber Team
⚝
Your IP:
216.73.216.180
Server IP:
13.127.59.50
Server:
Linux ip-172-31-46-210 5.15.0-1033-aws #37~20.04.1-Ubuntu SMP Fri Mar 17 11:39:30 UTC 2023 x86_64
Server Software:
Apache/2.4.41 (Ubuntu)
PHP Version:
7.4.3-4ubuntu2.29
Buat File
|
Buat Folder
Eksekusi
Dir :
~
/
home
/
ubuntu
/
vendor
/
spatie
/
crawler
/
src
/
Handlers
/
View File Name :
CrawlRequestFulfilled.php
<?php

namespace Spatie\Crawler\Handlers;

use function GuzzleHttp\Psr7\stream_for;
use GuzzleHttp\Psr7\Uri;
use GuzzleHttp\RedirectMiddleware;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\StreamInterface;
use Psr\Http\Message\UriInterface;
use Spatie\Crawler\Crawler;
use Spatie\Crawler\CrawlerRobots;
use Spatie\Crawler\CrawlSubdomains;
use Spatie\Crawler\CrawlUrl;
use Spatie\Crawler\LinkAdder;

/**
 * Invokable handler for a successfully fulfilled crawl request.
 *
 * For each response it notifies the crawl observers (when the robots rules
 * allow indexing) and hands the response body to the LinkAdder so that the
 * links found in it can be added (when the robots rules allow following).
 */
class CrawlRequestFulfilled
{
    /** @var \Spatie\Crawler\Crawler */
    protected $crawler;

    /** @var \Spatie\Crawler\LinkAdder */
    protected $linkAdder;

    /**
     * @param \Spatie\Crawler\Crawler $crawler Crawler whose configuration and queue this handler uses.
     */
    public function __construct(Crawler $crawler)
    {
        $this->crawler = $crawler;

        $this->linkAdder = new LinkAdder($this->crawler);
    }

    /**
     * Handle one fulfilled response.
     *
     * @param \Psr\Http\Message\ResponseInterface $response The fulfilled response.
     * @param mixed $index Id used to look the CrawlUrl up in the crawl queue.
     *
     * @return void
     */
    public function __invoke(ResponseInterface $response, $index)
    {
        $body = $this->getBody($response);

        // The robots rules are built from the headers and the body as
        // received, i.e. before any JavaScript rendering below.
        $robots = new CrawlerRobots(
            $response->getHeaders(),
            $body,
            $this->crawler->mustRespectRobots()
        );

        $crawlUrl = $this->crawler->getCrawlQueue()->getUrlById($index);

        if ($this->crawler->mayExecuteJavaScript()) {
            // Replace the body with the rendered HTML so that both the
            // observers and the link extraction see the rendered page.
            $body = $this->getBodyAfterExecutingJavaScript($crawlUrl->url);

            $response = $response->withBody(stream_for($body));
        }

        if ($robots->mayIndex()) {
            $this->handleCrawled($response, $crawlUrl);
        }

        // Unless the profile is a CrawlSubdomains one, links are only
        // collected from pages on exactly the base URL's host.
        if (! $this->crawler->getCrawlProfile() instanceof CrawlSubdomains) {
            if ($crawlUrl->url->getHost() !== $this->crawler->getBaseUrl()->getHost()) {
                return;
            }
        }

        if (! $robots->mayFollow()) {
            return;
        }

        $baseUrl = $this->getBaseUrl($response, $crawlUrl);

        $this->linkAdder->addFromHtml($body, $baseUrl);

        // Note: the delay is only applied when links were collected; the
        // early returns above skip it.
        usleep($this->crawler->getDelayBetweenRequests());
    }

    /**
     * Determine the URL that links found in the response are resolved against.
     *
     * When the response carries a redirect history header, the last entry of
     * that history is used; otherwise the originally requested URL is used.
     *
     * @param \Psr\Http\Message\ResponseInterface $response
     * @param \Spatie\Crawler\CrawlUrl $crawlUrl
     *
     * @return \Psr\Http\Message\UriInterface
     */
    protected function getBaseUrl(ResponseInterface $response, CrawlUrl $crawlUrl)
    {
        $redirectHistory = $response->getHeader(RedirectMiddleware::HISTORY_HEADER);

        if (empty($redirectHistory)) {
            return $crawlUrl->url;
        }

        return new Uri(end($redirectHistory));
    }

    /**
     * Notify the crawl observers that the given URL has been crawled.
     *
     * @param \Psr\Http\Message\ResponseInterface $response
     * @param \Spatie\Crawler\CrawlUrl $crawlUrl
     *
     * @return void
     */
    protected function handleCrawled(ResponseInterface $response, CrawlUrl $crawlUrl)
    {
        $this->crawler->getCrawlObservers()->crawled($crawlUrl, $response);
    }

    /**
     * Read at most $readMaximumBytes bytes from the start of the stream.
     *
     * @param \Psr\Http\Message\StreamInterface $bodyStream
     * @param int $readMaximumBytes Upper bound on the number of bytes returned (default 2 MiB).
     *
     * @return string
     */
    protected function convertBodyToString(StreamInterface $bodyStream, $readMaximumBytes = 1024 * 1024 * 2): string
    {
        if ($bodyStream->isSeekable()) {
            $bodyStream->rewind();
        }

        $body = '';
        $remainingBytes = (int) $readMaximumBytes;

        // StreamInterface::read() is allowed to return fewer bytes than
        // requested, so a single call can truncate the body. Keep reading
        // until the limit is reached or the stream is exhausted.
        while ($remainingBytes > 0 && ! $bodyStream->eof()) {
            $chunk = $bodyStream->read($remainingBytes);

            // An empty chunk without EOF would otherwise loop forever.
            if ($chunk === '') {
                break;
            }

            $body .= $chunk;
            $remainingBytes -= strlen($chunk);
        }

        return $body;
    }

    /**
     * Get the response body as a string, or '' when its Content-Type is not
     * one the crawler is configured to parse.
     *
     * @param \Psr\Http\Message\ResponseInterface $response
     *
     * @return string
     */
    protected function getBody(ResponseInterface $response): string
    {
        $contentType = $response->getHeaderLine('Content-Type');

        if (! $this->isMimetypeAllowedToParse($contentType)) {
            return '';
        }

        return $this->convertBodyToString($response->getBody(), $this->crawler->getMaximumResponseSize());
    }

    /**
     * Fetch the body HTML of the URL through the crawler's Browsershot
     * instance and return it with HTML entities decoded.
     *
     * @param \Psr\Http\Message\UriInterface $url
     *
     * @return string
     */
    protected function getBodyAfterExecutingJavaScript(UriInterface $url): string
    {
        $browsershot = $this->crawler->getBrowsershot();

        $html = $browsershot->setUrl((string) $url)->bodyHtml();

        return html_entity_decode($html);
    }

    /**
     * Decide whether a response with the given Content-Type may be parsed.
     *
     * An empty Content-Type, or an empty list of parseable mime types, allows
     * everything. Otherwise the Content-Type must contain one of the
     * configured mime types (case-insensitive substring match).
     *
     * @param string $contentType Value of the Content-Type header line.
     *
     * @return bool
     */
    protected function isMimetypeAllowedToParse($contentType): bool
    {
        if (empty($contentType)) {
            return true;
        }

        $parseableMimeTypes = $this->crawler->getParseableMimeTypes();

        if (! count($parseableMimeTypes)) {
            return true;
        }

        foreach ($parseableMimeTypes as $allowedType) {
            // Compare against false explicitly: relying on the truthiness of
            // the matched substring misreports a match whose tail is "0".
            if (stripos($contentType, $allowedType) !== false) {
                return true;
            }
        }

        return false;
    }
}