You are here

HttpFetcher.php in Feeds 8.3

File

src/Feeds/Fetcher/HttpFetcher.php
View source
<?php

namespace Drupal\feeds\Feeds\Fetcher;

use Drupal\Core\Cache\CacheBackendInterface;
use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\feeds\Exception\EmptyFeedException;
use Drupal\feeds\FeedInterface;
use Drupal\feeds\Plugin\Type\ClearableInterface;
use Drupal\feeds\Plugin\Type\Fetcher\FetcherInterface;
use Drupal\feeds\Plugin\Type\PluginBase;
use Drupal\feeds\Result\HttpFetcherResult;
use Drupal\feeds\StateInterface;
use Drupal\feeds\Utility\Feed;
use GuzzleHttp\ClientInterface;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\RequestOptions;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Symfony\Component\HttpFoundation\Response;

/**
 * Defines an HTTP fetcher.
 *
 * @FeedsFetcher(
 *   id = "http",
 *   title = @Translation("Download from url"),
 *   description = @Translation("Downloads data from a URL using Drupal's HTTP request handler."),
 *   form = {
 *     "configuration" = "Drupal\feeds\Feeds\Fetcher\Form\HttpFetcherForm",
 *     "feed" = "Drupal\feeds\Feeds\Fetcher\Form\HttpFetcherFeedForm",
 *   }
 * )
 */
class HttpFetcher extends PluginBase implements ClearableInterface, FetcherInterface, ContainerFactoryPluginInterface {

  /**
   * The Guzzle client.
   *
   * @var \GuzzleHttp\ClientInterface
   */
  protected $client;

  /**
   * The cache backend.
   *
   * @var \Drupal\Core\Cache\CacheBackendInterface
   */
  protected $cache;

  /**
   * Drupal file system helper.
   *
   * @var \Drupal\Core\File\FileSystemInterface
   */
  protected $fileSystem;

  /**
   * Constructs an UploadFetcher object.
   *
   * @param array $configuration
   *   The plugin configuration.
   * @param string $plugin_id
   *   The plugin id.
   * @param array $plugin_definition
   *   The plugin definition.
   * @param \GuzzleHttp\ClientInterface $client
   *   The Guzzle client.
   * @param \Drupal\Core\Cache\CacheBackendInterface $cache
   *   The cache backend.
   * @param \Drupal\Core\File\FileSystemInterface $file_system
   *   The Drupal file system helper.
   */
  public function __construct(array $configuration, $plugin_id, array $plugin_definition, ClientInterface $client, CacheBackendInterface $cache, FileSystemInterface $file_system) {
    $this->client = $client;
    $this->cache = $cache;
    $this->fileSystem = $file_system;
    parent::__construct($configuration, $plugin_id, $plugin_definition);
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    return new static($configuration, $plugin_id, $plugin_definition, $container
      ->get('http_client'), $container
      ->get('cache.feeds_download'), $container
      ->get('file_system'));
  }

  /**
   * {@inheritdoc}
   */
  public function fetch(FeedInterface $feed, StateInterface $state) {
    $sink = $this->fileSystem
      ->tempnam('temporary://', 'feeds_http_fetcher');
    $sink = $this->fileSystem
      ->realpath($sink);

    // Get cache key if caching is enabled.
    $cache_key = $this
      ->useCache() ? $this
      ->getCacheKey($feed) : FALSE;
    $response = $this
      ->get($feed
      ->getSource(), $sink, $cache_key);

    // @todo Handle redirects.
    // @codingStandardsIgnoreStart
    // $feed->setSource($response->getEffectiveUrl());
    // @codingStandardsIgnoreEnd
    // 304, nothing to see here.
    if ($response
      ->getStatusCode() == Response::HTTP_NOT_MODIFIED) {
      $state
        ->setMessage($this
        ->t('The feed has not been updated.'));
      throw new EmptyFeedException();
    }
    return new HttpFetcherResult($sink, $response
      ->getHeaders());
  }

  /**
   * Performs a GET request.
   *
   * @param string $url
   *   The URL to GET.
   * @param string $sink
   *   The location where the downloaded content will be saved. This can be a
   *   resource, path or a StreamInterface object.
   * @param string $cache_key
   *   (optional) The cache key to find cached headers. Defaults to false.
   *
   * @return \Guzzle\Http\Message\Response
   *   A Guzzle response.
   *
   * @throws \RuntimeException
   *   Thrown if the GET request failed.
   *
   * @see \GuzzleHttp\RequestOptions
   */
  protected function get($url, $sink, $cache_key = FALSE) {
    $url = Feed::translateSchemes($url);
    $options = [
      RequestOptions::SINK => $sink,
    ];

    // Adding User-Agent header from the default guzzle client config for feeds
    // that require that.
    if (isset($this->client
      ->getConfig('headers')['User-Agent'])) {
      $options[RequestOptions::HEADERS]['User-Agent'] = $this->client
        ->getConfig('headers')['User-Agent'];
    }

    // Add cached headers if requested.
    if ($cache_key && ($cache = $this->cache
      ->get($cache_key))) {
      if (isset($cache->data['etag'])) {
        $options[RequestOptions::HEADERS]['If-None-Match'] = $cache->data['etag'];
      }
      if (isset($cache->data['last-modified'])) {
        $options[RequestOptions::HEADERS]['If-Modified-Since'] = $cache->data['last-modified'];
      }
    }
    try {
      $response = $this->client
        ->get($url, $options);
    } catch (RequestException $e) {
      $args = [
        '%site' => $url,
        '%error' => $e
          ->getMessage(),
      ];
      throw new \RuntimeException($this
        ->t('The feed from %site seems to be broken because of error "%error".', $args));
    }
    if ($cache_key) {
      $this->cache
        ->set($cache_key, array_change_key_case($response
        ->getHeaders()));
    }
    return $response;
  }

  /**
   * Returns if the cache should be used.
   *
   * @return bool
   *   True if results should be cached. False otherwise.
   */
  protected function useCache() {
    return !$this->configuration['always_download'];
  }

  /**
   * Returns the download cache key for a given feed.
   *
   * @param \Drupal\feeds\FeedInterface $feed
   *   The feed to find the cache key for.
   *
   * @return string
   *   The cache key for the feed.
   */
  protected function getCacheKey(FeedInterface $feed) {
    return $feed
      ->id() . ':' . hash('sha256', $feed
      ->getSource());
  }

  /**
   * {@inheritdoc}
   */
  public function clear(FeedInterface $feed, StateInterface $state) {
    $this
      ->onFeedDeleteMultiple([
      $feed,
    ]);
  }

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration() {
    return [
      // @todo auto_detect_feeds causes issues with downloading files that are
      // not a RSS feed. Set the default to TRUE as soon as that issue is
      // resolved.
      'auto_detect_feeds' => FALSE,
      'use_pubsubhubbub' => FALSE,
      'always_download' => FALSE,
      'fallback_hub' => '',
      'request_timeout' => 30,
    ];
  }

  /**
   * {@inheritdoc}
   */
  public function onFeedDeleteMultiple(array $feeds) {
    foreach ($feeds as $feed) {
      $this->cache
        ->delete($this
        ->getCacheKey($feed));
    }
  }

}

Classes

Namesort descending Description
HttpFetcher Defines an HTTP fetcher.