Drupal investigation

Link.php 5.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\DomCrawler;
  11. /**
  12. * Link represents an HTML link (an HTML a, area or link tag).
  13. *
  14. * @author Fabien Potencier <fabien@symfony.com>
  15. */
  class Link
  {
      /**
       * @var \DOMElement The a, area or link element wrapped by this object
       */
      protected $node;

      /**
       * @var string|null The upper-cased method to use for the link, or null when an empty method was given
       */
      protected $method;

      /**
       * @var string The URI of the page where the link is embedded (or the base href)
       */
      protected $currentUri;

      /**
       * Constructor.
       *
       * @param \DOMElement $node       A \DOMElement instance (an a, area or link element)
       * @param string      $currentUri The URI of the page where the link is embedded (or the base href)
       * @param string      $method     The method to use for the link (GET by default)
       *
       * @throws \InvalidArgumentException if the current URI does not start with "http" or "file"
       * @throws \LogicException           if the node is not an a, area or link element
       */
      public function __construct(\DOMElement $node, $currentUri, $method = 'GET')
      {
          // Only the first four characters are inspected, so "https://..." passes as well.
          $prefix = strtolower(substr($currentUri, 0, 4));
          if (!in_array($prefix, array('http', 'file'), true)) {
              throw new \InvalidArgumentException(sprintf('Current URI must be an absolute URL ("%s").', $currentUri));
          }

          $this->setNode($node);
          $this->method = $method ? strtoupper($method) : null;
          $this->currentUri = $currentUri;
      }

      /**
       * Gets the node associated with this link.
       *
       * @return \DOMElement A \DOMElement instance
       */
      public function getNode()
      {
          return $this->node;
      }

      /**
       * Gets the method associated with this link.
       *
       * @return string|null The upper-cased method, or null when an empty method was given
       */
      public function getMethod()
      {
          return $this->method;
      }

      /**
       * Gets the URI associated with this link.
       *
       * The raw URI is trimmed and then resolved against the current URI:
       * URIs with a scheme are returned untouched, an empty URI yields the
       * current URI, and anchors, query strings, scheme-relative URLs,
       * absolute paths and relative paths are each combined with the
       * relevant part of the current URI.
       *
       * @return string The URI
       */
      public function getUri()
      {
          $uri = trim($this->getRawUri());

          // absolute URL?
          if (null !== parse_url($uri, PHP_URL_SCHEME)) {
              return $uri;
          }

          // empty URI; compared with '' because "0" is falsy yet is a valid relative path
          if ('' === $uri) {
              return $this->currentUri;
          }

          // an anchor: keep the current query string, replace only the fragment
          if ('#' === $uri[0]) {
              return $this->cleanupAnchor($this->currentUri).$uri;
          }

          $baseUri = $this->cleanupUri($this->currentUri);

          // a query string: replaces both the current query string and fragment
          if ('?' === $uri[0]) {
              return $baseUri.$uri;
          }

          // absolute URL with relative schema: reuse everything before the first "//"
          if (0 === strpos($uri, '//')) {
              return preg_replace('#^([^/]*)//.*$#', '$1', $baseUri).$uri;
          }

          // reduce the base to "scheme://authority"
          $baseUri = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUri);

          // absolute path
          if ('/' === $uri[0]) {
              return $baseUri.$uri;
          }

          // relative path: resolve against the directory part of the current path
          $path = (string) parse_url(substr($this->currentUri, strlen($baseUri)), PHP_URL_PATH);
          $lastSlash = strrpos($path, '/');
          $directory = false === $lastSlash ? '' : substr($path, 0, $lastSlash);
          $path = $this->canonicalizePath($directory.'/'.$uri);

          // canonicalization may have dropped the leading slash (e.g. "/../foo")
          return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path;
      }

      /**
       * Returns raw URI data, i.e. the untouched value of the node's href attribute.
       *
       * @return string
       */
      protected function getRawUri()
      {
          return $this->node->getAttribute('href');
      }

      /**
       * Returns the canonicalized URI path (see RFC 3986, section 5.2.4).
       *
       * "." segments are dropped and each ".." segment removes the segment
       * collected before it.
       *
       * @param string $path URI path
       *
       * @return string
       */
      protected function canonicalizePath($path)
      {
          if ('' === $path || '/' === $path) {
              return $path;
          }

          // a trailing dot segment must leave a trailing slash behind
          if ('.' === substr($path, -1)) {
              $path .= '/';
          }

          $output = array();
          foreach (explode('/', $path) as $segment) {
              if ('..' === $segment) {
                  array_pop($output);
              } elseif ('.' !== $segment) {
                  $output[] = $segment;
              }
          }

          return implode('/', $output);
      }

      /**
       * Sets current \DOMElement instance.
       *
       * @param \DOMElement $node A \DOMElement instance
       *
       * @throws \LogicException If given node is not an a, area or link element
       */
      protected function setNode(\DOMElement $node)
      {
          if (!in_array($node->nodeName, array('a', 'area', 'link'), true)) {
              throw new \LogicException(sprintf('Unable to navigate from a "%s" tag.', $node->nodeName));
          }

          $this->node = $node;
      }

      /**
       * Removes the query string and the anchor from the given uri.
       *
       * @param string $uri The uri to clean
       *
       * @return string
       */
      private function cleanupUri($uri)
      {
          return $this->cleanupQuery($this->cleanupAnchor($uri));
      }

      /**
       * Removes the query string (everything from the first "?") from the uri.
       *
       * @param string $uri
       *
       * @return string
       */
      private function cleanupQuery($uri)
      {
          $pos = strpos($uri, '?');

          return false === $pos ? $uri : substr($uri, 0, $pos);
      }

      /**
       * Removes the anchor (everything from the first "#") from the uri.
       *
       * @param string $uri
       *
       * @return string
       */
      private function cleanupAnchor($uri)
      {
          $pos = strpos($uri, '#');

          return false === $pos ? $uri : substr($uri, 0, $pos);
      }
  }