| 1 |
<?php
|
| 2 |
// $Id: browser.inc,v 1.2 2009/08/21 17:07:17 dries Exp $
|
| 3 |
|
| 4 |
/**
|
| 5 |
* @file
|
| 6 |
* Browser API class.
|
| 7 |
*/
|
| 8 |
|
| 9 |
/**
|
| 10 |
* @defgroup browser Browser
|
| 11 |
* @{
|
| 12 |
* Provides a powerful text based browser through a class based API.
|
| 13 |
* The browser supports two HTTP backends natively: 1) PHP streams, and
|
| 14 |
* 2) curl. The browser also supports arbitrary HTTP request types in addition
|
| 15 |
* to GET and POST, given that the backend supports them.
|
| 16 |
*
|
| 17 |
* The browser can be used to make a simple GET request to example.com as
|
| 18 |
* shown below.
|
| 19 |
* @code
|
| 20 |
* $browser = new Browser();
|
| 21 |
* $browser->get('http://example.com');
|
| 22 |
* @endcode
|
| 23 |
* The result of the GET request can be accessed in two ways: 1) the get()
|
| 24 |
* method returns an array defining the result of the request, or 2) the
|
| 25 |
* individual properties can be accessed from the browser instance via their
|
| 26 |
* respective access methods. The following demonstrates the properties that
|
| 27 |
* are available and how to access them.
|
| 28 |
* @code
|
| 29 |
* $browser->getUrl();
|
| 30 |
* $browser->getResponseHeaders();
|
| 31 |
* $browser->getContent();
|
| 32 |
* @endcode
|
| 33 |
*
|
| 34 |
* When performing a POST request the following format is used.
|
| 35 |
* @code
|
| 36 |
* $browser = new Browser();
|
| 37 |
* $post = array(
|
| 38 |
* 'field_name1' => 'foo',
|
| 39 |
* 'checkbox1' => TRUE,
|
| 40 |
* 'multipleselect1[]' => array(
|
| 41 |
* 'value1',
|
| 42 |
* 'value2',
|
| 43 |
* ),
|
| 44 |
* );
|
| 45 |
* $browser->post('http://example.com/form', $post, 'Submit button text');
|
| 46 |
* @endcode
|
| 47 |
* To submit a multi-step form or to post to the current page the URL passed to
|
| 48 |
* post() may be set to NULL. If there were two steps on the form shown in the
|
| 49 |
* example above with the multiple select field on the second page and a submit
|
| 50 |
* button with the title "Next" on the first page the code would be as follows.
|
| 51 |
* @code
|
| 52 |
* $browser = new Browser();
|
| 53 |
* $post = array(
|
| 54 |
* 'field_name1' => 'foo',
|
| 55 |
* 'checkbox1' => TRUE,
|
| 56 |
* );
|
| 57 |
* $browser->post('http://example.com/form', $post, 'Next');
|
| 58 |
*
|
| 59 |
* $post = array(
|
| 60 |
* 'multipleselect1[]' => array(
|
| 61 |
* 'value1',
|
| 62 |
* 'value2',
|
| 63 |
* ),
|
| 64 |
* );
|
| 65 |
* $browser->post(NULL, $post, 'Final');
|
| 66 |
* @endcode
|
| 67 |
*/
|
| 68 |
|
| 69 |
/**
|
| 70 |
* Browser API class.
|
| 71 |
*
|
| 72 |
* All browser functionality is provided by this main class which manages the
|
| 73 |
* various aspects of the browser.
|
| 74 |
*/
|
| 75 |
class Browser {

  /**
   * Flag indicating if curl is available.
   *
   * @var boolean
   */
  protected $curl;

  /**
   * The handle of the current connection.
   *
   * Holds the curl handle when the curl backend is in use, otherwise the
   * stream context created by stream_context_create().
   *
   * @var resource
   */
  protected $handle;

  /**
   * The current cookie file used by curl.
   *
   * Cookies are not reused so they can be stored in memory instead of a file.
   *
   * @var mixed
   */
  protected $cookieFile = NULL;

  /**
   * The request headers.
   *
   * @var array
   */
  protected $requestHeaders = array();

  /**
   * The URL of the current page.
   *
   * @var string
   */
  protected $url;

  /**
   * The response headers of the current page, keyed by lowercase name.
   *
   * @var array
   */
  protected $headers = array();

  /**
   * The raw content of the current page.
   *
   * @var string
   */
  protected $content;

  /**
   * The BrowserPage instance representing the current page.
   *
   * Lazily created by getPage() and reset whenever the content changes.
   *
   * @var BrowserPage
   */
  protected $page;

  /**
   * Initialize the browser.
   *
   * @param $force_stream
   *   Force the use of the PHP stream wrappers instead of curl. This is used
   *   during testing to force the use of the stream wrapper so it can be
   *   tested.
   */
  public function __construct($force_stream = FALSE) {
    $this->curl = $force_stream ? FALSE : function_exists('curl_init');

    // The user agent must be set before curlOptions() is called since the
    // default curl options read it.
    $this->setUserAgent('Drupal (+http://drupal.org/)');

    if ($this->curl) {
      $this->handle = curl_init();
      curl_setopt_array($this->handle, $this->curlOptions());
    }
    else {
      $this->handle = stream_context_create();
    }
  }

  /**
   * Check that the method is supported by the backend.
   *
   * @param $method
   *   The method string identifier.
   * @return
   *   TRUE if the method is 'GET' or 'POST', otherwise FALSE.
   */
  public function isMethodSupported($method) {
    return $method == 'GET' || $method == 'POST';
  }

  /**
   * Get the request headers.
   *
   * The request headers are sent in every request made by the browser with a
   * few changes made by the individual request methods.
   *
   * @return
   *   Associative array of request headers.
   */
  public function getRequestHeaders() {
    return $this->requestHeaders;
  }

  /**
   * Set the request headers.
   *
   * This replaces all current request headers, including the user-agent and
   * any HTTP authentication information.
   *
   * @param $headers
   *   Associative array of request headers.
   */
  public function setRequestHeaders(array $headers) {
    $this->requestHeaders = $headers;
  }

  /**
   * Get the user-agent that the browser is identifying itself as.
   *
   * @return
   *   Browser user-agent, or NULL if the request headers no longer contain
   *   one (for example after setRequestHeaders() replaced them).
   */
  public function getUserAgent() {
    if (isset($this->requestHeaders['User-Agent'])) {
      return $this->requestHeaders['User-Agent'];
    }
    return NULL;
  }

  /**
   * Set the user-agent that the browser will identify itself as.
   *
   * @param $agent
   *   User-agent to identify as.
   */
  public function setUserAgent($agent) {
    $this->requestHeaders['User-Agent'] = $agent;
  }

  /**
   * Get HTTP authentication information.
   *
   * @return
   *   Authentication information in the format, username:password, or NULL if
   *   no authentication information has been set.
   */
  public function getHttpAuthentication() {
    if (isset($this->requestHeaders['Authorization'])) {
      return base64_decode($this->requestHeaders['Authorization']);
    }
    return NULL;
  }

  /**
   * Set HTTP authentication information.
   *
   * The credentials are stored base64 encoded, without an authentication
   * scheme, in the 'Authorization' request header. The backends add whatever
   * is required to send them as HTTP basic authentication.
   *
   * @param $username
   *   HTTP authentication username, which cannot contain a ":".
   * @param $password
   *   HTTP authentication password.
   */
  public function setHttpAuthentication($username, $password) {
    $this->requestHeaders['Authorization'] = base64_encode("$username:$password");
  }

  /**
   * Get the URL of the current page.
   *
   * @return
   *   The URL of the current page.
   */
  public function getUrl() {
    return $this->url;
  }

  /**
   * Get the response headers of the current page.
   *
   * @return
   *   The response headers of the current page.
   */
  public function getResponseHeaders() {
    return $this->headers;
  }

  /**
   * Get the raw content of the current page.
   *
   * @return
   *   The raw content for the current page.
   */
  public function getContent() {
    return $this->content;
  }

  /**
   * Get the BrowserPage instance for the current page.
   *
   * If the raw content is new and the page has not yet been parsed then the
   * content is parsed. Use BrowserPage::isValid() on the result to find out
   * whether parsing succeeded.
   *
   * @return
   *   BrowserPage instance for the current page.
   */
  public function getPage() {
    if (!isset($this->page)) {
      $this->page = new BrowserPage($this->url, $this->headers, $this->content);
    }
    return $this->page;
  }

  /**
   * Get the current state of the browser.
   *
   * @return
   *   An associative array containing state information, including: 1) url, 2)
   *   headers, 3) content.
   * @see getUrl()
   * @see getResponseHeaders()
   * @see getContent()
   */
  public function getState() {
    return array(
      'url' => $this->url,
      'headers' => $this->headers,
      'content' => $this->content,
    );
  }

  /**
   * Set the state of the browser.
   *
   * @param $url
   *   The URL of the current page.
   * @param $headers
   *   The response headers of the current page.
   * @param $content
   *   The raw content of the current page.
   */
  public function setState($url, $headers, $content) {
    $this->url = $url;
    $this->headers = $headers;
    $this->content = $content;

    // Clear the page variable since the content has changed.
    $this->page = NULL;

    $this->refreshCheck();
  }

  /**
   * Perform a GET request.
   *
   * @param $url
   *   Absolute URL to request.
   * @return
   *   Associative array of state information, as returned by getState().
   * @see getState()
   */
  public function get($url) {
    if ($this->curl) {
      $this->curlExecute(array(
        CURLOPT_HTTPGET => TRUE,
        CURLOPT_URL => $url,
        CURLOPT_NOBODY => FALSE,
      ));
    }
    else {
      $this->streamExecute($url, array(
        'method' => 'GET',
        'header' => array(
          'Content-Type' => 'application/x-www-form-urlencoded',
        ),
      ));
    }

    $this->refreshCheck();

    return $this->getState();
  }

  /**
   * Perform a POST request.
   *
   * @param $url
   *   Absolute URL to request, or NULL to submit the current page.
   * @param $fields
   *   Associative array of fields to submit as POST variables.
   * @param $submit
   *   Text contained in 'value' property of the submit button to press.
   * @return
   *   Associative array of state information, as returned by getState(), or
   *   FALSE if the page could not be parsed or no matching form was found.
   * @see getState()
   */
  public function post($url, array $fields, $submit) {
    // If URL is set then request the page, otherwise use the current page.
    if ($url) {
      $this->get($url);
    }
    else {
      $url = $this->url;
    }

    // getPage() always returns an object, so validity has to be checked
    // explicitly; an unparsable page cannot contain a form to submit.
    $page = $this->getPage();
    if (!$page->isValid()) {
      return FALSE;
    }

    if (($form = $this->findForm($fields, $submit)) === FALSE) {
      return FALSE;
    }

    // If form specified action then use that for the post url.
    if ($form['action']) {
      $url = $page->getAbsoluteUrl($form['action']);
    }

    // An empty string is used as numeric prefix since passing NULL to
    // http_build_query() is deprecated in recent PHP versions.
    $body = http_build_query($form['post'], '', '&');

    if ($this->curl) {
      $this->curlExecute(array(
        CURLOPT_POST => TRUE,
        CURLOPT_URL => $url,
        CURLOPT_POSTFIELDS => $body,
      ));
    }
    else {
      $this->streamExecute($url, array(
        'method' => 'POST',
        'header' => array(
          'Content-Type' => 'application/x-www-form-urlencoded',
        ),
        'content' => $body,
      ));
    }

    $this->refreshCheck();

    return $this->getState();
  }

  /**
   * Find the form that matches the conditions.
   *
   * @param $fields
   *   Associative array of fields to submit as POST variables.
   * @param $submit
   *   Text contained in 'value' property of the submit button to press.
   * @return
   *   Associative array with the keys 'action' (the form action, or FALSE if
   *   the form has none) and 'post' (the complete post array containing
   *   default values if not overridden), or FALSE if no form matching the
   *   conditions was found.
   */
  protected function findForm(array $fields, $submit) {
    $forms = $this->getPage()->getForms();

    // getForms() returns FALSE when the page could not be parsed.
    if (!$forms) {
      return FALSE;
    }

    foreach ($forms as $form) {
      if (($post = $this->processForm($form, $fields, $submit)) !== FALSE) {
        $action = (isset($form['action']) ? (string) $form['action'] : FALSE);
        return array(
          'action' => $action,
          'post' => $post,
        );
      }
    }
    return FALSE;
  }

  /**
   * Check the conditions against the specified form and process values.
   *
   * @param $form
   *   Form SimpleXMLElement object.
   * @param $fields
   *   Associative array of fields to submit as POST variables.
   * @param $submit
   *   Text contained in 'value' property of the submit button to press.
   * @return
   *   The complete post array containing default values if not overridden, or
   *   FALSE if the form does not contain the submit button.
   */
  protected function processForm($form, $fields, $submit) {
    $post = array();
    $submit_found = FALSE;

    $inputs = $this->getPage()->getInputs($form);
    if (!$inputs) {
      $inputs = array();
    }

    foreach ($inputs as $input) {
      $name = (string) $input['name'];
      $html_value = isset($input['value']) ? (string) $input['value'] : '';

      // Get type from input vs textarea and select.
      $type = isset($input['type']) ? (string) $input['type'] : $input->getName();

      // Set once a caller supplied value has been applied to this input, even
      // when the result is to omit the input (unchecked checkbox, bad file).
      $handled = FALSE;

      if (isset($fields[$name])) {
        if ($type == 'file') {
          // Make sure the file path is the absolute path.
          $file = realpath($fields[$name]);
          if ($file && is_file($file)) {
            // Signify that the post field is a file in case backend needs to
            // perform additional processing.
            $post[$name] = '@' . $file;
          }
          // Known type, field processed.
          $handled = TRUE;
          unset($fields[$name]);
        }
        elseif (($processed_value = $this->processField($input, $type, $fields[$name], $html_value)) !== NULL) {
          // Value may be omitted (checkbox).
          if ($processed_value !== FALSE) {
            if (is_array($processed_value)) {
              $post += $processed_value;
            }
            else {
              $post[$name] = $processed_value;
            }
          }
          // Known type, field processed.
          $handled = TRUE;
          unset($fields[$name]);
        }
      }

      // A radio button that did not match while the caller's value for its
      // group is still pending must not fall back to the HTML default: doing
      // so would discard the caller's value before the matching radio button
      // of the group is reached.
      $pending_radio = ($type == 'radio' && isset($fields[$name]));

      // Fall back to the default value (value in HTML) when no value was
      // specified, or when the field is of a type processField() does not
      // know. An explicitly handled field is never overridden by its default.
      if (!$handled && !$pending_radio && !isset($post[$name])) {
        if (($default_value = $this->getDefaultFieldValue($input, $type, $html_value)) !== NULL) {
          if (is_array($default_value)) {
            // Multiple selects return one entry per selected option, already
            // keyed by the complete field name.
            $post += $default_value;
          }
          else {
            $post[$name] = $default_value;
          }
          unset($fields[$name]);
        }
      }

      // Check if the input is the submit button that is to be pressed.
      if (($type == 'submit' || $type == 'image') && $submit == $html_value) {
        $post[$name] = $html_value;
        $submit_found = TRUE;
      }
    }

    if ($submit_found) {
      return $post;
    }
    return FALSE;
  }

  /**
   * Get the value to be sent for the specified field.
   *
   * @param $input
   *   Input SimpleXMLElement object.
   * @param $type
   *   Input type: text, textarea, password, radio, checkbox, or select.
   * @param $new_value
   *   The new value to be assigned to the input. For a multiple select this
   *   is an array of option values.
   * @param $html_value
   *   The cleaned default value for the input from the HTML value.
   * @return
   *   The value to post, an array of values keyed by complete field name for
   *   a multiple select, FALSE if the field is to be omitted (unchecked
   *   checkbox), or NULL if the value does not apply to the input or the
   *   input type is not known.
   */
  protected function processField($input, $type, $new_value, $html_value) {
    switch ($type) {
      case 'text':
      case 'textarea':
      case 'password':
        return $new_value;

      case 'radio':
        if ($new_value == $html_value) {
          return $new_value;
        }
        return NULL;

      case 'checkbox':
        // If $new_value is set to FALSE then omit checkbox value, otherwise
        // pass original value.
        if ($new_value === FALSE) {
          return FALSE;
        }
        return $html_value;

      case 'select':
        // Remove the ending [] from multi-select element name.
        $key = preg_replace('/\[\]$/', '', (string) $input['name']);

        $options = $this->getPage()->getSelectOptions($input);
        $index = 0;
        $out = array();
        foreach ($options as $value => $text) {
          if (is_array($new_value)) {
            // Multiple select: collect every requested value that is
            // actually offered by the select.
            if (in_array($value, $new_value)) {
              $out[$key . '[' . $index++ . ']'] = (string) $value;
            }
          }
          elseif ($new_value == $value) {
            return $new_value;
          }
        }
        return ($out ? $out : NULL);

      default:
        return NULL;
    }
  }

  /**
   * Get the cleaned default value for the input from the HTML value.
   *
   * @param $input
   *   Input SimpleXMLElement object.
   * @param $type
   *   Input type: text, textarea, password, radio, checkbox, or select.
   * @param $html_value
   *   The default value for the input, as specified in the HTML.
   * @return
   *   The default value to post, an array of values keyed by complete field
   *   name for a multiple select, or NULL if the input is not submitted by
   *   default.
   */
  protected function getDefaultFieldValue($input, $type, $html_value) {
    switch ($type) {
      case 'textarea':
        return (string) $input;

      case 'select':
        // Remove the ending [] from multi-select element name.
        $key = preg_replace('/\[\]$/', '', (string) $input['name']);
        $single = empty($input['multiple']);

        $options = $this->getPage()->getSelectOptionElements($input);
        $first = TRUE;
        $index = 0;
        $out = array();
        foreach ($options as $option) {
          // For single select, we load the first option, if there is a
          // selected option that will overwrite it later.
          if (isset($option['selected']) || ($first && $single)) {
            $first = FALSE;
            if ($single) {
              $out[$key] = (string) $option['value'];
            }
            else {
              $out[$key . '[' . $index++ . ']'] = (string) $option['value'];
            }
          }
        }

        // Only decide once every option has been inspected.
        if ($single) {
          return (isset($out[$key]) ? $out[$key] : NULL);
        }
        return ($out ? $out : NULL);

      case 'file':
        return NULL;

      case 'submit':
      case 'image':
      case 'button':
      case 'reset':
        // Buttons are never submitted by default; processForm() adds the
        // submit button that is pressed.
        return NULL;

      case 'radio':
      case 'checkbox':
        if (!isset($input['checked'])) {
          return NULL;
        }
        // Deliberately no break.
      default:
        return $html_value;
    }
  }

  /**
   * Perform a request of arbitrary type.
   *
   * Please use get() and post() for GET and POST requests respectively.
   *
   * @param $method
   *   The method string identifier.
   * @param $url
   *   Absolute URL to request.
   * @param $additional
   *   Additional parameters related to the particular request method.
   * @return
   *   FALSE if the method is not supported. Performing the request has not
   *   been implemented yet, so nothing is returned for supported methods.
   * @see getState()
   */
  public function request($method, $url, array $additional) {
    if (!$this->isMethodSupported($method)) {
      return FALSE;
    }

    // TODO
  }

  /**
   * Perform the request using the PHP stream wrapper.
   *
   * @param $url
   *   The url to request.
   * @param $options
   *   The HTTP stream context options to be passed to
   *   stream_context_set_params(). The 'header' option is an associative
   *   array of headers.
   */
  protected function streamExecute($url, array $options) {
    if (!isset($options['header'])) {
      $options['header'] = array();
    }

    // Context options persist between requests, so explicitly clear the
    // request body when none is given. Otherwise the body of an earlier POST
    // would be sent again with this request.
    $options += array('content' => '');

    // Merge default request headers with the passed headers.
    $headers = $this->requestHeaders + $options['header'];

    // The credentials are stored without an authentication scheme, see
    // setHttpAuthentication(). Base64 data never contains a space, so a value
    // without one is a bare credential that needs the scheme to be sent.
    if (isset($headers['Authorization']) && strpos($headers['Authorization'], ' ') === FALSE) {
      $headers['Authorization'] = 'Basic ' . $headers['Authorization'];
    }

    // Generate header string to be sent in http request.
    $options['header'] = $this->headerString($headers);

    // Update the handler options.
    stream_context_set_params($this->handle, array(
      'options' => array(
        'http' => $options,
      ),
    ));

    // Make the request. The HTTP stream wrapper populates
    // $http_response_header in the local scope; it stays unset when the
    // request could not be made at all.
    $this->content = file_get_contents($url, FALSE, $this->handle);
    $this->url = $url;
    $this->headers = isset($http_response_header) ? $this->headerParseAll($http_response_header) : array();
    $this->page = NULL;
  }

  /**
   * Perform curl_exec() with the specified option changes.
   *
   * @param $options
   *   Curl options to set, any options not set will maintain their previous
   *   value.
   */
  public function curlExecute(array $options) {
    // Headers need to be reset since callback appends.
    $this->headers = array();

    // Ensure that request headers are up to date.
    $credentials = $this->getHttpAuthentication();
    if ($credentials) {
      curl_setopt($this->handle, CURLOPT_USERPWD, $credentials);
    }
    curl_setopt($this->handle, CURLOPT_USERAGENT, (string) $this->getUserAgent());
    curl_setopt($this->handle, CURLOPT_HTTPHEADER, $this->curlHeaders());

    curl_setopt_array($this->handle, $options);
    $this->content = curl_exec($this->handle);
    $this->url = curl_getinfo($this->handle, CURLINFO_EFFECTIVE_URL);

    // $this->headers should be filled by $this->curlHeaderCallback().
    $this->page = NULL;
  }

  /**
   * Get the default curl options to be used with each request.
   *
   * NOTE(review): peer and host verification are disabled for SSL, which is
   * presumably intended for testing against local sites; confirm before
   * using the browser against untrusted hosts.
   *
   * @return
   *   Default curl options.
   */
  protected function curlOptions() {
    return array(
      CURLOPT_COOKIEJAR => $this->cookieFile,
      CURLOPT_FOLLOWLOCATION => TRUE,
      CURLOPT_HEADERFUNCTION => array($this, 'curlHeaderCallback'),
      CURLOPT_HTTPHEADER => $this->curlHeaders(),
      CURLOPT_RETURNTRANSFER => TRUE,
      CURLOPT_SSL_VERIFYPEER => FALSE,
      CURLOPT_SSL_VERIFYHOST => FALSE,
      CURLOPT_URL => '/',
      CURLOPT_USERAGENT => (string) $this->getUserAgent(),
    );
  }

  /**
   * Format the request headers for use with CURLOPT_HTTPHEADER.
   *
   * Curl expects a list of complete "Name: value" header lines rather than
   * an associative array. The user-agent and the HTTP authentication
   * information are left out since they are passed to curl through
   * CURLOPT_USERAGENT and CURLOPT_USERPWD respectively.
   *
   * @return
   *   Array of header lines.
   */
  protected function curlHeaders() {
    $lines = array();
    foreach ($this->requestHeaders as $name => $value) {
      if (in_array((string) $name, array('User-Agent', 'Authorization'), TRUE)) {
        continue;
      }
      $lines[] = "$name: $value";
    }
    return $lines;
  }

  /**
   * Reads response headers and stores them in the $headers array.
   *
   * @param $handler
   *   The curl handler.
   * @param $header
   *   A header line.
   * @return
   *   The string length of the header. (required by curl)
   */
  protected function curlHeaderCallback($handler, $header) {
    // Ignore blank header lines.
    $clean_header = trim($header);
    if ($clean_header) {
      // The first occurrence of a header is kept when it is repeated.
      $this->headers += $this->headerParse($clean_header);
    }

    // Curl requires strlen() to be returned.
    return strlen($header);
  }

  /**
   * Generate a header string given the associative array of headers.
   *
   * @param $headers
   *   Associative array of headers.
   * @return
   *   Header string to be used with stream.
   */
  protected function headerString(array $headers) {
    $string = '';
    foreach ($headers as $key => $header) {
      $string .= "$key: $header\r\n";
    }
    return $string;
  }

  /**
   * Parse the response header array to create an associative array.
   *
   * Lines that are not a "name: value" pair, such as the status line, are
   * skipped. The first occurrence of a header is kept when it is repeated.
   *
   * @param $headers
   *   Array of headers.
   * @return
   *   An associative array of headers.
   */
  protected function headerParseAll(array $headers) {
    $out = array();
    foreach ($headers as $header) {
      $out += $this->headerParse($header);
    }
    return $out;
  }

  /**
   * Parse an individual header into name and value.
   *
   * @param $header
   *   A header string.
   * @return
   *   Parsed header as array($name => $value), or array() if parse failed.
   */
  protected function headerParse($header) {
    $parts = explode(':', $header, 2);

    // Ensure header line is valid.
    if (count($parts) == 2) {
      $name = $this->headerName(trim($parts[0]));
      return array($name => trim($parts[1]));
    }
    return array();
  }

  /**
   * Ensure that header name is formatted with all lowercase letters.
   *
   * @param $name
   *   Header name to format.
   * @return
   *   Formatted header name.
   */
  protected function headerName($name) {
    return strtolower($name);
  }

  /**
   * Check for a refresh signifier.
   *
   * A refresh signifier can either be the 'Location' HTTP header or the meta
   * tag 'http-equiv="Refresh"'.
   */
  protected function refreshCheck() {
    // If not handled by backend wrapper then go ahead and handle.
    // NOTE(review): response header names are stored lowercase by
    // headerName(), so this key does not match headers parsed by this class
    // and only applies to headers passed to setState(). The curl backend is
    // configured to follow redirects itself; presumably the stream backend
    // does as well. Confirm before changing the key, since a match would
    // request the redirect target a second time.
    if (isset($this->headers['Location'])) {
      // Expect absolute URL.
      $this->get($this->headers['Location']);
    }

    if (($page = $this->getPage()) !== FALSE && ($tag = $page->getMetaTag('Refresh', 'http-equiv'))) {
      // Parse the content attribute of the meta tag for the format:
      // "[delay]; URL=[path_to_redirect_to]".
      if (preg_match('/\d+;\s*URL=(?P<url>.*)/i', (string) $tag['content'], $match)) {
        $this->get($page->getAbsoluteUrl(decode_entities($match['url'])));
      }
    }
  }

  /**
   * Close the wrapper connection.
   */
  public function __destruct() {
    if (isset($this->handle)) {
      if ($this->curl) {
        curl_close($this->handle);
      }
      $this->handle = NULL;
    }
  }
}
|
| 840 |
|
| 841 |
|
| 842 |
/**
|
| 843 |
* Represents a page of content that has been fetched by the Browser. The class
|
| 844 |
* provides a number of convenience methods that relate to page content.
|
| 845 |
*/
|
| 846 |
class BrowserPage {
|
| 847 |
|
| 848 |
/**
|
| 849 |
* The URL of the page.
|
| 850 |
*
|
| 851 |
* @var string
|
| 852 |
*/
|
| 853 |
protected $url;
|
| 854 |
|
| 855 |
/**
|
| 856 |
* The response headers of the page.
|
| 857 |
*
|
| 858 |
* @var Array
|
| 859 |
*/
|
| 860 |
protected $headers;
|
| 861 |
|
| 862 |
/**
|
| 863 |
* The root element of the page.
|
| 864 |
*
|
| 865 |
* @var SimpleXMLElement
|
| 866 |
*/
|
| 867 |
protected $root;
|
| 868 |
|
| 869 |
/**
 * Initialize the BrowserPage with the page state information.
 *
 * The raw content is parsed immediately; use isValid() to find out whether
 * parsing succeeded.
 *
 * @param $url
 *   The URL of the page.
 * @param $headers
 *   The response headers of the page.
 * @param $content
 *   The raw content of the page.
 */
public function __construct($url, $headers, $content) {
  $this->url = $url;
  $this->headers = $headers;
  $this->root = $this->load($content);
}

/**
 * Legacy constructor name.
 *
 * Methods named after the class are no longer treated as constructors by
 * current PHP versions, so construction is handled by __construct(). This
 * method is only kept for code that calls it by name.
 *
 * @param $url
 *   The URL of the page.
 * @param $headers
 *   The response headers of the page.
 * @param $content
 *   The raw content of the page.
 */
public function BrowserPage($url, $headers, $content) {
  $this->__construct($url, $headers, $content);
}
|
| 884 |
|
| 885 |
/**
 * Attempt to parse the raw content using DOM and import it into SimpleXML.
 *
 * @param $content
 *   The raw content of the page.
 * @return
 *   The root element of the page, or FALSE if the content is empty or could
 *   not be parsed.
 */
protected function load($content) {
  // There is nothing to parse in empty content, or in the FALSE left behind
  // by a failed request.
  if (!is_string($content) || $content === '') {
    return FALSE;
  }

  // loadHTML() is an instance method and has to be called on a document
  // object. Use DOM to load HTML soup, and hide warnings.
  $document = new DOMDocument();
  if (@$document->loadHTML($content)) {
    $root = simplexml_import_dom($document);
    // Compare by type: an element object can evaluate to FALSE in a boolean
    // context, and a failed import must be reported as FALSE for isValid().
    if ($root instanceof SimpleXMLElement) {
      return $root;
    }
  }
  return FALSE;
}
|
| 901 |
|
| 902 |
/**
 * Check if the raw content is valid and could be parsed.
 *
 * The content counts as valid unless the stored root element is FALSE.
 *
 * @return
 *   TRUE if content is valid, otherwise FALSE.
 */
public function isValid() {
  $parse_failed = ($this->root === FALSE);
  return !$parse_failed;
}
|
| 911 |
|
| 912 |
/**
 * Perform an xpath search on the contents of the page.
 *
 * The search is run against the root element of the page, usually the HTML
 * tag. To search below a different element, run the query on that element
 * instead, as in the example below.
 * @code
 *   $parent = $page->xpath('.//parent');
 *   $parent[0]->xpath('.//children');
 * @endcode
 *
 * @param $xpath
 *   The xpath string.
 * @return
 *   An array of SimpleXMLElement objects or FALSE in case of an error, which
 *   includes the page not being valid.
 * @link http://us.php.net/manual/function.simplexml-element-xpath.php
 */
public function xpath($xpath) {
  // Without a parsed document there is nothing to search.
  return $this->isValid() ? $this->root->xpath($xpath) : FALSE;
}
|
| 935 |
|
| 936 |
/**
 * Get all the meta tags.
 *
 * @return
 *   An array of SimpleXMLElement objects representing meta tags, as returned
 *   by xpath().
 */
public function getMetaTags() {
  $meta_tags = $this->xpath('//meta');
  return $meta_tags;
}
|
| 945 |
|
| 946 |
/**
 * Get a specific meta tag.
 *
 * The key is matched without regard to letter case, so that 'Refresh' also
 * finds a tag written as http-equiv="refresh".
 *
 * @param $key
 *   The meta tag key.
 * @param $type
 *   The type of meta tag, either: 'name' or 'http-equiv'.
 * @return
 *   A SimpleXMLElement object representing the meta tag, or FALSE if not
 *   found.
 */
public function getMetaTag($key, $type = 'name') {
  if ($tags = $this->getMetaTags()) {
    $wanted = strtolower($key);
    foreach ($tags as $tag) {
      // Tags without the requested attribute can never match.
      if (isset($tag[$type]) && strtolower((string) $tag[$type]) == $wanted) {
        return $tag;
      }
    }
  }
  return FALSE;
}
|
| 967 |
|
| 968 |
/**
 * Get all the form elements.
 *
 * @return
 *   An array of SimpleXMLElement objects representing form elements, as
 *   returned by xpath().
 */
public function getForms() {
  $forms = $this->xpath('//form');
  return $forms;
}
|
| 977 |
|
| 978 |
/**
|
| 979 |
* Get all the input elements, or only those nested within a parent element.
|
| 980 |
*
|
| 981 |
* @param $parent
|
| 982 |
* SimpleXMLElement representing the parent to search within.
|
| 983 |
* @return
|
| 984 |
* An array of SimpleXMLElement objects representing form elements.
|
| 985 |
*/
|
| 986 |
public function getInputs($parent = NULL) {
  // Matches input, textarea and select elements at any depth below the
  // context node the query is run against.
  $query = './/input|.//textarea|.//select';

  // Test the type rather than the truthiness of $parent: a SimpleXMLElement
  // built from an empty element without attributes casts to FALSE, which
  // would wrongly widen the search to the whole document.
  if ($parent instanceof SimpleXMLElement) {
    return $parent->xpath($query);
  }

  // No parent element given: search the page as a whole.
  return $this->xpath($query);
}
|
| 992 |
|
| 993 |
/**
|
| 994 |
* Get all the options contained by a select, including nested options.
|
| 995 |
*
|
| 996 |
* @param $select
|
| 997 |
* SimpleXMLElement representing the select to extract options from.
|
| 998 |
* @return
|
| 999 |
* Associative array where the keys represent each option value and the
|
| 1000 |
* value is the text contained within the option tag. For example:
|
| 1001 |
* @code
|
| 1002 |
* array(
|
| 1003 |
* 'option1' => 'Option 1',
|
| 1004 |
* 'option2' => 'Option 2',
|
| 1005 |
* )
|
| 1006 |
* @endcode
|
| 1007 |
*/
|
| 1008 |
public function getSelectOptions(SimpleXMLElement $select) {
  // Map each option's value attribute to the text of that option. Options
  // sharing a value overwrite one another, the last one winning.
  $labels = array();
  foreach ($this->getSelectOptionElements($select) as $option) {
    $value = (string) $option['value'];
    $labels[$value] = $this->asText($option);
  }
  return $labels;
}
|
| 1017 |
|
| 1018 |
/**
|
| 1019 |
* Get all selected options contained by a select, including nested options.
|
| 1020 |
*
|
| 1021 |
* @param $select
|
| 1022 |
* SimpleXMLElement representing the select to extract options from.
|
| 1023 |
* @return
|
| 1024 |
* Associative array of selected items in the format described by
|
| 1025 |
* BrowserPage->getSelectOptions().
|
| 1026 |
* @see BrowserPage->getSelectOptions()
|
| 1027 |
*/
|
| 1028 |
public function getSelectedOptions(SimpleXMLElement $select) {
  // The helpers are methods of this class and must be called through $this.
  $elements = $this->getSelectOptionElements($select);

  $options = array();
  foreach ($elements as $element) {
    // Inspect the 'selected' attribute of the option currently being
    // visited, not the list of options as a whole.
    if (isset($element['selected'])) {
      $options[(string) $element['value']] = $this->asText($element);
    }
  }
  return $options;
}
|
| 1039 |
|
| 1040 |
/**
|
| 1041 |
* Get all the options contained by a select, including nested options.
|
| 1042 |
*
|
| 1043 |
* @param $element
|
| 1044 |
* SimpleXMLElement representing the select to extract options from.
|
| 1045 |
* @return
|
| 1046 |
* An array of SimpleXMLElement objects representing option elements.
|
| 1047 |
*/
|
| 1048 |
public function getSelectOptionElements(SimpleXMLElement $element) {
  // Direct <option> children are collected first.
  $found = array();
  foreach ($element->option as $item) {
    $found[] = $item;
  }

  // Without option groups there is nothing further to look at.
  if (!isset($element->optgroup)) {
    return $found;
  }

  // Recurse into each <optgroup> and append whatever it yields.
  foreach ($element->optgroup as $optgroup) {
    foreach ($this->getSelectOptionElements($optgroup) as $nested) {
      $found[] = $nested;
    }
  }
  return $found;
}
|
| 1064 |
|
| 1065 |
/**
|
| 1066 |
* Get the absolute URL for a given path, relative to the page.
|
| 1067 |
*
|
| 1068 |
* @param $path
|
| 1069 |
* A path relative to the page or absolute.
|
| 1070 |
* @return
|
| 1071 |
* An absolute path.
|
| 1072 |
*/
|
| 1073 |
public function getAbsoluteUrl($path) {
  $path = (string) $path;

  // A reference that already carries a scheme is absolute; return it as is.
  // parse_url() yields FALSE for seriously malformed input, in which case
  // isset() is simply FALSE and the reference is treated as relative.
  $parts = @parse_url($path);
  if (isset($parts['scheme'])) {
    return $path;
  }

  $base = $this->getBaseUrl();

  // An empty reference resolves to the base itself. Handling it here also
  // avoids reading offset 0 of an empty string below.
  if ($path === '') {
    return $base;
  }

  // Reference relative to the base: append it unchanged.
  if ($path[0] != '/') {
    return $base . $path;
  }

  $base_parts = @parse_url($base);
  if (!isset($base_parts['scheme'])) {
    // The base offers no scheme to borrow, so nothing more complete than
    // the given path can be built.
    return $path;
  }

  // Scheme-relative reference ("//host/path"): only the scheme is inherited.
  if (isset($path[1]) && $path[1] == '/') {
    return $base_parts['scheme'] . ':' . $path;
  }

  // Root-relative reference: keep the whole authority of the base
  // (credentials, host and port), not just the host name.
  $origin = $base_parts['scheme'] . '://';
  if (isset($base_parts['user'])) {
    $origin .= $base_parts['user'];
    if (isset($base_parts['pass'])) {
      $origin .= ':' . $base_parts['pass'];
    }
    $origin .= '@';
  }
  if (isset($base_parts['host'])) {
    $origin .= $base_parts['host'];
  }
  if (isset($base_parts['port'])) {
    $origin .= ':' . $base_parts['port'];
  }
  return $origin . $path;
}
|
| 1087 |
|
| 1088 |
/**
|
| 1089 |
* Get the base URL of the page.
|
| 1090 |
*
|
| 1091 |
* If a 'base' HTML element is defined then the URL it defines is used as the
|
| 1092 |
* base URL for the page, otherwise the page URL is used to determine the
|
| 1093 |
* base URL.
|
| 1094 |
*
|
| 1095 |
* @return
|
| 1096 |
* The base URL of the page.
|
| 1097 |
*/
|
| 1098 |
public function getBaseUrl() {
  $base = NULL;

  // Prefer the href of the first <base> element that declares one. A <base>
  // without an href attribute is skipped.
  $elements = $this->xpath('.//base');
  if ($elements) {
    foreach ($elements as $element) {
      if (isset($element['href'])) {
        $base = (string) $element['href'];
        break;
      }
    }
  }

  // No usable <base> element: derive the base from the page URL instead.
  if ($base === NULL) {
    $base = (string) $this->url;

    // Drop the query string and the fragment before looking for slashes.
    foreach (array('?', '#') as $delimiter) {
      $pos = strpos($base, $delimiter);
      if ($pos !== FALSE) {
        $base = substr($base, 0, $pos);
      }
    }

    // Ignore everything after the last forward slash. The search starts
    // behind "scheme://" so that a URL without a path, such as
    // "http://example.com", is not cut off at the scheme separator.
    $scheme_end = strpos($base, '://');
    $offset = ($scheme_end === FALSE) ? 0 : $scheme_end + 3;
    $slash = strrpos($base, '/', $offset);
    if ($slash !== FALSE) {
      $base = substr($base, 0, $slash + 1);
    }
    elseif ($scheme_end === FALSE) {
      // A bare file name without any slash has no directory part.
      $base = '';
    }
  }

  // Ensure that the last character is a forward slash; this also turns an
  // empty base into "/".
  if (substr($base, -1) != '/') {
    $base .= '/';
  }
  return $base;
}
|
| 1127 |
|
| 1128 |
/**
|
| 1129 |
* Extract the text contained by the element.
|
| 1130 |
*
|
| 1131 |
* Strips all XML/HTML tags, decodes HTML entities, and trims the result.
|
| 1132 |
*
|
| 1133 |
* @param $element
|
| 1134 |
* SimpleXMLElement to extract text from.
|
| 1135 |
* @return
|
| 1136 |
* Extracted text.
|
| 1137 |
*/
|
| 1138 |
public function asText(SimpleXMLElement $element) {
  // Serialise the element, then remove tags, decode entities and trim
  // surrounding whitespace, in that order.
  $markup = $element->asXML();
  $without_tags = strip_tags($markup);
  $decoded = html_entity_decode($without_tags);
  return trim($decoded);
}
|
| 1141 |
}
|
| 1142 |
|
| 1143 |
/**
|
| 1144 |
* @} End of "defgroup browser".
|
| 1145 |
*/
|