/[drupal]/drupal/includes/browser.inc
ViewVC logotype

Contents of /drupal/includes/browser.inc

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.3 - (show annotations) (download) (as text)
Mon Aug 31 05:56:54 2009 UTC (2 months, 4 weeks ago) by dries
Branch: MAIN
CVS Tags: DRUPAL-7-0-UNSTABLE-9
Changes since 1.2: +29 -1 lines
File MIME type: text/x-php
- Patch #553276 by boombatower: added HTTP authentication support to the (simpletest) browser.
1 <?php
2 // $Id: browser.inc,v 1.2 2009/08/21 17:07:17 dries Exp $
3
4 /**
5 * @file
6 * Browser API class.
7 */
8
9 /**
10 * @defgroup browser Browser
11 * @{
12 * Provides a powerful text based browser through a class based API.
13 * The browser supports two HTTP backends natively: 1) PHP streams, and
14 * 2) curl. The browser also supports arbitrary HTTP request types in addition
15 * to GET and POST, given that the backend supports them.
16 *
17 * The browser can be used to make a simple GET request to example.com as
18 * shown below.
19 * @code
20 * $browser = new Browser();
21 * $browser->get('http://example.com');
22 * @endcode
23 * The result of the GET request can be accessed in two ways: 1) the get()
24 * method returns an array defining the result of the request, or 2) the
25 * individual properties can be accessed from the browser instance via their
26 * respective access methods. The following demonstrates the properties that
27 * are available and how to access them.
28 * @code
29 * $browser->getUrl();
30 * $browser->getResponseHeaders();
31 * $browser->getContent();
32 * @endcode
33 *
34 * When performing a POST request the following format is used.
35 * @code
36 * $browser = new Browser();
37 * $post = array(
38 * 'field_name1' => 'foo',
39 * 'checkbox1' => TRUE,
40 * 'multipleselect1[]' => array(
41 * 'value1',
42 * 'value2',
43 * ),
44 * );
45 * $browser->post('http://example.com/form', $post, 'Submit button text');
46 * @endcode
47 * To submit a multi-step form or to post to the current page the URL passed to
48 * post() may be set to NULL. If there were two steps on the form shown in the
49 * example above with the multiple select field on the second page and a submit
50 * button with the title "Next" on the first page the code would be as follows.
51 * @code
52 * $browser = new Browser();
53 * $post = array(
54 * 'field_name1' => 'foo',
55 * 'checkbox1' => TRUE,
56 * );
57 * $browser->post('http://example.com/form', $post, 'Next');
58 *
59 * $post = array(
60 * 'multipleselect1[]' => array(
61 * 'value1',
62 * 'value2',
63 * ),
64 * );
65 * $browser->post(NULL, $post, 'Final');
66 * @endcode
67 */
68
69 /**
70 * Browser API class.
71 *
72 * All browser functionality is provided by this main class which manages the
73 * various aspects of the browser.
74 */
class Browser {

  /**
   * Flag indicating if curl is available.
   *
   * @var boolean
   */
  protected $curl;

  /**
   * The handle of the current connection.
   *
   * A curl handle when the curl backend is in use, otherwise a stream context
   * created by stream_context_create().
   *
   * @var resource
   */
  protected $handle;

  /**
   * The current cookie file used by curl.
   *
   * Cookies are not reused so they can be stored in memory instead of a file.
   *
   * @var mixed
   */
  protected $cookieFile = NULL;

  /**
   * The request headers, keyed by header name.
   *
   * @var array
   */
  protected $requestHeaders = array();

  /**
   * The URL of the current page.
   *
   * @var string
   */
  protected $url;

  /**
   * The response headers of the current page.
   *
   * Headers parsed by the backends are keyed by lowercase header name, see
   * headerName(). Headers supplied through setState() are stored as given.
   *
   * @var array
   */
  protected $headers = array();

  /**
   * The raw content of the current page.
   *
   * @var string
   */
  protected $content;

  /**
   * The BrowserPage instance representing the current page.
   *
   * NULL until the page is first requested through getPage().
   *
   * @var BrowserPage
   */
  protected $page;

  /**
   * The HTTP status code of the current page.
   *
   * NULL when the status is unknown, for example after setState().
   *
   * @var integer
   */
  protected $responseCode = NULL;

  /**
   * Number of redirects currently being followed by refreshCheck().
   *
   * @var integer
   */
  protected $refreshDepth = 0;

  /**
   * Maximum number of nested redirects that refreshCheck() will follow.
   *
   * Prevents endless recursion on pages that refresh to themselves.
   *
   * @var integer
   */
  protected $refreshLimit = 10;

  /**
   * Initialize the browser.
   *
   * @param $force_stream
   *   Force the use of the PHP stream wrappers instead of curl. This is used
   *   during testing to force the use of the stream wrapper so it can be
   *   tested.
   */
  public function __construct($force_stream = FALSE) {
    $this->curl = $force_stream ? FALSE : function_exists('curl_init');
    $this->setUserAgent('Drupal (+http://drupal.org/)');

    if ($this->curl) {
      $this->handle = curl_init();
      curl_setopt_array($this->handle, $this->curlOptions());
    }
    else {
      $this->handle = stream_context_create();
    }
  }

  /**
   * Check that the method is supported by the backend.
   *
   * @param $method
   *   The method string identifier.
   * @return
   *   TRUE if the method is 'GET' or 'POST', otherwise FALSE.
   */
  public function isMethodSupported($method) {
    // Strict comparison: a loose one would accept 0 on PHP versions before 8.
    return in_array($method, array('GET', 'POST'), TRUE);
  }

  /**
   * Get the request headers.
   *
   * The request headers are sent in every request made by the browser with a
   * few changes made by the individual request methods.
   *
   * @return
   *   Associative array of request headers.
   */
  public function getRequestHeaders() {
    return $this->requestHeaders;
  }

  /**
   * Set the request headers.
   *
   * @param $headers
   *   Associative array of request headers.
   */
  public function setRequestHeaders(array $headers) {
    $this->requestHeaders = $headers;
  }

  /**
   * Get the user-agent that the browser is identifying itself as.
   *
   * @return
   *   Browser user-agent, or NULL if no User-Agent request header is set.
   */
  public function getUserAgent() {
    return isset($this->requestHeaders['User-Agent']) ? $this->requestHeaders['User-Agent'] : NULL;
  }

  /**
   * Set the user-agent that the browser will identify itself as.
   *
   * @param $agent
   *   User-agent to identify as.
   */
  public function setUserAgent($agent) {
    $this->requestHeaders['User-Agent'] = $agent;
  }

  /**
   * Get HTTP authentication information.
   *
   * @return
   *   Authentication information in the format, username:password, or NULL if
   *   no basic authentication information is set.
   */
  public function getHttpAuthentication() {
    if (!isset($this->requestHeaders['Authorization'])) {
      return NULL;
    }

    $value = $this->requestHeaders['Authorization'];
    if (preg_match('/^Basic\s+(.*)$/i', $value, $match)) {
      $value = $match[1];
    }
    elseif (strpos($value, ' ') !== FALSE) {
      // Some other authentication scheme; there is no username:password pair.
      return NULL;
    }
    // A bare base64 value (no scheme) is still accepted so that headers set
    // directly through setRequestHeaders() keep working.
    return base64_decode($value);
  }

  /**
   * Set HTTP authentication information.
   *
   * The credentials are stored as a complete basic authentication header
   * value ("Basic <base64>") so the header is valid when sent as is.
   *
   * @param $username
   *   HTTP authentication username, which cannot contain a ":".
   * @param $password
   *   HTTP authentication password.
   */
  public function setHttpAuthentication($username, $password) {
    $this->requestHeaders['Authorization'] = 'Basic ' . base64_encode("$username:$password");
  }

  /**
   * Get the URL of the current page.
   *
   * @return
   *   The URL of the current page.
   */
  public function getUrl() {
    return $this->url;
  }

  /**
   * Get the response headers of the current page.
   *
   * @return
   *   The response headers of the current page.
   */
  public function getResponseHeaders() {
    return $this->headers;
  }

  /**
   * Get the raw content of the current page.
   *
   * @return
   *   The raw content for the current page.
   */
  public function getContent() {
    return $this->content;
  }

  /**
   * Get the BrowserPage instance for the current page.
   *
   * If the raw content is new and the page has not yet been parsed then the
   * content is parsed now. A BrowserPage is always returned; use its isValid()
   * method to find out whether the content could be parsed.
   *
   * @return
   *   BrowserPage instance for the current page.
   */
  public function getPage() {
    if (!isset($this->page)) {
      $this->page = new BrowserPage($this->url, $this->headers, $this->content);
    }
    return $this->page;
  }

  /**
   * Get the current state of the browser.
   *
   * @return
   *   An associative array containing state information, including: 1) url, 2)
   *   headers, 3) content.
   * @see getUrl()
   * @see getResponseHeaders()
   * @see getContent()
   */
  public function getState() {
    return array(
      'url' => $this->url,
      'headers' => $this->headers,
      'content' => $this->content,
    );
  }

  /**
   * Set the state of the browser.
   *
   * @param $url
   *   The URL of the current page.
   * @param $headers
   *   The response headers of the current page.
   * @param $content
   *   The raw content of the current page.
   */
  public function setState($url, $headers, $content) {
    $this->url = $url;
    $this->headers = $headers;
    $this->content = $content;

    // The status of an injected state is unknown.
    $this->responseCode = NULL;

    // Clear the page variable since the content has changed.
    $this->page = NULL;

    $this->refreshCheck();
  }

  /**
   * Perform a GET request.
   *
   * @param $url
   *   Absolute URL to request.
   * @return
   *   Associative array of state information, as returned by getState().
   * @see getState()
   */
  public function get($url) {
    if ($this->curl) {
      $this->curlExecute(array(
        CURLOPT_HTTPGET => TRUE,
        CURLOPT_URL => $url,
        CURLOPT_NOBODY => FALSE,
      ));
    }
    else {
      $this->streamExecute($url, array(
        'method' => 'GET',
        'header' => array(
          'Content-Type' => 'application/x-www-form-urlencoded',
        ),
      ));
    }

    $this->refreshCheck();

    return $this->getState();
  }

  /**
   * Perform a POST request.
   *
   * @param $url
   *   Absolute URL to request, or NULL to submit the current page.
   * @param $fields
   *   Associative array of fields to submit as POST variables.
   * @param $submit
   *   Text contained in the 'value' property of the submit button to press.
   * @return
   *   Associative array of state information, as returned by getState(), or
   *   FALSE if the page could not be parsed or no matching form was found.
   * @see getState()
   */
  public function post($url, array $fields, $submit) {
    // If URL is set then request the page, otherwise use the current page.
    if ($url) {
      $this->get($url);
    }
    // A form without an action posts to the page that was actually loaded.
    $url = $this->url;

    $page = $this->getPage();
    if (!$page->isValid()) {
      return FALSE;
    }

    if (($form = $this->findForm($fields, $submit)) === FALSE) {
      return FALSE;
    }

    // If form specified action then use that for the post url.
    if ($form['action']) {
      $url = $page->getAbsoluteUrl($form['action']);
    }

    $body = http_build_query($form['post'], NULL, '&');
    if ($this->curl) {
      $this->curlExecute(array(
        CURLOPT_POST => TRUE,
        CURLOPT_URL => $url,
        CURLOPT_POSTFIELDS => $body,
      ));
    }
    else {
      $this->streamExecute($url, array(
        'method' => 'POST',
        'header' => array(
          'Content-Type' => 'application/x-www-form-urlencoded',
        ),
        'content' => $body,
      ));
    }

    $this->refreshCheck();

    return $this->getState();
  }

  /**
   * Find the form that matches the conditions.
   *
   * @param $fields
   *   Associative array of fields to submit as POST variables.
   * @param $submit
   *   Text contained in the 'value' property of the submit button to press.
   * @return
   *   Form action and the complete post array containing default values if not
   *   overridden, or FALSE if no form matching the conditions was found.
   */
  protected function findForm(array $fields, $submit) {
    $forms = $this->getPage()->getForms();
    if (!$forms) {
      // Either the page could not be parsed or it contains no forms.
      return FALSE;
    }

    foreach ($forms as $form) {
      if (($post = $this->processForm($form, $fields, $submit)) !== FALSE) {
        $action = (isset($form['action']) ? (string) $form['action'] : FALSE);
        return array(
          'action' => $action,
          'post' => $post,
        );
      }
    }
    return FALSE;
  }

  /**
   * Check the conditions against the specified form and process values.
   *
   * @param $form
   *   Form SimpleXMLElement object.
   * @param $fields
   *   Associative array of fields to submit as POST variables.
   * @param $submit
   *   Text contained in the 'value' property of the submit button to press.
   * @return
   *   The complete post array containing default values if not overridden, or
   *   FALSE if the form does not contain the requested submit button.
   */
  protected function processForm($form, $fields, $submit) {
    $inputs = $this->getPage()->getInputs($form);
    if (!$inputs) {
      $inputs = array();
    }

    $post = array();
    $submit_found = FALSE;
    foreach ($inputs as $input) {
      $name = (string) $input['name'];
      $html_value = isset($input['value']) ? (string) $input['value'] : '';
      $type = $this->inputType($input);

      // Only the button that is pressed contributes a value; the first button
      // whose value matches is the one considered pressed.
      if ($type == 'submit' || $type == 'image') {
        if (!$submit_found && (string) $submit === $html_value) {
          $submit_found = TRUE;
          if ($name !== '') {
            $post[$name] = $html_value;
          }
        }
        continue;
      }

      // Controls without a name are never submitted.
      if ($name === '') {
        continue;
      }

      // Set once the caller's value has been applied to this input, including
      // the case where the value is deliberately omitted (checkbox set to
      // FALSE), so the HTML default is not applied on top of it.
      $handled = FALSE;
      if (isset($fields[$name])) {
        if ($type == 'file') {
          // Make sure the file path is the absolute path.
          $file = realpath($fields[$name]);
          if ($file && is_file($file)) {
            // Signify that the post field is a file in case backend needs to
            // perform additional processing. The marker is passed on as a
            // plain value; post() does not turn it into a file upload.
            $post[$name] = '@' . $file;
          }
          $handled = TRUE;
        }
        else {
          $processed_value = $this->processField($input, $type, $fields[$name], $html_value);
          if ($processed_value !== NULL) {
            $handled = TRUE;
            if (is_array($processed_value)) {
              $post += $processed_value;
            }
            elseif ($processed_value !== FALSE) {
              // For radio groups this also replaces a default taken from an
              // earlier, pre-checked radio with the same name.
              $post[$name] = $processed_value;
            }
          }
        }
      }

      // No value applied means that: no post field value was specified, the
      // value does not match the field (radio, select), or the field is of an
      // unknown type. Fall back to the default value from the HTML.
      if (!$handled && !isset($post[$name])) {
        $default_value = $this->getDefaultFieldValue($input, $type, $html_value);
        if (is_array($default_value)) {
          $post += $default_value;
        }
        elseif ($default_value !== NULL) {
          $post[$name] = $default_value;
        }
      }
    }

    return $submit_found ? $post : FALSE;
  }

  /**
   * Determine the type of a form control.
   *
   * @param $input
   *   Input SimpleXMLElement object.
   * @return
   *   The lowercase 'type' attribute if present, 'text' for an input element
   *   without a type, otherwise the element name (textarea, select).
   */
  protected function inputType($input) {
    if (isset($input['type'])) {
      return strtolower((string) $input['type']);
    }
    $element = $input->getName();
    return ($element == 'input') ? 'text' : $element;
  }

  /**
   * Get the value to be sent for the specified field.
   *
   * @param $input
   *   Input SimpleXMLElement object.
   * @param $type
   *   Input type: text, textarea, password, radio, checkbox, or select.
   * @param $new_value
   *   The new value to be assigned to the input. For a multiple select this
   *   is an array of option values.
   * @param $html_value
   *   The cleaned default value for the input from the HTML value.
   * @return
   *   The value to post; an array of "name[index]" => value pairs for a
   *   multiple select; FALSE if the field must be omitted (unchecked
   *   checkbox); or NULL if the new value does not apply to this input.
   */
  protected function processField($input, $type, $new_value, $html_value) {
    switch ($type) {
      case 'text':
      case 'textarea':
      case 'password':
        return $new_value;

      case 'radio':
        return $this->valuesMatch($new_value, $html_value) ? $new_value : NULL;

      case 'checkbox':
        // If $new_value is set to FALSE then omit checkbox value, otherwise
        // pass original value.
        if ($new_value === FALSE) {
          return FALSE;
        }
        return $html_value;

      case 'select':
        $options = $this->getPage()->getSelectOptions($input);

        if (is_array($new_value)) {
          // Remove the ending [] from multi-select element name.
          $key = preg_replace('/\[\]$/', '', (string) $input['name']);
          $wanted = array_map('strval', $new_value);
          $index = 0;
          $out = array();
          foreach ($options as $value => $text) {
            // Option values that look numeric become integer array keys, so
            // compare as strings.
            if (in_array((string) $value, $wanted, TRUE)) {
              $out[$key . '[' . $index++ . ']'] = (string) $value;
            }
          }
          return ($out ? $out : NULL);
        }

        foreach ($options as $value => $text) {
          if ($this->valuesMatch($new_value, $value)) {
            return $new_value;
          }
        }
        return NULL;

      default:
        return NULL;
    }
  }

  /**
   * Compare a caller supplied value with a value from the HTML.
   *
   * @param $new_value
   *   The value supplied by the caller.
   * @param $html_value
   *   The value found in the HTML.
   * @return
   *   TRUE if $new_value is a scalar whose string form equals $html_value.
   */
  protected function valuesMatch($new_value, $html_value) {
    return is_scalar($new_value) && (string) $new_value === (string) $html_value;
  }

  /**
   * Get the cleaned default value for the input from the HTML value.
   *
   * @param $input
   *   Input SimpleXMLElement object.
   * @param $type
   *   Input type: text, textarea, password, radio, checkbox, or select.
   * @param $html_value
   *   The default value for the input, as specified in the HTML.
   * @return
   *   The default value to post; an array of "name[index]" => value pairs for
   *   a multiple select; or NULL if the input posts nothing by default.
   */
  protected function getDefaultFieldValue($input, $type, $html_value) {
    switch ($type) {
      case 'textarea':
        return (string) $input;

      case 'select':
        $options = $this->getPage()->getSelectOptionElements($input);

        if (!isset($input['multiple'])) {
          // For single select, start with the first option; any selected
          // option overwrites it.
          $selected = NULL;
          foreach ($options as $option) {
            if ($selected === NULL || isset($option['selected'])) {
              $selected = (string) $option['value'];
            }
          }
          return $selected;
        }

        // Remove the ending [] from multi-select element name.
        $key = preg_replace('/\[\]$/', '', (string) $input['name']);
        $index = 0;
        $out = array();
        foreach ($options as $option) {
          if (isset($option['selected'])) {
            $out[$key . '[' . $index++ . ']'] = (string) $option['value'];
          }
        }
        return ($out ? $out : NULL);

      case 'file':
      case 'submit':
      case 'image':
      case 'button':
      case 'reset':
        // Files have no default and buttons only post when pressed.
        return NULL;

      case 'radio':
      case 'checkbox':
        if (!isset($input['checked'])) {
          return NULL;
        }
        return $html_value;

      default:
        return $html_value;
    }
  }

  /**
   * Perform a request of arbitrary type.
   *
   * Please use get() and post() for GET and POST requests respectively.
   *
   * This method is not implemented yet: it returns FALSE for unsupported
   * methods and otherwise performs no request and returns NULL.
   *
   * @param $method
   *   The method string identifier.
   * @param $url
   *   Absolute URL to request.
   * @param $additional
   *   Additional parameters related to the particular request method.
   * @return
   *   FALSE if the method is not supported, otherwise NULL.
   * @see isMethodSupported()
   */
  public function request($method, $url, array $additional) {
    if (!$this->isMethodSupported($method)) {
      return FALSE;
    }

    // TODO
  }

  /**
   * Perform the request using the PHP stream wrapper.
   *
   * @param $url
   *   The url to request.
   * @param $options
   *   The HTTP stream context options to be passed to
   *   stream_context_set_params(). The 'header' option is an associative
   *   array of headers that is merged with the default request headers.
   */
  protected function streamExecute($url, array $options) {
    // Options set on the context stay in place until overridden, so reset
    // the request body explicitly; otherwise the body of an earlier POST
    // would be sent again with a later GET.
    $options += array(
      'header' => array(),
      'content' => '',
    );

    // Merge default request headers with the passed headers and generate
    // header string to be sent in http request.
    $headers = $this->requestHeaders + $options['header'];
    $options['header'] = $this->headerString($headers);

    // Update the handler options.
    stream_context_set_params($this->handle, array(
      'options' => array(
        'http' => $options,
      )
    ));

    // Make the request.
    $this->content = file_get_contents($url, FALSE, $this->handle);
    $this->url = $url;

    // The HTTP wrapper stores the response header lines in the local
    // variable $http_response_header; newer PHP versions expose them through
    // http_get_last_response_headers() instead. Neither is available if the
    // request failed before a response was received.
    if (function_exists('http_get_last_response_headers')) {
      $response_headers = http_get_last_response_headers();
    }
    elseif (isset($http_response_header)) {
      $response_headers = $http_response_header;
    }
    else {
      $response_headers = NULL;
    }
    if (!is_array($response_headers)) {
      $response_headers = array();
    }

    $this->headers = $this->headerParseAll($response_headers);
    $this->responseCode = $this->headerStatusCode($response_headers);
    $this->page = NULL;
  }

  /**
   * Perform curl_exec() with the specified option changes.
   *
   * @param $options
   *   Curl options to set, any options not set will maintain their previous
   *   value.
   */
  public function curlExecute(array $options) {
    // Headers need to be reset since callback appends.
    $this->headers = array();

    // Ensure that request headers are up to date.
    $authentication = $this->getHttpAuthentication();
    if ($authentication) {
      curl_setopt($this->handle, CURLOPT_USERPWD, $authentication);
    }
    curl_setopt($this->handle, CURLOPT_USERAGENT, $this->getUserAgent());
    curl_setopt($this->handle, CURLOPT_HTTPHEADER, $this->curlHeaders());

    curl_setopt_array($this->handle, $options);
    $this->content = curl_exec($this->handle);
    $this->url = curl_getinfo($this->handle, CURLINFO_EFFECTIVE_URL);
    $this->responseCode = (int) curl_getinfo($this->handle, CURLINFO_HTTP_CODE);

    // $this->headers should be filled by $this->curlHeaderCallback().
    $this->page = NULL;
  }

  /**
   * Get the default curl options to be used with each request.
   *
   * @return
   *   Default curl options.
   */
  protected function curlOptions() {
    return array(
      CURLOPT_COOKIEJAR => $this->cookieFile,
      CURLOPT_FOLLOWLOCATION => TRUE,
      CURLOPT_HEADERFUNCTION => array($this, 'curlHeaderCallback'),
      CURLOPT_HTTPHEADER => $this->curlHeaders(),
      CURLOPT_RETURNTRANSFER => TRUE,
      // NOTE(review): certificate verification is switched off for every
      // request; acceptable for a test browser only.
      CURLOPT_SSL_VERIFYPEER => FALSE,
      CURLOPT_SSL_VERIFYHOST => FALSE,
      CURLOPT_URL => '/',
      CURLOPT_USERAGENT => $this->getUserAgent(),
    );
  }

  /**
   * Build the request header lines to hand to curl.
   *
   * CURLOPT_HTTPHEADER expects a list of "Name: value" strings. Headers that
   * are sent through a dedicated curl option (CURLOPT_USERAGENT and
   * CURLOPT_USERPWD) are left out so they are not sent twice.
   *
   * @return
   *   Array of header lines.
   */
  protected function curlHeaders() {
    $lines = array();
    $basic_authentication = (bool) $this->getHttpAuthentication();
    foreach ($this->requestHeaders as $name => $value) {
      if ($name === 'User-Agent' || ($name === 'Authorization' && $basic_authentication)) {
        continue;
      }
      $lines[] = "$name: $value";
    }
    return $lines;
  }

  /**
   * Reads response headers and stores them in the $headers array.
   *
   * @param $handler
   *   The curl handler.
   * @param $header
   *   A header line.
   * @return
   *   The string length of the header. (required by curl)
   */
  protected function curlHeaderCallback($handler, $header) {
    // Ignore blank header lines.
    $clean_header = trim($header);
    if ($clean_header !== '') {
      if ($this->isStatusLine($clean_header)) {
        // A new response starts (curl is following a redirect); only the
        // headers of the last response describe the current page.
        $this->headers = array();
      }
      else {
        $this->headers += $this->headerParse($clean_header);
      }
    }

    // Curl requires strlen() to be returned.
    return strlen($header);
  }

  /**
   * Generate a header string given the associative array of headers.
   *
   * @param $headers
   *   Associative array of headers.
   * @return
   *   Header string to be used with stream.
   */
  protected function headerString(array $headers) {
    $string = '';
    foreach ($headers as $key => $header) {
      $string .= "$key: $header\r\n";
    }
    return $string;
  }

  /**
   * Parse the response header array to create an associative array.
   *
   * When the lines cover several responses (a followed redirect) only the
   * headers of the last response are returned.
   *
   * @param $headers
   *   Array of header lines.
   * @return
   *   An associative array of headers.
   */
  protected function headerParseAll(array $headers) {
    $out = array();
    foreach ($headers as $header) {
      if ($this->isStatusLine($header)) {
        $out = array();
      }
      else {
        $out += $this->headerParse($header);
      }
    }
    return $out;
  }

  /**
   * Parse an individual header into name and value.
   *
   * @param $header
   *   A header line.
   * @return
   *   Parsed header as array($name => $value), or array() if parse failed.
   */
  protected function headerParse($header) {
    $parts = explode(':', $header, 2);

    // Ensure header line is valid.
    if (count($parts) == 2) {
      $name = $this->headerName(trim($parts[0]));
      return array($name => trim($parts[1]));
    }
    return array();
  }

  /**
   * Ensure that header name is formatted with all lowercase letters.
   *
   * @param $name
   *   Header name to format.
   * @return
   *   Formatted header name.
   */
  protected function headerName($name) {
    return strtolower($name);
  }

  /**
   * Check whether a response line is an HTTP status line.
   *
   * @param $header
   *   A response header line.
   * @return
   *   TRUE if the line starts with "HTTP/", otherwise FALSE.
   */
  protected function isStatusLine($header) {
    return strncasecmp(ltrim($header), 'HTTP/', 5) == 0;
  }

  /**
   * Extract the status code of the last response from the header lines.
   *
   * @param $headers
   *   Array of response header lines.
   * @return
   *   The status code as integer, or NULL if no status line was found.
   */
  protected function headerStatusCode(array $headers) {
    $code = NULL;
    foreach ($headers as $header) {
      if (preg_match('@^\s*HTTP/\S+\s+(\d{3})@i', $header, $match)) {
        $code = (int) $match[1];
      }
    }
    return $code;
  }

  /**
   * Look up a response header regardless of the case of its name.
   *
   * @param $name
   *   The header name.
   * @return
   *   The header value, or NULL if the header is not present.
   */
  protected function findHeader($name) {
    foreach ((array) $this->headers as $key => $value) {
      if (strcasecmp((string) $key, $name) == 0) {
        return $value;
      }
    }
    return NULL;
  }

  /**
   * Check for a refresh signifier.
   *
   * A refresh signifier can either be the 'Location' HTTP header or the meta
   * tag 'http-equiv="Refresh"'. At most $refreshLimit nested redirects are
   * followed.
   */
  protected function refreshCheck() {
    if ($this->refreshDepth >= $this->refreshLimit) {
      return;
    }

    $target = $this->refreshTarget();
    if ($target === NULL) {
      return;
    }

    // The nested get() performs its own refresh check on the new page.
    $this->refreshDepth++;
    $this->get($target);
    $this->refreshDepth--;
  }

  /**
   * Determine the URL the current page redirects to, if any.
   *
   * @return
   *   Absolute URL to request next, or NULL if the page does not redirect.
   */
  protected function refreshTarget() {
    $page = $this->getPage();

    // If not handled by backend wrapper then go ahead and handle. Both
    // backends normally follow redirects themselves, in which case the final
    // response is not a redirect. A Location header on another kind of
    // response (201 Created, for example) is not a redirect.
    $location = $this->findHeader('Location');
    $is_redirect = $this->responseCode === NULL || ($this->responseCode >= 300 && $this->responseCode < 400);
    if ($is_redirect && $location !== NULL && $location !== '') {
      return $page->getAbsoluteUrl($location);
    }

    $tag = $page->getMetaTag('Refresh', 'http-equiv');
    if ($tag !== FALSE) {
      // Parse the content attribute of the meta tag for the format:
      // "[delay]; URL=[path_to_redirect_to]".
      if (preg_match('/\d+;\s*URL=(?P<url>.*)/i', (string) $tag['content'], $match)) {
        $target = trim(html_entity_decode($match['url'], ENT_QUOTES, 'UTF-8'), " \t\n\r\"'");
        if ($target !== '') {
          return $page->getAbsoluteUrl($target);
        }
      }
    }
    return NULL;
  }

  /**
   * Close the wrapper connection.
   */
  public function __destruct() {
    if (isset($this->handle)) {
      // Curl handles are only resources, and only need closing, on PHP
      // versions before 8.
      if ($this->curl && is_resource($this->handle)) {
        curl_close($this->handle);
      }
      $this->handle = NULL;
    }
  }
}
840
841
842 /**
843 * Represents a page of content that has been fetched by the Browser. The class
844 * provides a number of convenience methods that relate to page content.
845 */
class BrowserPage {

  /**
   * The URL of the page.
   *
   * @var string
   */
  protected $url;

  /**
   * The response headers of the page.
   *
   * @var array
   */
  protected $headers;

  /**
   * The root element of the page, or FALSE if the content could not be parsed.
   *
   * @var SimpleXMLElement
   */
  protected $root;

  /**
   * Initialize the BrowserPage with the page state information.
   *
   * @param $url
   *   The URL of the page.
   * @param $headers
   *   The response headers of the page.
   * @param $content
   *   The raw content of the page.
   */
  public function __construct($url, $headers, $content) {
    $this->url = $url;
    $this->headers = $headers;
    $this->root = $this->load($content);
  }

  /**
   * Attempt to parse the raw content using DOM and import it into SimpleXML.
   *
   * @param $content
   *   The raw content of the page.
   * @return
   *   The root element of the page, or FALSE.
   */
  protected function load($content) {
    // A failed request yields FALSE or NULL content, and loadHTML() refuses
    // an empty string.
    if (!is_string($content) || $content === '') {
      return FALSE;
    }

    // Use DOM to load HTML soup, and collect parser warnings internally
    // instead of emitting them.
    $previous = libxml_use_internal_errors(TRUE);
    $document = new DOMDocument();
    $loaded = $document->loadHTML($content);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded || !$document->documentElement) {
      return FALSE;
    }

    $root = simplexml_import_dom($document);
    return ($root instanceof SimpleXMLElement) ? $root : FALSE;
  }

  /**
   * Check if the raw content is valid and could be parsed.
   *
   * @return
   *   TRUE if content is valid, otherwise FALSE.
   */
  public function isValid() {
    return ($this->root !== FALSE);
  }

  /**
   * Perform an xpath search on the contents of the page.
   *
   * The search is relative to the root element, usually the HTML tag, of the
   * page. To perform a search using a different root element follow the
   * example below.
   * @code
   * $parent = $page->xpath('.//parent');
   * $parent[0]->xpath('.//children');
   * @endcode
   *
   * @param $xpath
   *   The xpath string.
   * @return
   *   An array of SimpleXMLElement objects or FALSE in case of an error.
   * @link http://us.php.net/manual/function.simplexml-element-xpath.php
   */
  public function xpath($xpath) {
    if ($this->isValid()) {
      return $this->root->xpath($xpath);
    }
    return FALSE;
  }

  /**
   * Get all the meta tags.
   *
   * @return
   *   An array of SimpleXMLElement objects representing meta tags, or FALSE
   *   if the page is not valid.
   */
  public function getMetaTags() {
    return $this->xpath('//meta');
  }

  /**
   * Get a specific meta tag.
   *
   * The key is matched without regard to case, so 'Refresh' also finds
   * http-equiv="refresh".
   *
   * @param $key
   *   The meta tag key.
   * @param $type
   *   The type of meta tag, either: 'name' or 'http-equiv'.
   * @return
   *   A SimpleXMLElement object representing the meta tag, or FALSE if not
   *   found.
   */
  public function getMetaTag($key, $type = 'name') {
    $tags = $this->getMetaTags();
    if ($tags) {
      foreach ($tags as $tag) {
        if (isset($tag[$type]) && strcasecmp((string) $tag[$type], (string) $key) == 0) {
          return $tag;
        }
      }
    }
    return FALSE;
  }

  /**
   * Get all the form elements.
   *
   * @return
   *   An array of SimpleXMLElement objects representing form elements, or
   *   FALSE if the page is not valid.
   */
  public function getForms() {
    return $this->xpath('//form');
  }

  /**
   * Get all the input elements, or only those nested within a parent element.
   *
   * @param $parent
   *   SimpleXMLElement representing the parent to search within.
   * @return
   *   An array of SimpleXMLElement objects representing form elements.
   */
  public function getInputs($parent = NULL) {
    // Compare against NULL: a SimpleXMLElement for an empty element casts to
    // FALSE, which would silently widen the search to the whole page.
    if ($parent !== NULL) {
      return $parent->xpath('.//input|.//textarea|.//select');
    }
    return $this->xpath('.//input|.//textarea|.//select');
  }

  /**
   * Get all the options contained by a select, including nested options.
   *
   * @param $select
   *   SimpleXMLElement representing the select to extract option from.
   * @return
   *   Associative array where the keys represent each option value and the
   *   value is the text contained within the option tag. For example:
   *   @code
   *     array(
   *       'option1' => 'Option 1',
   *       'option2' => 'Option 2',
   *     )
   *   @endcode
   */
  public function getSelectOptions(SimpleXMLElement $select) {
    $options = array();
    foreach ($this->getSelectOptionElements($select) as $element) {
      $options[(string) $element['value']] = $this->asText($element);
    }
    return $options;
  }

  /**
   * Get all selected options contained by a select, including nested options.
   *
   * @param $select
   *   SimpleXMLElement representing the select to extract option from.
   * @return
   *   Associative array of selected items in the format described by
   *   BrowserPage->getSelectOptions().
   * @see BrowserPage->getSelectOptions()
   */
  public function getSelectedOptions(SimpleXMLElement $select) {
    $options = array();
    foreach ($this->getSelectOptionElements($select) as $element) {
      if (isset($element['selected'])) {
        $options[(string) $element['value']] = $this->asText($element);
      }
    }
    return $options;
  }

  /**
   * Get all the options contained by a select, including nested options.
   *
   * @param $element
   *   SimpleXMLElement representing the select to extract option from.
   * @return
   *   An array of SimpleXMLElement objects representing option elements.
   */
  public function getSelectOptionElements(SimpleXMLElement $element) {
    $options = array();

    // Add all options items.
    foreach ($element->option as $option) {
      $options[] = $option;
    }

    // Search option group children.
    if (isset($element->optgroup)) {
      foreach ($element->optgroup as $group) {
        $options = array_merge($options, $this->getSelectOptionElements($group));
      }
    }
    return $options;
  }

  /**
   * Get the absolute URL for a given path, relative to the page.
   *
   * @param $path
   *   A path relative to the page or absolute.
   * @return
   *   An absolute path.
   */
  public function getAbsoluteUrl($path) {
    $path = (string) $path;

    // parse_url() may complain about malformed URLs on old PHP versions.
    $parts = @parse_url($path);
    if (isset($parts['scheme'])) {
      return $path;
    }

    $base = $this->getBaseUrl();
    if (substr($path, 0, 1) == '/') {
      $parts = @parse_url($base);
      $scheme = isset($parts['scheme']) ? $parts['scheme'] : '';

      if (substr($path, 0, 2) == '//') {
        // Protocol relative URL: only the scheme comes from the page.
        return ($scheme !== '' ? $scheme . ':' : '') . $path;
      }

      // Leading / then use host (and port) as base.
      $base = '';
      if ($scheme !== '' && isset($parts['host'])) {
        $base = $scheme . '://' . $parts['host'];
        if (isset($parts['port'])) {
          $base .= ':' . $parts['port'];
        }
      }
    }
    return $base . $path;
  }

  /**
   * Get the base URL of the page.
   *
   * If a 'base' HTML element with an href is defined then the URL it defines
   * is used as the base URL for the page, otherwise the page URL is used to
   * determine the base URL.
   *
   * @return
   *   The base URL of the page, always ending in a forward slash.
   */
  public function getBaseUrl() {
    $base = NULL;

    // Check for base element.
    $elements = $this->xpath('.//base');
    if ($elements) {
      // More than one may be specified; a base element may also carry only a
      // target and no href.
      foreach ($elements as $element) {
        if (isset($element['href'])) {
          $base = (string) $element['href'];
          break;
        }
      }
    }

    if ($base === NULL || $base === '') {
      // Remove query string and fragment.
      $base = preg_replace('/[?#].*$/s', '', (string) $this->url);

      // Ignore everything after the last forward slash, unless that slash
      // belongs to the "://" following the scheme (URL without a path).
      $authority = strpos($base, '://');
      $slash = strrpos($base, '/');
      if ($slash !== FALSE && ($authority === FALSE || $slash > $authority + 2)) {
        $base = substr($base, 0, $slash);
      }
    }

    // Ensure that the last character is a forward slash.
    if (substr($base, -1) != '/') {
      $base .= '/';
    }
    return $base;
  }

  /**
   * Extract the text contained by the element.
   *
   * Strips all XML/HTML tags, decodes HTML entities, and trims the result.
   *
   * @param $element
   *   SimpleXMLElement to extract text from.
   * @return
   *   Extracted text.
   */
  public function asText(SimpleXMLElement $element) {
    // Flags and charset are given explicitly because the defaults of
    // html_entity_decode() differ between PHP versions.
    return trim(html_entity_decode(strip_tags($element->asXML()), ENT_QUOTES, 'UTF-8'));
  }
}
1142
1143 /**
1144 * @} End of "defgroup browser".
1145 */

  ViewVC Help
Powered by ViewVC 1.1.2