| 1 |
<?php |
<?php |
| 2 |
// $Id: serapi.module,v 1.1 2008/11/05 00:03:40 yaph Exp $ |
// $Id: serapi.module,v 1.2 2008/11/05 09:00:03 yaph Exp $ |
| 3 |
// Report every PHP notice/warning while this module is under development.
// NOTE(review): this changes error reporting for the whole request, not just
// this module - confirm it should stay enabled outside of development.
error_reporting(E_ALL);
| 4 |
|
|
| 5 |
/** |
/** |
| 6 |
* Returns an array with information on the performed search |
* Returns an array with information on the performed search |
| 12 |
} |
} |
| 13 |
|
|
| 14 |
if (!valid_url($referer)) { |
if (!valid_url($referer)) { |
| 15 |
|
watchdog('serapi', 'The referring URL @url could not be validated', array('@url' => $referer)); |
| 16 |
return false; |
return false; |
| 17 |
} |
} |
| 18 |
|
|
| 35 |
) |
) |
| 36 |
); |
); |
| 37 |
|
|
| 38 |
$url_parts = parse_url(urldecode($referer)); |
$url_parts = parse_url($referer); |
| 39 |
// only check urls that contain a query string, hell awaits when |
// only check urls that contain a query string, hell awaits when |
| 40 |
// search engines start to use url rewriting for serps |
// search engines start to use url rewriting for serps |
| 41 |
if (is_array($url_parts) && isset($url_parts['host']) && isset($url_parts['query'])) { |
if (is_array($url_parts) && isset($url_parts['host']) && isset($url_parts['query'])) { |
| 43 |
if (false !== stripos($url_parts['host'], $prop['host_pattern'])) { |
if (false !== stripos($url_parts['host'], $prop['host_pattern'])) { |
| 44 |
$query = $url_parts['query']; |
$query = $url_parts['query']; |
| 45 |
$param = $prop['query_param']; |
$param = $prop['query_param']; |
| 46 |
|
parse_str($query, $query_parts); |
| 47 |
// check that the query contains the search parameter |
// check that the query contains the search parameter |
| 48 |
if (false !== strpos($query, $param)) { |
if (isset($query_parts[$param])) { |
| 49 |
#@todo use http://php.net/parse_str |
$search_string = $query_parts[$param]; |
| 50 |
if (preg_match("%" . $param . "=([^&]+)%", $query, $match)) { |
// check whether there is more than one search keyword |
| 51 |
$search = array(); |
if (false !== strpos($search_string, ' ')) { |
| 52 |
$search_string = $match[1]; |
$search['keywords'] = explode(' ', $search_string); |
|
// check whether there is more than one search keyword |
|
|
if (false !== strpos($search_string, ' ')) { |
|
|
$search['keywords'] = explode(' ', $search_string); |
|
|
} |
|
|
else { |
|
|
$search['keywords'] = array($search_string); |
|
|
} |
|
|
$search['string'] = $search_string; |
|
|
$search['url'] = $referer; |
|
|
$search['engine'] = $name; |
|
|
return $search; |
|
| 53 |
} |
} |
| 54 |
|
else { |
| 55 |
|
$search['keywords'] = array($search_string); |
| 56 |
|
} |
| 57 |
|
$search['string'] = $search_string; |
| 58 |
|
$search['url'] = $referer; |
| 59 |
|
$search['engine'] = $name; |
| 60 |
|
return $search; |
| 61 |
} |
} |
| 62 |
} |
} |
| 63 |
} |
} |
| 66 |
} |
} |
| 67 |
|
|
| 68 |
/**
 * Implementation of hook_block().
 *
 * Declares two blocks and renders the one that is requested:
 *  - delta 0: information about the search described by the current referer
 *  - delta 1: the result of running the referer parser over the test URLs
 *
 * @param $op
 *   The operation to perform. Only 'list' and 'view' are handled; any other
 *   value makes the function return nothing.
 * @param $delta
 *   Identifier of the block to render when $op is 'view' (0 or 1).
 * @param $edit
 *   Not used by this implementation.
 *
 * @return
 *   For 'list': an array of block definitions keyed by delta.
 *   For 'view': an array with 'content' and 'subject' keys, or an empty
 *   array when $delta does not match a block declared here.
 */
function serapi_block($op = 'list', $delta = 0, $edit = array()) {
  if ('list' === $op) {
    $blocks = array();
    $blocks[0] = array(
      'info' => t('Search Engine Referer Information'),
      'weight' => 0
    );
    $blocks[1] = array(
      'info' => t('Search Engine Referer Test'),
      'weight' => 0
    );
    return $blocks;
  }
  elseif ('view' === $op) {
    // Start from an empty block so that an unknown $delta yields an empty
    // array instead of an "undefined variable" notice.
    $block = array();
    // switch() compares loosely on purpose: $delta may arrive as the string
    // '0' or '1' as well as an integer.
    switch ($delta) {
      case 0:
        $block['content'] = serapi_block_search_engine_information();
        $block['subject'] = t('Search Engine Referer Information');
        break;
      case 1:
        $block['content'] = serapi_block_search_engine_test();
        $block['subject'] = t('Search Engine Referer Test');
        break;
    }
    return $block;
  }
}
| 98 |
|
|
| 99 |
|
/**
 * Builds the content of the "Search Engine Referer Information" block.
 *
 * Calls serapi_get_search() without arguments and, when it reports a search,
 * returns a var_export() dump of the result.
 * NOTE(review): presumably serapi_get_search() falls back to the referer of
 * the current request when called without arguments - confirm.
 *
 * @return String
 *   The HTML-escaped dump of the search information, or an empty string when
 *   serapi_get_search() returned false.
 */
function serapi_block_search_engine_information() {
  $search_info = serapi_get_search();
  if (false === $search_info) {
    return '';
  }
  // The search information is derived from the referring URL, which is
  // supplied by the client, so it must be escaped before it is used as block
  // content.
  return htmlspecialchars(var_export($search_info, true), ENT_QUOTES, 'UTF-8');
}
| 105 |
|
|
| 106 |
|
/**
 * Calls serapi_get_search() for all test URLs and returns
 * a string representation of the returned array.
 *
 * Builds the content of the "Search Engine Referer Test" block.
 *
 * @return String
 *   The HTML-escaped var_export() dump of the serapi_get_search()
 *   results, one entry per URL from serapi_get_test_urls() and in
 *   the same order.
 */
function serapi_block_search_engine_test() {
  // array_map() always yields an array here, so there is no failure value to
  // test for; entries are false for URLs serapi_get_search() rejects.
  $search_info = array_map('serapi_get_search', serapi_get_test_urls());
  // Escaped so the dump (which contains '&' from the URLs) is valid block
  // markup and consistent with the referer information block.
  return htmlspecialchars(var_export($search_info, true), ENT_QUOTES, 'UTF-8');
}
| 121 |
|
|
| 122 |
|
/**
 * Returns an array of possible referer URLs for testing purposes
 *
 * The list holds three result page URLs each for Google, Yahoo, Ask and
 * Live Search, mixing single-keyword and multi-keyword ('+' separated)
 * queries.
 *
 * @return Array
 *   An array of search engine result page URLs
 */
function serapi_get_test_urls() {
  return array(
    // google
    'http://www.google.de/search?hl=de&q=drupal&btnG=Google-Suche&meta=',
    'http://www.google.com/search?q=drupal',
    'http://www.google.com/search?q=drupal+cms&ie=utf-8&oe=utf-8',
    // yahoo
    'http://search.yahoo.com/search?p=drupal&vc=&fr=yfp-t-501&toggle=1&cop=mss&ei=UTF-8',
    'http://de.search.yahoo.com/search?p=drupal&fr=yfp-t-501&ei=UTF-8&rd=r1',
    'http://de.search.yahoo.com/search?p=drupal+cms&ei=UTF-8&fr=moz2',
    // ask
    'http://de.ask.com/web?q=drupal&qsrc=0&o=0&l=dir&s=&part=&dm=lang',
    'http://www.ask.com/web?q=drupal&search=search&qsrc=0&o=312&l=dir',
    'http://de.ask.com/web?q=is+drupal+a+framework&qsrc=1&o=312&l=dir&s=&part=&dm=all',
    // live
    'http://search.live.com/results.aspx?q=drupal',
    'http://search.live.com/results.aspx?q=drupal+cms&go=&form=QBLH',
    'http://search.live.com/results.aspx?q=live+search+sucks&go=&form=QBLH'
  );
}