| 1 |
<?php
|
| 2 |
/*
|
| 3 |
Copyright (C) 2008 by Phase2 Technology.
|
| 4 |
Author(s): Frank Febbraro, Irakli Nadareishvili
|
| 5 |
|
| 6 |
This program is free software; you can redistribute it and/or modify
|
| 7 |
it under the terms of the GNU General Public License.
|
| 8 |
This program is distributed in the hope that it will be useful,
|
| 9 |
but WITHOUT ANY WARRANTY. See the LICENSE.txt file for more details.
|
| 10 |
|
| 11 |
$Id$
|
| 12 |
*/
|
| 13 |
|
| 14 |
|
| 15 |
/**
 * Implementation of hook_requirements().
 *
 * During the 'runtime' phase this reports two things on the status report:
 *  - an error entry ('calais-api') when no Calais API key has been saved, and
 *  - a 'calais-rdf' entry stating whether the ARC2 class is available.
 *
 * @param $phase
 *   The requirements phase being checked. Only 'runtime' produces entries.
 *
 * @return
 *   An array of requirement entries; empty for any phase other than 'runtime'.
 */
function calais_api_requirements($phase) {
  $requirements = array();
  // All strings below go through the translation function handed out by
  // get_t(), so the hook uses a single translation path throughout.
  $t = get_t();

  if ($phase == 'runtime') {
    $apikey = variable_get('calais_api_key', FALSE);
    // empty() already covers FALSE, NULL and the empty string.
    if (empty($apikey)) {
      $settings_uri = array(
        '!calaissetturi' => l($t('Calais API Settings'), 'admin/settings/calais-api'),
      );

      $requirements['calais-api'] = array(
        'title' => $t('Calais API Key is not set'),
        // Kept on one line so the translatable source string carries no
        // embedded newlines or indentation.
        'description' => $t('Calais integration module is enabled, but Calais API Key is not set. Module will not function properly. Please obtain the key and set it at !calaissetturi as soon as possible', $settings_uri),
        'severity' => REQUIREMENT_ERROR,
      );
    }

    // ARC2::getVersion() is only evaluated when the class actually exists.
    $arc2 = class_exists('ARC2');
    $requirements['calais-rdf'] = array(
      'title' => $t('Calais RDF Parser'),
      'value' => $arc2 ? $t('ARC2 @version is installed', array('@version' => ARC2::getVersion())) : $t('ARC2 Not installed'),
      'description' => $arc2 ? '' : $t('<a href="@arc2">ARC2</a> is not available. Please <a href="@download">download</a> the latest version of the library and install it in the RDF module. For more information please refer to the <a href="@handbook">handbook</a>.', array('@arc2' => 'http://arc.semsol.org/', '@download' => 'http://arc.semsol.org/download', '@handbook' => 'http://drupal.org/node/219852')),
      'severity' => $arc2 ? REQUIREMENT_OK : REQUIREMENT_ERROR,
    );
  }

  return $requirements;
}
|
| 51 |
|
| 52 |
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names this module defines.
 */
function calais_api_perm() {
  $permissions = array();
  $permissions[] = 'administer calais api';
  return $permissions;
}
|
| 58 |
|
| 59 |
/**
 * Implementation of hook_menu().
 *
 * Registers a single entry: the settings page at admin/settings/calais-api,
 * rendered through drupal_get_form() with the calais_api_admin_settings
 * form builder and restricted to the 'administer calais api' permission.
 *
 * @return
 *   An array of menu items keyed by path.
 */
function calais_api_menu() {
  $settings_page = array(
    'title' => 'Calais API Settings',
    'description' => 'Configurations for Calais API',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('calais_api_admin_settings'),
    'access arguments' => array('administer calais api'),
  );

  return array('admin/settings/calais-api' => $settings_page);
}
|
| 75 |
|
| 76 |
/**
 * Form builder for the Calais API settings page.
 *
 * Defines a text field for the API key and two checkboxes (allow searching,
 * allow distribution). Each element is keyed by the variable name it reads
 * its default from, and the result is passed through system_settings_form().
 *
 * @return
 *   The form array returned by system_settings_form().
 */
function calais_api_admin_settings() {
  // Link substituted into the API key field's help text.
  $register_link = array(
    '!calaisurl' => l(t('Calais Website'), 'http://www.opencalais.com/member/register'),
  );

  $key_field = array(
    '#type' => 'textfield',
    '#title' => t('Calais API Key'),
    '#default_value' => variable_get('calais_api_key', NULL),
    '#size' => 60,
    '#description' => t('You need to obtain an API Key from the !calaisurl first', $register_link),
  );

  $searching_field = array(
    '#type' => 'checkbox',
    '#title' => t('Allow Calais Searching'),
    '#default_value' => variable_get('calais_api_allow_searching', NULL),
    '#description' => t('Indicates whether future searches can be performed on the extracted metadata by Calais'),
  );

  $distribution_field = array(
    '#type' => 'checkbox',
    '#title' => t('Allow Calais Distribution'),
    '#default_value' => variable_get('calais_api_allow_distribution', NULL),
    '#description' => t('Indicates whether the extracted metadata can be distributed by Calais'),
  );

  $settings = array(
    'calais_api_key' => $key_field,
    'calais_api_allow_searching' => $searching_field,
    'calais_api_allow_distribution' => $distribution_field,
  );

  return system_settings_form($settings);
}
|
| 108 |
|
| 109 |
/**
 * Analyze the content via Calais.
 *
 * Posts the content, the configured API key and the XML parameter document
 * to the Calais Enlighten endpoint, then extracts the RDF from the response
 * and parses it into keywords.
 *
 * @param $node
 *   The node the content belongs to. Only its ->type property is read; it is
 *   passed on as NULL when no node (or no type) is available.
 * @param $content
 *   The content to ship off to Calais for analysis.
 * @param $parameters
 *   Array of Calais parameters for overriding defaults.
 *
 * @return
 *   The object produced by calais_api_parse_rdf(). An empty object is
 *   returned when the request yields no response body.
 *
 * @see calais_api_build_xml_params for applicable Parameter values.
 */
function calais_api_analyze($node, $content, $parameters = array()) {
  $endpoint = 'http://api.opencalais.com' . '/enlighten/calais.asmx/Enlighten';
  $headers = array('Content-Type' => 'application/x-www-form-urlencoded');

  // Do not dereference a missing node.
  $node_type = isset($node->type) ? $node->type : NULL;

  $data = array(
    'licenseID' => variable_get('calais_api_key', NULL),
    'content' => $content,
    'paramsXML' => calais_api_xml_params($node_type, $parameters),
  );

  // Pass '&' explicitly so the separator does not depend on the
  // arg_separator.output ini setting.
  $payload = http_build_query($data, '', '&');
  $response = drupal_http_request($endpoint, $headers, 'POST', $payload);

  // Without a body there is nothing to parse: record the failure instead of
  // silently reading a missing property and handing nothing to the parser.
  if (empty($response->data)) {
    $reason = !empty($response->error) ? $response->error : 'empty response';
    watchdog('calais_api', 'Calais request failed: @reason', array('@reason' => $reason), WATCHDOG_ERROR);
    return new stdClass();
  }

  $rdfxml = calais_api_extract_rdf($response->data);
  return calais_api_parse_rdf($rdfxml);
}
|
| 139 |
|
| 140 |
/**
 * Get the XML Parameters required by the Calais Web-Service.
 *
 * Seeds the options with the site-wide 'allow searching' and 'allow
 * distribution' settings, lets the caller's parameters override them, and
 * hands the merged set to calais_api_build_xml_params().
 *
 * @param $node_type
 *   The node type of the content. Accepted but not used by this function.
 * @param $parameters
 *   Array of Calais parameters that take precedence over the site settings.
 *
 * @return an XML Document
 */
function calais_api_xml_params($node_type, $parameters) {
  $allow_search = variable_get('calais_api_allow_searching', false);
  $allow_distribution = variable_get('calais_api_allow_distribution', false);

  // Calais expects the literal strings 'true' / 'false'.
  $site_settings = array();
  $site_settings['allowSearch'] = $allow_search ? 'true' : 'false';
  $site_settings['allowDistribution'] = $allow_distribution ? 'true' : 'false';

  return calais_api_build_xml_params(array_merge($site_settings, $parameters));
}
|
| 156 |
|
| 157 |
/**
 * Build the XML Parameters required by the Calais Web-Service
 *
 * Valid parameters are specified in the options array as key/value pairs with the
 * parameter name being the key and the parameter setting being the value
 * e.g. array('allowSearch' => 'false')
 *
 * Recognised keys are contentType, outputFormat, allowSearch,
 * allowDistribution, externalID (defaults to the current timestamp) and
 * submitter. Values are XML-escaped before being written into attributes, so
 * callers should pass them raw (unescaped).
 *
 * @param $options
 *   Array of parameter overrides; may be omitted to use the defaults only.
 *
 * @return XML document of Calais parameters.
 * @see http://opencalais.mashery.com/page/documentation#inputparameters for valid parameters.
 */
function calais_api_build_xml_params($options = array()) {
  $defaults = array(
    'contentType' => 'TEXT/HTML',
    'outputFormat' => 'XML/RDF',
    'allowSearch' => 'false',
    'allowDistribution' => 'false',
    'externalID' => time(),
    'submitter' => "Drupal Calais",
  );

  $attributes = array_merge($defaults, $options);

  // Escape every value that ends up inside a double-quoted XML attribute;
  // a stray quote, ampersand or angle bracket would otherwise produce a
  // malformed parameter document. Only the keys actually written out are
  // touched, so unrelated entries in $options are left alone.
  foreach (array_keys($defaults) as $name) {
    $attributes[$name] = htmlspecialchars((string) $attributes[$name], ENT_QUOTES, 'UTF-8');
  }

  // The closing heredoc marker stays in column 0 for older PHP versions.
  $ret = <<<EOD
<c:params xmlns:c="http://s.opencalais.com/1/pred/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <c:processingDirectives c:contentType="$attributes[contentType]" c:outputFormat="$attributes[outputFormat]">
  </c:processingDirectives>
  <c:userDirectives c:allowDistribution="$attributes[allowDistribution]"
                    c:allowSearch="$attributes[allowSearch]"
                    c:externalID="$attributes[externalID]"
                    c:submitter="$attributes[submitter]">
  </c:userDirectives>
  <c:externalMetadata>
  </c:externalMetadata>
</c:params>
EOD;
  return $ret;
}
|
| 194 |
|
| 195 |
/**
 * Parse Calais RDF/XML into keyword lists grouped by entity type.
 *
 * You need to understand how Calais responses are structured to parse its
 * RDF. The ARC2 simple index maps each resource to its predicates.
 *
 * Sample element:
 *   [http://d.opencalais.com/pershash-1/fc0ac3c8-5d95-3625-bbd2-6e6fc3f1d703] => Array (
 *     'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' =>
 *       array (
 *         0 => 'http://s.opencalais.com/1/type/em/e/Person',
 *       ),
 *     'http://s.opencalais.com/1/pred/name' =>
 *       array (
 *         0 => 'Diana',
 *       )
 *   )
 *
 * A resource counts as a keyword when it carries both an rdf:type and a
 * Calais name predicate, whatever order they are listed in. The last path
 * segment of the type URI (e.g. "Person") becomes a property on the result
 * object, holding an array of names.
 *
 * Names are returned raw. They are no longer run through db_escape_string(),
 * which was only there to make the old eval() call parse; callers must
 * escape or use placeholders at the point of use.
 *
 * @param $rdf_xml
 *   The RDF/XML document as a string.
 *
 * @return
 *   An object with one array property per entity type found; an object with
 *   no properties when there is nothing to parse.
 */
function calais_api_parse_rdf($rdf_xml) {
  $keywords = new stdClass();

  // Nothing to parse, e.g. the response contained no RDF.
  if (empty($rdf_xml)) {
    return $keywords;
  }

  $parser = ARC2::getRDFXMLParser();
  $parser->parse(NULL, $rdf_xml);
  $indexed_triples = $parser->getSimpleIndex();
  if (!is_array($indexed_triples)) {
    return $keywords;
  }

  $type_predicate = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
  $name_predicate = 'http://s.opencalais.com/1/pred/name';

  foreach ($indexed_triples as $resource) {
    // Both predicates are needed; looking them up directly removes the
    // dependency on the order in which the parser lists them.
    if (!isset($resource[$type_predicate][0]) || !isset($resource[$name_predicate][0])) {
      continue;
    }

    // Take the last part of the type URI only.
    $keyword_type = preg_replace('/.*\//ims', '', $resource[$type_predicate][0]);
    // An empty property name cannot be used (type URI ending in a slash).
    if (!is_string($keyword_type) || $keyword_type === '') {
      continue;
    }

    if (!isset($keywords->$keyword_type) || !is_array($keywords->$keyword_type)) {
      $keywords->$keyword_type = array();
    }

    // The braces make the append apply to the dynamic property, so no
    // eval() on remote data is required.
    $keywords->{$keyword_type}[] = $resource[$name_predicate][0];
  }

  return $keywords;
}
|
| 249 |
|
| 250 |
/**
 * For whatever reason Calais returns XML wrapped around the RDF, instead of
 * pure RDF-XML, so we need to remove it.
 *
 * The response is entity-decoded and the span from the first "<rdf:RDF" to
 * the last "/rdf:RDF>" is returned.
 *
 * @param $data
 *   The raw response body from Calais.
 *
 * @return
 *   The RDF/XML document as a string, or NULL when the response contains no
 *   rdf:RDF element.
 */
function calais_api_extract_rdf($data) {
  // Pin flags and charset so decoding does not vary with the PHP version's
  // html_entity_decode() defaults.
  $xml = html_entity_decode((string) $data, ENT_QUOTES, 'UTF-8');

  // Greedy and dot-all: capture everything between the outermost RDF tags,
  // across line breaks.
  if (preg_match('/<rdf:RDF.+\/rdf:RDF>/ims', $xml, $matches)) {
    return $matches[0];
  }

  // No RDF present; avoid reading an undefined offset.
  return NULL;
}
|
| 265 |
|
| 266 |
/**
 * List the entity types that the Calais API defines, as documented at
 * http://opencalais.mashery.com/page/calaissemanticmetadata
 *
 * TODO: When Calais updates to have a static list at a URL or via API call, return that instead.
 *
 * @return flat array listing of Calais entities
 */
function calais_api_get_all_entities() {
  $entities = array();

  $entities[] = 'Anniversary';
  $entities[] = 'City';
  $entities[] = 'Company';
  $entities[] = 'Continent';
  $entities[] = 'Country';
  $entities[] = 'Currency';
  $entities[] = 'EmailAddress';
  $entities[] = 'Facility';
  $entities[] = 'FaxNumber';
  $entities[] = 'Holiday';
  $entities[] = 'IndustryTerm';
  $entities[] = 'NaturalDisaster';
  $entities[] = 'NaturalFeature';
  $entities[] = 'Organization';
  $entities[] = 'Person';
  $entities[] = 'PhoneNumber';
  $entities[] = 'ProvinceOrState';
  $entities[] = 'Region';
  $entities[] = 'Technology';
  $entities[] = 'URL';

  return $entities;
}
|
| 298 |
|