/[drupal]/contributions/modules/opencalais/calais_api.module
ViewVC logotype

Contents of /contributions/modules/opencalais/calais_api.module

Parent Directory | Revision Log | View Revision Graph


Revision 1.6 - (show annotations) (download) (as text)
Thu Apr 24 17:56:49 2008 UTC (19 months ago) by febbraro
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +11 -1 lines
File MIME type: text/x-php
added license
1 <?php
2 /*
3 Copyright (C) 2008 by Phase2 Technology.
4 Author(s): Frank Febbraro, Irakli Nadareishvili
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY. See the LICENSE.txt file for more details.
10
11 $Id$
12 */
13
14
/**
 * Implementation of hook_requirements().
 *
 * At runtime this reports two things on the status page:
 *  - an error when the 'calais_api_key' variable is missing or empty;
 *  - whether the ARC2 class (used to parse the RDF returned by Calais) is
 *    loaded, including its version when available.
 *
 * @param $phase
 *   The requirements phase; only 'runtime' produces entries.
 *
 * @return
 *   An array of requirement entries keyed by 'calais-api' and 'calais-rdf'.
 *   Empty for any phase other than 'runtime'.
 */
function calais_api_requirements($phase) {
  $requirements = array();
  // Use the translation function appropriate for the current phase.
  $t = get_t();

  if ($phase != 'runtime') {
    return $requirements;
  }

  // empty() already covers FALSE, NULL and the empty string, so a separate
  // identity check against FALSE is not needed.
  $apikey = variable_get('calais_api_key', FALSE);
  if (empty($apikey)) {
    $settings_uri = array(
      '!calaissetturi' => l($t('Calais API Settings'), 'admin/settings/calais-api'),
    );

    $requirements['calais-api'] = array(
      'title' => $t('Calais API Key is not set'),
      'description' => $t('Calais integration module is enabled, but Calais API Key is not set. Module will not function properly. Please obtain the key and set it at !calaissetturi as soon as possible', $settings_uri),
      'severity' => REQUIREMENT_ERROR,
    );
  }

  $arc2 = class_exists('ARC2');
  if ($arc2) {
    $requirements['calais-rdf'] = array(
      'title' => $t('Calais RDF Parser'),
      'value' => $t('ARC2 @version is installed', array('@version' => ARC2::getVersion())),
      'description' => '',
      'severity' => REQUIREMENT_OK,
    );
  }
  else {
    $requirements['calais-rdf'] = array(
      'title' => $t('Calais RDF Parser'),
      'value' => $t('ARC2 Not installed'),
      'description' => $t('<a href="@arc2">ARC2</a> is not available. Please <a href="@download">download</a> the latest version of the library and install it in the RDF module. For more information please refer to the <a href="@handbook">handbook</a>.', array('@arc2' => 'http://arc.semsol.org/', '@download' => 'http://arc.semsol.org/download', '@handbook' => 'http://drupal.org/node/219852')),
      'severity' => REQUIREMENT_ERROR,
    );
  }

  return $requirements;
}
51
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by this module.
 */
function calais_api_perm() {
  $permissions = array();
  $permissions[] = 'administer calais api';
  return $permissions;
}
58
/**
 * Implementation of hook_menu().
 *
 * Registers the settings page at admin/settings/calais-api, rendered through
 * drupal_get_form() with the calais_api_admin_settings form builder and
 * restricted to the 'administer calais api' permission.
 *
 * @return
 *   Menu item definitions keyed by path.
 */
function calais_api_menu() {
  return array(
    'admin/settings/calais-api' => array(
      'title' => 'Calais API Settings',
      'description' => 'Configurations for Calais API',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('calais_api_admin_settings'),
      'access arguments' => array('administer calais api'),
    ),
  );
}
75
/**
 * Form builder for the Calais API settings page.
 *
 * Defines three elements, each named after the variable it edits:
 *  - calais_api_key: text field for the API key;
 *  - calais_api_allow_searching: checkbox;
 *  - calais_api_allow_distribution: checkbox.
 *
 * @return
 *   The form array after being passed through system_settings_form().
 */
function calais_api_admin_settings() {
  // Link substituted into the API key field's help text.
  $registration_link = l(t('Calais Website'), 'http://www.opencalais.com/member/register');
  $calais_url = array('!calaisurl' => $registration_link);

  $key_field = array(
    '#type' => 'textfield',
    '#title' => t('Calais API Key'),
    '#default_value' => variable_get('calais_api_key', NULL),
    '#size' => 60,
    '#description' => t('You need to obtain an API Key from the !calaisurl first', $calais_url),
  );

  $searching_field = array(
    '#type' => 'checkbox',
    '#title' => t('Allow Calais Searching'),
    '#default_value' => variable_get('calais_api_allow_searching', NULL),
    '#description' => t('Indicates whether future searches can be performed on the extracted metadata by Calais'),
  );

  $distribution_field = array(
    '#type' => 'checkbox',
    '#title' => t('Allow Calais Distribution'),
    '#default_value' => variable_get('calais_api_allow_distribution', NULL),
    '#description' => t('Indicates whether the extracted metadata can be distributed by Calais'),
  );

  $form = array(
    'calais_api_key' => $key_field,
    'calais_api_allow_searching' => $searching_field,
    'calais_api_allow_distribution' => $distribution_field,
  );

  return system_settings_form($form);
}
108
/**
 * Analyze the content via Calais.
 *
 * Posts the content to the Calais web service, extracts the RDF from the
 * response and returns the keywords parsed from it.
 *
 * @param $node
 *   The node the content belongs to. Only its 'type' property is read; it may
 *   be NULL when the content is not tied to a node.
 * @param $content
 *   The content to ship off to Calais for analysis.
 * @param $parameters
 *   Array of Calais parameters for overriding defaults.
 *
 * @return
 *   The object produced by calais_api_parse_rdf(). An empty stdClass object is
 *   returned when the request fails or the response contains no RDF.
 *
 * @see calais_api_build_xml_params() for applicable parameter values.
 */
function calais_api_analyze($node, $content, $parameters = array()) {
  $url = 'http://api.opencalais.com' . '/enlighten/calais.asmx/Enlighten';
  $headers = array('Content-Type' => 'application/x-www-form-urlencoded');

  // Tolerate callers that have no node to offer.
  $node_type = isset($node->type) ? $node->type : NULL;

  $data = array(
    'licenseID' => variable_get('calais_api_key', NULL),
    'content' => $content,
    'paramsXML' => calais_api_xml_params($node_type, $parameters),
  );

  $response = drupal_http_request($url, $headers, 'POST', http_build_query($data, '', '&'));

  // A failed request carries an error and/or no body; there is nothing to
  // parse, so return the same empty result a response without entities gives.
  if (!empty($response->error) || empty($response->data)) {
    return new stdClass();
  }

  $rdfxml = calais_api_extract_rdf($response->data);
  if (empty($rdfxml)) {
    return new stdClass();
  }

  return calais_api_parse_rdf($rdfxml);
}
139
/**
 * Get the XML parameters required by the Calais web service.
 *
 * The site-wide 'calais_api_allow_searching' and
 * 'calais_api_allow_distribution' variables are converted to the strings
 * 'true' / 'false' and then overlaid with the caller-supplied parameters, so
 * caller values win on conflicting keys.
 *
 * @param $node_type
 *   The node type the content belongs to. Not used by the current
 *   implementation.
 * @param $parameters
 *   Array of Calais parameters overriding the site-wide settings.
 *
 * @return
 *   An XML document, as built by calais_api_build_xml_params().
 */
function calais_api_xml_params($node_type, $parameters) {
  $allow_search = variable_get('calais_api_allow_searching', false);
  $allow_distribution = variable_get('calais_api_allow_distribution', false);

  $site_wide = array();
  $site_wide['allowSearch'] = $allow_search ? 'true' : 'false';
  $site_wide['allowDistribution'] = $allow_distribution ? 'true' : 'false';

  return calais_api_build_xml_params(array_merge($site_wide, $parameters));
}
156
/**
 * Build the XML parameters required by the Calais web service.
 *
 * Valid parameters are specified in the options array as key/value pairs with
 * the parameter name being the key and the parameter setting being the value,
 * e.g. array('allowSearch' => 'false'). Missing keys fall back to defaults;
 * 'externalID' defaults to the current Unix timestamp.
 *
 * Every value is escaped before being written into an XML attribute, so values
 * containing quotes, ampersands or angle brackets still yield well-formed XML.
 *
 * @param $options
 *   Array of parameter overrides keyed by parameter name.
 *
 * @return
 *   XML document of Calais parameters.
 *
 * @see http://opencalais.mashery.com/page/documentation#inputparameters for valid parameters.
 */
function calais_api_build_xml_params($options) {
  $defaults = array(
    'contentType' => 'TEXT/HTML',
    'outputFormat' => 'XML/RDF',
    'allowSearch' => 'false',
    'allowDistribution' => 'false',
    'externalID' => time(),
    'submitter' => "Drupal Calais",
  );

  $attributes = array_merge($defaults, (array) $options);

  // Only the known parameters are emitted; escape each one for use inside a
  // double-quoted XML attribute.
  $escaped = array();
  foreach (array_keys($defaults) as $name) {
    $escaped[$name] = htmlspecialchars((string) $attributes[$name], ENT_QUOTES, 'UTF-8');
  }

  $lines = array(
    '<c:params xmlns:c="http://s.opencalais.com/1/pred/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
    '  <c:processingDirectives c:contentType="' . $escaped['contentType'] . '" c:outputFormat="' . $escaped['outputFormat'] . '">',
    '  </c:processingDirectives>',
    '  <c:userDirectives c:allowDistribution="' . $escaped['allowDistribution'] . '"',
    '                    c:allowSearch="' . $escaped['allowSearch'] . '"',
    '                    c:externalID="' . $escaped['externalID'] . '"',
    '                    c:submitter="' . $escaped['submitter'] . '">',
    '  </c:userDirectives>',
    '  <c:externalMetadata>',
    '  </c:externalMetadata>',
    '</c:params>',
  );

  return implode("\n", $lines);
}
194
/**
 * Parse Calais RDF/XML into keywords grouped by entity type.
 *
 * You need to understand how Calais responses are structured to parse its
 * RDF. The parser's simple index maps each subject to its predicates, and
 * each predicate to a list of values.
 *
 * Sample element:
 *   [http://d.opencalais.com/pershash-1/fc0ac3c8-5d95-3625-bbd2-6e6fc3f1d703] => Array (
 *     'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' =>
 *       array (
 *         0 => 'http://s.opencalais.com/1/type/em/e/Person',
 *       ),
 *     'http://s.opencalais.com/1/pred/name' =>
 *       array (
 *         0 => 'Diana',
 *       )
 *   )
 *
 * A subject counts as a keyword when it has both an rdf:type and a Calais
 * name. The keyword type is the last path segment of the type URI ('Person'
 * above) and the keyword value is the first name value ('Diana').
 *
 * Values are returned exactly as parsed; they are NOT escaped for SQL or
 * HTML. Escape them at the point of use.
 *
 * @param $rdf_xml
 *   RDF/XML document as a string.
 *
 * @return
 *   A stdClass object with one property per keyword type, each holding an
 *   array of keyword values.
 */
function calais_api_parse_rdf($rdf_xml) {
  $type_predicate = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
  $name_predicate = 'http://s.opencalais.com/1/pred/name';

  $parser = ARC2::getRDFXMLParser();
  $parser->parse(NULL, $rdf_xml);
  $indexed_triples = $parser->getSimpleIndex();

  $keywords = new stdClass();

  foreach ($indexed_triples as $predicates) {
    // Look both predicates up directly instead of relying on the order in
    // which the parser happened to emit them.
    if (!isset($predicates[$type_predicate][0]) || !isset($predicates[$name_predicate][0])) {
      continue;
    }

    // Take the last part of the type URI only.
    $keyword_type = preg_replace('/.*\//ims', '', $predicates[$type_predicate][0]);
    if (!is_string($keyword_type) || $keyword_type === '') {
      // A type URI ending in '/' leaves nothing usable as a property name.
      continue;
    }

    // The type and value come from the remote response, so they are only ever
    // used as data: a dynamic property name and an array element. Nothing is
    // evaluated as code.
    if (!isset($keywords->$keyword_type) || !is_array($keywords->$keyword_type)) {
      $keywords->$keyword_type = array();
    }
    $keywords->{$keyword_type}[] = $predicates[$name_predicate][0];
  }

  return $keywords;
}
249
/**
 * Extract the RDF/XML document from a Calais response.
 *
 * For whatever reason Calais returns XML wrapped around the RDF, instead of
 * pure RDF-XML, so we need to remove it. The response is entity-decoded
 * first, then everything from the first '<rdf:RDF' through the last
 * '/rdf:RDF>' is returned.
 *
 * @param $data
 *   Raw response body from the Calais web service.
 *
 * @return
 *   The RDF/XML string, or NULL when the response contains no RDF element.
 */
function calais_api_extract_rdf($data) {
  $xml = html_entity_decode((string) $data);

  // Only read the match when there is one; a response without RDF (an error
  // page, an empty body) yields NULL rather than an undefined-index notice.
  if (preg_match('/<rdf:RDF.+\/rdf:RDF>/ims', $xml, $matches)) {
    return $matches[0];
  }

  return NULL;
}
265
/**
 * List the entity types that the Calais API defines.
 *
 * The list is hard-coded from:
 * http://opencalais.mashery.com/page/calaissemanticmetadata
 *
 * TODO: When Calais publishes a static list at a URL or via an API call,
 * return that instead.
 *
 * @return
 *   Flat array of Calais entity names, in alphabetical order.
 */
function calais_api_get_all_entities() {
  $entities = array(
    'Anniversary',
    'City',
    'Company',
    'Continent',
    'Country',
    'Currency',
    'EmailAddress',
    'Facility',
    'FaxNumber',
    'Holiday',
    'IndustryTerm',
    'NaturalDisaster',
    'NaturalFeature',
    'Organization',
    'Person',
    'PhoneNumber',
    'ProvinceOrState',
    'Region',
    'Technology',
    'URL',
  );

  return $entities;
}
298

  ViewVC Help
Powered by ViewVC 1.1.2