| 1 |
<?php
|
| 2 |
/**
|
| 3 |
* @file
|
| 4 |
* Storage and retrieval of Drupal node content as plain HTML files
|
| 5 |
*
|
| 6 |
* Intent:
|
| 7 |
*
|
| 8 |
* Every time a Drupal node is saved, a corresponding file is updated. This
|
| 9 |
* means that content can be retained even at some time in the future when the
|
| 10 |
* database is unavailable.
|
| 11 |
*
|
| 12 |
* @see static_help.htm for details
|
| 13 |
*
|
| 14 |
* @package coders
|
| 15 |
* @author Dan Morrison http://coders.co.nz/
|
| 16 |
* @version $Id$
|
| 17 |
*
|
| 18 |
*/
|
| 19 |
|
| 20 |
set_include_path( dirname(__FILE__) .'/coders_php_library'. PATH_SEPARATOR . get_include_path());
|
| 21 |
|
| 22 |
/**
|
| 23 |
* @name Debug Flag
|
| 24 |
* Used for testing only
|
| 25 |
* @{
|
| 26 |
*/
|
| 27 |
if (! function_exists('debug')) {
|
| 28 |
require_once 'debug.inc';
|
| 29 |
}
|
| 30 |
debug_set_level(1);
|
| 31 |
/**
|
| 32 |
* @}
|
| 33 |
*/
|
| 34 |
|
| 35 |
include_once ('xml-transform.inc');
|
| 36 |
include_once ('file-routines.inc');
|
| 37 |
require_once ('tidy-functions.inc');
|
| 38 |
|
| 39 |
|
| 40 |
/**
 * Synchronization behaviours.
 *
 * These select when the filesystem copy of a node is consulted: on save only,
 * on every load, or on load only when the file date is newer. One of these
 * values is stored in the 'static_synchronization_behaviour' variable.
 */
// Write the file on node save, never read it back.
define('STATIC_PASSIVE', 1);
// Read the file on every node access, write when updating.
define('STATIC_AGGRESSIVE', 2);
// Write on save, read only if the file time has changed.
define('STATIC_INTELLIGENT', 4);
|
| 47 |
|
| 48 |
|
| 49 |
/**
 * Return help text describing this module.
 *
 * When the PHP DOM extension is not loaded, a warning is appended to the
 * short description and prepended to the full help page.
 *
 * @param $section string Context this help is being called from
 * @return string Help text for the recognised sections, or FALSE for any
 *   other section.
 */
function static_help($section) {
  // Start from an empty string so the concatenations below never read an
  // undefined variable when the DOM extension is available.
  $message = '';
  if (! extension_loaded( "dom" )) {
    $message = t(" <em>Static serialization requires PHP DOM support (a PHP5 extension) . This is currently unavailable, and static writing will not work.</em>");
  }
  $description = t("Synchonizes all nodes with filesystem files. <b>experimental</b>") . $message;

  switch ($section) {
    case 'admin/modules#description' :
    case 'admin/modules/static' :
    case 'admin/settings/static' :
      return $description;

    case 'admin/help#static' :
      // The long help text lives in a file next to this module. If it is
      // missing, fall back to the (possibly empty) warning alone instead of
      // raising a file warning.
      $help_file = dirname(__FILE__) ."/static_help.htm";
      $help_text = is_readable($help_file) ? file_get_contents($help_file) : FALSE;
      return $message . ($help_text === FALSE ? '' : $help_text);
  }
  return FALSE;
}
|
| 74 |
|
| 75 |
/**
 * Implementation of hook_menu() .
 *
 * Registers the settings page at admin/settings/static, rendered by
 * drupal_get_form() with the 'static_settings' form builder.
 *
 * @param $may_cache boolean Whether cacheable menu items are being requested.
 * @return array Menu item definitions. Empty when $may_cache is FALSE, since
 *   the only item defined here does not vary per request.
 */
function static_menu($may_cache) {
  // Always return an array, even when nothing is registered.
  $items = array();

  // The condition used to be "TRUE||$may_cache", which ignored the flag and
  // rebuilt the item on every call.
  if ($may_cache) {
    $items[] = array(
      'path' => 'admin/settings/static',
      'title' => t('Static HTML Settings'),
      'description' => t('Configure where and how the static HTML pages are mirrored.'),
      'callback' => 'drupal_get_form',
      'weight' => 1,
      'callback arguments' => array('static_settings'),
      'access' => user_access('administer site configuration'),
      'type' => MENU_NORMAL_ITEM,
    );
  }
  return $items;
}
|
| 93 |
|
| 94 |
|
| 95 |
/**
 * Build the settings form: display the options and settings.
 *
 * Each element is named after the variable it edits, and the form is handed
 * to system_settings_form() before being returned. The fields are:
 *  - static_file_storage_path: where the files are written.
 *  - static_synchronization_behaviour: one of the STATIC_* constants.
 *  - static_node_types: node types that take part.
 *  - static_show_link: whether to show a link to the archive file.
 *  - static_use_import_html_settings: run files through import_html when
 *    reading; disabled unless that module is enabled.
 *
 * @return FAPI Form
 */
function static_settings() {
  $form = array();

  $form['static_file_storage_path'] = array(
    '#type' => 'textfield',
    '#title' => t('Static file location'),
    '#default_value' => ensure_trailing_slash( variable_get('static_file_storage_path', file_directory_path() .'/static/') ),
    '#description' => t("
Where the static files are stored.
"),
  );

  $form['static_synchronization_behaviour'] = array(
    '#type' => 'select',
    '#title' => t("Synchronization Behaviour"),
    '#default_value' => variable_get('static_synchronization_behaviour', STATIC_PASSIVE),
    // Option labels are user-facing, so they go through t() like every other
    // string on this form.
    '#options' => array(
      STATIC_PASSIVE => t('Write file on node save, never read'),
      STATIC_AGGRESSIVE => t('Read file every node access, write when updating'),
      STATIC_INTELLIGENT => t('Write on save, Read only if file time has changed'),
    ),
    '#description' => t("
How much reading and writing goes on. <ul>
<li>If 'never read' is chosen, static HTML just acts as a backup, retrieval system for when your database becomes unavailable. Very solid future-proofing for your content. It also means there is no danger of your data structures (custom node types and features) getting messed with.</li>
<li>If 'aggressive read/write' is chosen, you are using Drupal as an <em>interface</em> to what is effectively a filesystem-based website, as far as the node content is concerned.</li>
<li>If intelligent read/write is chosen (optimal) this works under Drupal as normal, saving useful backups, <em>and</em> allowing third-party apps to update content at the same time. This is the clever option.</li>
</ul>
Either of the options that read from disk <em>may</em> have a hard time reconstructing a node object back from a flat file if it is anything beyond a standard 'page'. Certain details may be lost in the round-trip process.
"),
  );

  $form['static_node_types'] = array(
    '#type' => 'checkboxes',
    '#title' => t('Node Types to apply to'),
    '#default_value' => variable_get('static_node_types', array()),
    '#options' => node_get_types('names'),
    '#description' => t("
A list of node types you want to use with this module.
All the selected types will be synchronized with static files.
"),
  );

  $form['static_show_link'] = array(
    '#type' => 'checkbox',
    '#title' => t('Show link to archive file on page'),
    '#default_value' => variable_get('static_show_link', FALSE),
  );

  $form['static_use_import_html_settings'] = array(
    '#type' => 'checkbox',
    '#title' => t('Use import HTML processing'),
    '#default_value' => variable_get('static_use_import_html_settings', FALSE),
    '#description' => t("
Static HTML is designed to read and write its own 'pure' XHTML files
in a lowest-common-denominator, semantically tagged way.
What comes in is expected to be the same as what goes out.
<br/>
However, it can also use the import_html translation pipeline on-the-fly.
This would be useful if uploading or editing raw HTML files behind Drupals back.
Doing so is likely to produce imperfect XHTML, so we must run the full import_html process each time such a file is read.
This is the same as running a full import_html process on that file, using the xsl template and all the preferences currently set
in the import_html settings.
<br/>
Enabling this may cause performance to suffer a bit, but the Synchronization Behaviour setting should cache the results for us.
"),
    // The option only makes sense when import_html is available.
    '#disabled' => (! module_exists('import_html')),
  );

  return system_settings_form($form);
}
|
| 171 |
|
| 172 |
|
| 173 |
/**
 * Validate the settings form: the storage path must exist and be writable.
 *
 * A missing directory is created (recursively) before the writability check.
 *
 * @param $form_id string Form identifier (unused).
 * @param $edit array Submitted values; 'static_file_storage_path' is read.
 */
function static_settings_form_validate($form_id, &$edit) {
  $storage_path = isset($edit['static_file_storage_path']) ? $edit['static_file_storage_path'] : '';

  // Nothing sensible can be created or checked for an empty path.
  if ($storage_path === '') {
    form_set_error('static_file_storage_path', t("Storage path is not writable"));
    return;
  }

  if (!is_dir($storage_path)) {
    // mkdir() needs a real permission mask. The NULL passed here before is
    // coerced to mode 0, which creates a directory nobody can write to.
    // 0775 matches the group-writable mode used when the files are saved.
    mkdir($storage_path, 0775, TRUE);
  }
  if (!is_dir($storage_path) || !is_writable($storage_path)) {
    form_set_error('static_file_storage_path', t("Storage path is not writable"));
  }
}

/**
 * Validation handler for the 'static_settings' form.
 *
 * The settings form is registered under the form id 'static_settings' (see
 * the 'callback arguments' in static_menu()). Drupal 5 looks for a default
 * validator named "<form_id>_validate", so without this wrapper
 * static_settings_form_validate() would not be picked up for that form.
 *
 * @param $form_id string Form identifier.
 * @param $form_values array Submitted values.
 */
function static_settings_validate($form_id, $form_values) {
  static_settings_form_validate($form_id, $form_values);
}
|
| 182 |
|
| 183 |
/**
 * Add an 'archive' link pointing at the static copy of a node.
 *
 * A link is only produced when all of these hold: links for a node are being
 * requested, the 'static_show_link' setting is on, the node takes part in
 * static archiving, and file_create_path() accepts the file path.
 *
 * @param $type string What the links are for; only 'node' is handled.
 * @param $node object The node being displayed.
 * @return array Link definitions ('title' and 'href'); empty when no link
 *   applies.
 */
function static_link($type, $node) {
  // Always hand back an array, so callers never see an undefined variable.
  $links = array();

  // Honour the "Show link to archive file on page" checkbox from the settings
  // form. It was stored but never consulted before.
  if ($type != 'node' || !variable_get('static_show_link', FALSE)) {
    return $links;
  }

  if (static_node_applies($node)) {
    $filepath = static_node_path($node);

    if (file_create_path($filepath)) {
      // file_create_url() returns a fully-qualified URL, which is not ideal;
      // however, with clean_urls off, ?q=/files/static/filename.htm isn't
      // supported either.
      $links[] = array(
        'title' => t('archive'),
        'href' => file_create_url($filepath),
      );
    }
  }
  return $links;
}
|
| 199 |
|
| 200 |
|
| 201 |
/**
 * Implementation of hook_nodeapi() .
 *
 * For nodes that take part in static archiving: writes the static file on
 * 'insert' and 'update', and on 'load' returns the node values obtained from
 * static_node_load() as an array. Other operations are ignored.
 *
 * @param $node object The node being acted on.
 * @param $op string The operation being performed.
 * @param $teaser Unused here.
 * @param $page Unused here.
 * @return array|null On 'load', the array cast of static_node_load()'s result
 *   (an empty array when nothing was read); otherwise nothing.
 */
function static_nodeapi(& $node, $op, $teaser, $page) {
  if (! static_node_applies($node)) {
    return;
  }

  // Catch recursion. Loading a page can trigger an update, which will in
  // turn trigger a save, which updates the timestamp and makes a load
  // necessary again...
  // NOTE(review): the flag is never cleared, so each node/operation pair is
  // handled at most once per request. A second save of the same node within
  // one request is therefore not written to disk. Confirm this is intended.
  static $already_doing_this = array();
  $guard_key = (isset($node->nid) ? $node->nid : '') .'-'. $op;
  if (!empty($already_doing_this[$guard_key])) {
    return;
  }
  $already_doing_this[$guard_key] = TRUE;

  switch ($op) {
    case 'insert' :
    case 'update' :
      static_node_save($node);
      break;

    case 'load' :
      return (array)static_node_load($node);
  }
}
|
| 222 |
|
| 223 |
/**
 * Return true if the admin has selected this node as participating in this
 * functionality.
 *
 * The choice is made by node type, using the 'static_node_types' variable.
 * Choosing by taxonomy classification is a TODO.
 *
 * @param $node object Node to test. Anything without a 'type' property
 *   never applies.
 * @return boolean TRUE when the node's type is enabled, FALSE otherwise.
 */
function static_node_applies($node) {
  if (!is_object($node) || !isset($node->type)) {
    return FALSE;
  }

  $active_node_types = variable_get('static_node_types', array());

  // Unselected types are either absent from the array or stored with an
  // empty value, so empty() covers both without an undefined-index read.
  return is_array($active_node_types) && !empty($active_node_types[$node->type]);
}
|
| 234 |
|
| 235 |
/**
 * Return the path to save a node as.
 *
 * The node's path alias is used when it has one, otherwise 'node/<nid>'.
 * The file name is then chosen as follows:
 *  - 'this/thing/' (trailing slash) always means the default document inside
 *    that directory, e.g. 'this/thing/index.htm'.
 *  - 'this/thing' becomes 'this/thing/index.htm' when 'this/thing' already
 *    exists as a directory under the storage path, so the directory is not
 *    overwritten.
 *  - Anything else has its suffix (if any) replaced with '.htm', e.g.
 *    'this/thing.html' and 'this/thing' both become 'this/thing.htm'.
 *
 * @param $node object Node with 'nid' and, optionally, 'path'.
 * @return string File path, prefixed with the configured storage path.
 */
function static_node_path($node) {
  // Use the same default as the settings form, and tolerate a stored value
  // that lacks its trailing slash.
  $base = variable_get('static_file_storage_path', file_directory_path() .'/static/');
  if ($base !== '' && substr($base, -1) !== '/') {
    $base .= '/';
  }

  $path = !empty($node->path) ? $node->path : 'node/'. $node->nid;
  $default_document = variable_get('import_html_default_document', "index.htm");

  // Ends with a slash: the default document is implied. A plain string test
  // avoids comparing strrpos()'s FALSE against a length, which used to turn
  // a one-character path such as 'a' into 'aindex.htm'.
  if (substr($path, -1) === '/') {
    return $base . $path . $default_document;
  }

  // Check whether this would overwrite a directory. The check used to run
  // only for paths that had a suffix, the opposite of what its comment said.
  if (is_dir($base . $path)) {
    return $base . $path .'/'. $default_document;
  }

  // Replace any suffix on the last path segment with '.htm'.
  return $base . preg_replace('|\.[^\.\/]+$|', '', $path) .".htm";
}
|
| 260 |
|
| 261 |
/**
 * Write a node to its static HTML file.
 *
 * The node is serialized with static_node_to_html() and written to the path
 * given by static_node_path(), creating the directory first if needed. On
 * success a message linking to the archived file is shown.
 *
 * @param $node object The node being saved.
 * @return integer|boolean Number of bytes written, or FALSE when the node
 *   could not be serialized, the directory could not be created, or the
 *   write failed.
 */
function static_node_save(&$node) {
  debug("Saving node as static HTML file ". (isset($node->path) ? $node->path : ''), 2);

  $html = static_node_to_html($node);
  if (! is_string($html)) {
    // Serialization failed.
    return FALSE;
  }

  $filepath = static_node_path($node);
  debug("Saving node to file ". l($filepath, $filepath) , 2);

  $filedir = dirname($filepath);
  if (!$filedir) {
    trigger_error("static.module Trying to save a node with no path, this should never happen.");
    return FALSE;
  }

  if (!is_dir($filedir)) {
    mkdirs($filedir);
  }
  if (!is_dir($filedir)) {
    // There is nowhere to write to, so stop instead of letting the write
    // below fail as well.
    trigger_error("directory '$filedir' doesn't exist and couldn't be created");
    return FALSE;
  }

  $success = file_put_contents($filepath, $html);
  if ($success === FALSE) {
    // Nothing was written, so there is no file to chmod or link to.
    return FALSE;
  }

  // Add group write, it's annoying otherwise. The file is plain HTML, so it
  // does not get the execute bits that 0775 used to set.
  chmod($filepath, 0664);

  drupal_set_message(t("A static version of this page has been archived as !archive_link", array('!archive_link' => l($filepath, file_create_url($filepath)) )));

  return $success;
}
|
| 285 |
|
| 286 |
|
| 287 |
/**
 * Refresh a node from its static file when the configured behaviour asks for
 * it.
 *
 * Nothing is read in passive mode, when the file does not exist, or (in
 * intelligent mode) when the node's 'changed' time is not older than the
 * file time. Otherwise the file is parsed, either through the import_html
 * pipeline or directly as XHTML. The values found are merged over $node and
 * the node is saved back to the database.
 *
 * @param $node object The node being loaded; modified in place.
 * @return object|null The node after a read was attempted (even if parsing
 *   failed), or nothing when no read was needed.
 */
function static_node_load(&$node) {
  $node_path = isset($node->path) ? $node->path : NULL;
  debug("Possibly loading node from static! ". $node_path, 2);

  $behaviour = variable_get('static_synchronization_behaviour', STATIC_PASSIVE);
  if ($behaviour == STATIC_PASSIVE) {
    debug("Static HTML in passive mode. Not doing anything.", 2);
    return;
  }

  $filepath = static_node_path($node);
  if (! is_file($filepath)) {
    debug("No static backup to retrieve", 2);
    return;
  }

  // Read the file time once and reuse it for the messages and the check.
  $file_time = filectime($filepath);
  debug(t("Filesystem file was dated %fs_time while the database node is dated %db_time", array('%fs_time' => $file_time, '%db_time' => $node->changed)), 2);

  if (($behaviour == STATIC_INTELLIGENT) && ($node->changed >= $file_time)) {
    // No recent change.
    debug("No recent change in $filepath . Node time: $node->changed >= File time: ". $file_time , 2);
    return;
  }

  debug("Loading node statically from ". $filepath, 2);

  if (! module_exists('import_html')) {
    // Enable the library, even if the module is disabled.
    require_once('import_html.module');
  }

  // We should trust these files to be pure, no more validating or
  // translating needed. HOWEVER, if reading from a non XHTML source (like
  // raw XML), extra processing (found in import_html) can be used instead.
  $scanned_node = NULL;
  if (variable_get('static_use_import_html_settings', FALSE)) {
    debug("Using import_html transformation to initialize node from raw HTML file ". $filepath, 2);
    $nodes = _import_html_process_html_page($filepath, $node_path);
    $scanned_node = is_array($nodes) ? array_pop($nodes) : NULL;
  }
  else {
    // Trust the file is nice and tidy like we left it.
    $xmldoc = parse_in_xml_file($filepath, false);
    if ($xmldoc) {
      $scanned_node = import_html_xhtml_to_node($xmldoc);
    }
  }

  if ($scanned_node) {
    // First do a rough over-write of all values.
    foreach ($scanned_node as $key => $val) {
      $node->$key = $val;
    }

    // Then possibly replace it with more advanced settings.
    // Import/export rules and callbacks are defined in a big reference array.
    $field_defs = static_def();

    // Merge defined fields back into the node object.
    // Can't just replace it, as it's modified by ref.
    debug("Merging values loaded from file over the existing node.", 2);
    // This is a routine notice, not an error. The literal 2 used before is
    // the error severity.
    watchdog('static_html', "Updating node, Merging values loaded from recently modified file $filepath over the existing node $node->nid $node->path.", WATCHDOG_NOTICE);

    foreach ($field_defs as $key => $def) {
      if ( ! element_child($key) ) {
        continue;
      }

      // Change the label if needed. Some of the properties need renaming
      // back - 'identifier' in the doc is 'key' in the node.
      // The braces are required: without them PHP 7 and later read
      // $scanned_node->$def['#identifier'] as ($scanned_node->$def)['#identifier'].
      if ( isset($def['#identifier']) && isset($scanned_node->{$def['#identifier']})) {
        $node->$key = $scanned_node->{$def['#identifier']};
        debug("Replaced value of '". $key ."' with value of '". $def['#identifier'] ."' from file ", 2);
        continue;
      }

      // Or if it's indexed by #key in the source, copy straight across.
      if ( !empty($def['#key']) && isset($scanned_node->$key)) {
        $node->$key = $scanned_node->$key;
        debug("Replaced value of '". $key ."' from file " , 2);
      }
    }

    // Now it's loaded, and different, save it back to the database so we
    // don't have to read again. This will absorb changes and update the
    // timestamp ... which will in turn prompt a re-save to file etc.
    node_save($node);
  }
  else {
    // Pass the path as a placeholder so the translatable string is constant
    // and the path is escaped on output.
    drupal_set_message(t("Failed to parse file from %filepath. It should contain info about this node.", array('%filepath' => $filepath)));
  }

  return $node;
}
|
| 381 |
|
| 382 |
|
| 383 |
/**
 * Serialize a node as a standalone XHTML document.
 *
 * The document gets a <title> and <h1> from the node title, the node content
 * in the body, and <meta> tags in the head for every static_def() entry that
 * declares an '#identifier' and has a value on the node.
 *
 * With the 'static_save_semantically' variable on (the default), each child
 * of $node->content is written separately: array-shaped values (CCK-style
 * fields) as one <div class="..."> per value, anything else as a tidied
 * <div id="...">. With it off, the rendered body is written as one
 * <div id="content">.
 *
 * The node passed in is not modified; all rendering and formatting is done
 * on a copy.
 *
 * @param $node object The node to serialize.
 * @return string|null The XML source of the document, or nothing when the
 *   DOM extension is unavailable.
 */
function static_node_to_html($node) {
  if (! extension_loaded( "dom" )) {
    drupal_set_message("Static serialization requires PHP DOM support (a PHP5 extension)", 'error');
    return;
  }

  // Objects are passed by handle, so work on a copy. node_view() and the
  // formatting below would otherwise leave filtered bodies and formatted
  // dates on the node that the caller (and later hooks) still use.
  $node = clone $node;

  // Construct the HTML page by XML.
  // Pretty tedious, but foolproof if done right.
  $doc = new DOMDocument('1.0', 'UTF-8');
  // Property names are case-sensitive: 'formatoutput' silently created an
  // unrelated property and left the output unformatted.
  $doc->formatOutput = TRUE;

  $html = $doc->createElementNS('http://www.w3.org/1999/xhtml', 'html');
  $doc->appendChild($html);

  $head = $doc->createElement('head');
  $html->appendChild($head);

  // Text goes in through createTextNode() so that it is escaped (no '&'
  // problems), which createElement('title', $text) would not guarantee.
  $title = $doc->createElement('title');
  $title->appendChild($doc->createTextNode($node->title));
  $head->appendChild($title);

  $body = $doc->createElement('body');
  $html->appendChild($body);
  $body->appendChild($doc->createTextNode("\n")); // Just layout

  $heading = $doc->createElement('h1');
  $heading->appendChild($doc->createTextNode($node->title));
  $body->appendChild($heading);
  $body->appendChild($doc->createTextNode("\n")); // Just layout

  // This runs the filters over the content, preparing the body for display.
  // Also, inevitably, adding some theme cruft for CCK bits and things.
  node_view($node);

  // When serializing, we can either dump a screen-view, or try to annotate
  // it our way.
  if (variable_get('static_save_semantically', TRUE)) {
    // Avoid the preformatting as much as possible: place the respective data
    // bits in their own divs instead of blobbing it all together. We get
    // less formatting and no labels, but more semantics.
    $content = (isset($node->content) && is_array($node->content)) ? $node->content : array();

    foreach (element_children($content) as $element_type) {
      $content_element = $content[$element_type];
      debug("Need to add $element_type to the static HTML dump", 2);

      $node_values = isset($node->$element_type) ? $node->$element_type : NULL;
      if ($node_values && is_array($node_values)) {
        // This appears to be shaped like a CCK field.
        // As we are being clean and semantic, the redundant 'field_' prefix
        // is dropped from the class name.
        $field_label = (strpos($element_type, 'field_') === 0) ? substr($element_type, 6) : $element_type;

        foreach ($node_values as $element_data) {
          // Add a div with this data.
          $field = $doc->createElement('div');
          $field->setAttribute('class', $field_label);
          $view = (is_array($element_data) && isset($element_data['view'])) ? $element_data['view'] : '';
          $field->appendChild($doc->createTextNode($view));
          $body->appendChild($field);
        }
      }
      else {
        // Not an array, may be older-style annotations - body, image_attach
        // or others. Can't be clever, just tag and inline the cooked text,
        // but tidy it first to be really paranoid.
        $value = (is_array($content_element) && isset($content_element['#value'])) ? $content_element['#value'] : '';
        $tidied = xml_tidy_fragment( '<div id="'. $element_type .'">'. $value .'</div>');
        _static_append_xml_fragment($doc, $body, $tidied);
      }
      $body->appendChild($doc->createTextNode("\n")); // Just formatting
    }
  }
  else {
    // Save the text pretty much as rendered to the screen. This may not
    // always be parseable next time! The text coming from Drupal has to be
    // made valid before serializing it.
    $body_markup = isset($node->body) ? $node->body : '';
    $tidied = xml_tidy_fragment( '<div id="content">'. $body_markup .'</div>');
    _static_append_xml_fragment($doc, $body, $tidied);
  }

  // We now have a valid, vanilla HTML page. Add more info to its header,
  // using the Dublin Core schema where possible.
  $meta = $doc->createElement('link');
  $meta->setAttribute('rel', 'schema.DC');
  $meta->setAttribute('href', 'http://purl.org/dc/elements/1.1/');
  $head->appendChild($meta);

  $field_defs = static_def();
  foreach ($field_defs as $key => $def) {
    // Skip the '#type'/'#title' properties of the definition itself, and
    // fields the node has no value for.
    if (!element_child($key) || !is_array($def) || empty($node->$key)) {
      continue;
    }
    $value = $node->$key;

    if (!empty($def['#callback'])) {
      // If the data element has defined a special way to insert itself into
      // the HTML version, allow it to do so. $doc may be manipulated by the
      // callback.
      $element = $def['#callback']($node, $doc);

      // No return means it inserted itself. Skip the default addition step.
      // ('next;' used to stand here, which is not a PHP statement for this.)
      if (! $element) {
        continue;
      }
      // A returned string is the value to add as a meta below.
      $value = $element;
    }

    if (!empty($def['#format'])) {
      // 'Cook' this value before saving it, using the defined processing
      // callback. The value may even be an array or object, but the
      // formatter callback must know how to deal with it.
      $value = $def['#format']($value);
    }

    if (!empty($def['#identifier'])) {
      // Only values that can be written as attribute text become metas.
      if (!is_scalar($value)) {
        continue;
      }
      $meta = $doc->createElement('meta');
      $meta->setAttribute('name', $def['#identifier']);
      $meta->setAttribute('content', (string)$value);
      $head->appendChild($meta);
    }
  }

  $meta = $doc->createElement('meta');
  $meta->setAttribute('name', 'Generator');
  $meta->setAttribute('content', 'Drupal:static-archive');
  $head->appendChild($meta);

  // Note, this is pure XHTML, with the singleton <meta /> tags, not HTML
  // with the open ones.
  return $doc->saveXML();
}

/**
 * Parse an XML fragment and append its top-level nodes to $parent.
 *
 * A fresh document is used for every fragment, so a fragment that fails to
 * parse can never cause previously parsed content to be appended again.
 *
 * @param $doc object The DOMDocument that owns $parent.
 * @param $parent object The DOM node to append to.
 * @param $xml string The XML source of the fragment.
 * @return boolean TRUE when the fragment was parsed and appended.
 */
function _static_append_xml_fragment($doc, $parent, $xml) {
  if (!is_string($xml) || $xml === '') {
    return FALSE;
  }

  $fragment_doc = new DOMDocument();
  if (!$fragment_doc->loadXML($xml)) {
    return FALSE;
  }

  // Iterate the children rather than relying on firstChild.
  foreach ($fragment_doc->childNodes as $child_node) {
    $parent->appendChild($doc->importNode($child_node, TRUE));
  }
  return TRUE;
}
|
| 567 |
|
| 568 |
/**
 * Implementation of hook_def() .
 *
 * Mapping definition that translates node properties to the labels used in
 * the static document. Entries are keyed by node property name; keys that
 * start with '#' describe the definition itself.
 *
 * Properties of an entry, as used elsewhere in this file:
 *  - '#identifier': the name of the <meta> tag that carries the value.
 *  - '#key': the value is copied 1:1 under the same name.
 *  - '#format': name of a callback that takes the value and returns the
 *    string to save in the <meta> tag.
 *  - '#callback': name of a callback given the node and the document, which
 *    may add the value to the document itself. No entry below sets one.
 *
 * Setting both '#callback' and '#format' on one entry is not a good idea.
 *
 * @return array The definition array.
 */
function static_def() {
  return array(
    '#type' => 'entity',
    '#title' => t('Drupal node'),

    // Copied 1:1 between node and document.
    'body' => array(
      '#title' => t('Node Body'),
      '#key' => TRUE
    ),
    'title' => array(
      '#title' => t('Node Title'),
      '#key' => TRUE
    ),

    // Drupal-specific values.
    'nid' => array(
      '#type' => 'int',
      '#title' => t('Node ID'),
      '#identifier' => 'drupal:nid',
    ),
    'path' => array(
      '#title' => t('Path'),
      '#identifier' => 'drupal:path',
    ),
    'type' => array(
      '#title' => t('Drupal Node Type'),
      '#identifier' => 'drupal:type',
    ),

    // Dublin Core values.
    'name' => array(
      '#type' => 'string',
      '#title' => t('Creator'),
      '#identifier' => 'DC:creator',
    ),
    'teaser' => array(
      '#type' => 'string',
      '#title' => t('Description'),
      '#identifier' => 'DC:description',
      '#format' => 'static_format_description'
    ),
    'created' => array(
      '#type' => 'int',
      '#title' => t('Date Created'),
      '#identifier' => 'DC:created',
      '#format' => 'static_format_timestamp_as_date',
    ),
    'changed' => array(
      '#type' => 'int',
      '#title' => t('Date Modified'),
      '#identifier' => 'DC:modified',
      '#format' => 'static_format_timestamp_as_date',
    ),
    'taxonomy' => array(
      '#identifier' => 'DC:subject',
      '#format' => 'static_format_taxonomy_as_keywords',
    ),

    // Additional defs added by various modules.
    // This list will have to be updated over time.

    // image_attach.module
    'iid' => array(
      '#title' => t('Attached Image ID'),
      '#type' => 'int',
      '#identifier' => 'drupal:iid',
    ),
  );
}
|
| 659 |
|
| 660 |
/**
 * Short callback function to serialize the node description (teaser) into
 * something worth saving: the teaser with its tags stripped.
 *
 * A static_html serialization callback.
 *
 * @param $node The source node object. May be omitted.
 * @param $doc The XHTML document this value is to be inserted into. Not
 *   touched by this callback.
 *
 * @return string The plain-text teaser, to be added as a META in the header
 *   of the document. An empty string when there is no node or no teaser.
 */
function static_serialize_description($node=NULL, &$doc=NULL) {
  // Both parameters are optional, so do not dereference a missing node.
  if (!is_object($node) || !isset($node->teaser)) {
    return '';
  }
  return strip_tags($node->teaser);
}
|
| 676 |
/**
 * Format callback: reduce a description to plain text by stripping its tags.
 *
 * Named as the '#format' of the 'teaser' entry in static_def().
 *
 * @param $description string Text that may contain markup.
 * @return string The text without tags.
 */
function static_format_description($description) {
  $plain_text = strip_tags($description);
  return $plain_text;
}
|
| 679 |
/**
 * Format callback: render a Unix timestamp as 'YYYY-MM-DD HH:MM:SS'.
 *
 * Named as the '#format' of the 'created' and 'changed' entries in
 * static_def(). The result is expressed in PHP's current default timezone.
 *
 * @param $timestamp integer Unix timestamp.
 * @return string The formatted date.
 */
function static_format_timestamp_as_date($timestamp) {
  $format = 'Y-m-d H:i:s';
  return date($format, $timestamp);
}
|
| 682 |
|
| 683 |
/**
 * Format callback: flatten a node's taxonomy into a comma-separated list of
 * keywords.
 *
 * Named as the '#format' of the 'taxonomy' entry in static_def().
 *
 * Although the node object on load has a more structured shape, on SAVE it
 * may be just the plain
 *   $node->taxonomy['tags'] = array('1'=>'Drupal', '33'=>'Development');
 * Either shape is accepted: when a non-empty 'tags' entry is present its
 * values are used as the keywords; otherwise the 'name' of every term object
 * in the array is used.
 *
 * @param $taxonomy_array array The node's taxonomy.
 * @return string|null Keywords joined with ', ' (possibly empty), or nothing
 *   when the argument is not an array.
 */
function static_format_taxonomy_as_keywords($taxonomy_array) {
  if (! is_array($taxonomy_array)) {
    return;
  }

  $keywords = array();
  if (!empty($taxonomy_array['tags'])) {
    // Just a tid=>label array.
    foreach ((array)$taxonomy_array['tags'] as $term) {
      if (is_scalar($term)) {
        $keywords[] = $term;
      }
    }
  }
  else {
    // Nice full term definitions. Entries that are not term objects (plain
    // term ids, nested arrays, an empty 'tags' entry) have no name to offer
    // and are skipped, rather than each adding an empty keyword.
    foreach ($taxonomy_array as $term) {
      if (is_object($term) && isset($term->name)) {
        $keywords[] = $term->name;
      }
    }
  }
  return join(', ', $keywords);
}
|
| 707 |
|
| 708 |
/**
 * Implementation of hook_install() .
 *
 * Sets this module's weight in the {system} table and tells the admin where
 * to configure it.
 */
function static_install() {
  // Ensure this always runs AFTER core stuff has done its nodeapi;
  // path_nodeapi('load') for example needs to have run before we call load.
  db_query("UPDATE {system} SET weight = 3 WHERE name = 'static'");

  // Build the link with url() so it respects the site's base path and
  // clean-URL setting. The stray '>' after the closing </a> has been removed.
  drupal_set_message(t("Static HTML has been enabled. This module will keep a plaintext, updated version of your nodes on the file system, once you have configured the storage policies in <a href='!static_settings_link'>the static settings page</a>", array('!static_settings_link' => url('admin/settings/static')) ));
}
|