| 1 |
<?php
|
| 2 |
//ELMS: HTML Export - Export your drupal site to HTML
|
| 3 |
//Copyright (C) 2008 The Pennsylvania State University
|
| 4 |
//
|
| 5 |
//Bryan Ollendyke
|
| 6 |
//bto108@psu.edu
|
| 7 |
//
|
| 8 |
//Keith D. Bailey
|
| 9 |
//kdb163@psu.edu
|
| 10 |
//
|
| 11 |
//12 Borland
|
| 12 |
//University Park, PA 16802
|
| 13 |
//
|
| 14 |
//This program is free software; you can redistribute it and/or modify
|
| 15 |
//it under the terms of the GNU General Public License as published by
|
| 16 |
//the Free Software Foundation; either version 2 of the License, or
|
| 17 |
//(at your option) any later version.
|
| 18 |
//
|
| 19 |
//This program is distributed in the hope that it will be useful,
|
| 20 |
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 21 |
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 22 |
//GNU General Public License for more details.
|
| 23 |
//
|
| 24 |
//You should have received a copy of the GNU General Public License along
|
| 25 |
//with this program; if not, write to the Free Software Foundation, Inc.,
|
| 26 |
//51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
| 27 |
|
| 28 |
/**
 * Implementation of hook_help().
 *
 * Supplies the module's one-line description for the 'admin' section. Any
 * other section gets no help text (the function returns NULL).
 *
 * @param $section
 *   The section identifier help is being requested for.
 *
 * @return
 *   The translated help string for 'admin', otherwise NULL.
 */
function html_export_help($section) {
  // Loose comparison on purpose: it mirrors the matching rules of a switch.
  if ($section == 'admin') {
    return t("HTML Export lets you export your drupal site to static HTML.");
  }
}
|
| 37 |
|
| 38 |
/**
 * Implementation of hook_menu().
 *
 * Registers a single page, admin/settings/html_export, which renders the
 * html_export_settings form through drupal_get_form() and requires the
 * 'access administration pages' permission.
 *
 * @return
 *   An array of menu items keyed by path.
 */
function html_export_menu() {
  $settings_page = array();
  $settings_page['title'] = 'HTML Export';
  $settings_page['description'] = 'Export your drupal site to static html page';
  // The page is a form: drupal_get_form() is called with the form id.
  $settings_page['page callback'] = 'drupal_get_form';
  $settings_page['page arguments'] = array('html_export_settings');
  $settings_page['access arguments'] = array('access administration pages');
  $settings_page['type'] = MENU_NORMAL_ITEM;

  return array('admin/settings/html_export' => $settings_page);
}
|
| 53 |
/**
 * Form builder for the HTML Export settings page.
 *
 * This is the form id that html_export_menu() hands to drupal_get_form().
 * The form consists of one required No/Yes radio group; the export itself is
 * carried out by html_export_settings_submit(), which is registered here as
 * a submit handler before the form is wrapped by system_settings_form().
 *
 * @return
 *   The form array as returned by system_settings_form().
 */
function html_export_settings() {
  // The single question asked on the page; it always starts out on "No".
  $publish_choice = array();
  $publish_choice['#type'] = 'radios';
  $publish_choice['#title'] = t("Publish site to HTML?");
  $publish_choice['#default_value'] = 0;
  $publish_choice['#options'] = array(0 => 'No', 1 => 'Yes');
  $publish_choice['#required'] = TRUE;

  $form = array();
  $form['html_export'] = $publish_choice;
  $form['#submit'] = array('html_export_settings_submit');

  return system_settings_form($form);
}
|
| 67 |
/**
 * Submit handler for the HTML Export settings form.
 *
 * When "Publish site to HTML?" was answered with "Yes", every published node
 * and the front page are requested through drupal_http_request() and written
 * as static .html files into a new, timestamped folder below
 * <files directory>/html_export, together with copies of the sites, modules,
 * themes and misc directories.
 *
 * Clean URLs are switched off for the duration of the export and switched
 * back on afterwards, so that internal links are rendered in their
 * "?q=path" form and can be rewritten to the matching static file names.
 *
 * @param $form_id
 *   First argument supplied by the Form API; not used.
 * @param $form_values
 *   Second argument supplied by the Form API. The submitted values are read
 *   from $form_values['values'].
 */
function html_export_settings_submit($form_id, $form_values) {
  if ($form_values['values']['html_export'] != 1) {
    return;
  }

  // Turn clean URLs off temporarily if they are on.
  $clean = variable_get('clean_url', 0);
  if ($clean) {
    variable_set('clean_url', 0);
  }

  // The site root is everything in the referring URL that precedes the
  // current path.
  $referer = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
  $current_path = isset($_GET['q']) ? $_GET['q'] : '';
  $root = '';
  if ($referer !== '' && $current_path !== '') {
    $path_pos = strpos($referer, $current_path);
    if ($path_pos !== FALSE) {
      $root = substr($referer, 0, $path_pos);
    }
  }
  // Remove the "?q=" that is left over when the referer was a non-clean URL.
  $query_pos = $root ? strpos($root, '?q=') : FALSE;
  if ($query_pos !== FALSE) {
    $root = substr($root, 0, $query_pos);
  }
  // Without a usable referer every request below would go to a relative URL
  // and fail, so fall back to the base URL Drupal determined for this site.
  if (!$root) {
    $root = $GLOBALS['base_url'] . '/';
  }

  // Create <files>/html_export and, inside it, a fresh folder for this run
  // with the sub-folders the export is going to fill.
  $dir = file_create_path(file_directory_path() . '/html_export');
  file_check_directory($dir, FILE_CREATE_DIRECTORY);
  $relative_export_path = $dir . '/export' . time();
  $subfolders = array('', '/' . file_directory_path(), '/sites', '/modules', '/themes', '/misc');
  foreach ($subfolders as $subfolder) {
    // file_check_directory() takes its first argument by reference, so it
    // has to be given a variable rather than a function result.
    $folder = file_create_path($relative_export_path . $subfolder);
    file_check_directory($folder, FILE_CREATE_DIRECTORY);
  }

  // Location of the Drupal installation on disk. When the server does not
  // provide PATH_TRANSLATED the prefix is empty and all paths stay relative.
  $script = isset($_SERVER['PATH_TRANSLATED']) ? $_SERVER['PATH_TRANSLATED'] : '';
  $drupal_root = str_replace('index.php', '', $script);
  $export_path = $drupal_root . $relative_export_path;

  // Copy the static assets; _html_export_copyr() leaves out server-side code.
  foreach (array('sites', 'modules', 'themes', 'misc') as $folder) {
    _html_export_copyr($drupal_root . $folder, $export_path . '/' . $folder);
  }

  // Collect the published nodes once; the list is needed for the link map
  // and again for rendering.
  $published = array();
  $result = db_query("SELECT nid FROM {node} WHERE status=1 ORDER BY nid DESC");
  while ($row = db_fetch_array($result)) {
    $published[] = $row['nid'];
  }

  // Map every internal path (node/N and, if present, its alias) to the file
  // name its page is exported under.
  $nids = array();
  foreach ($published as $nid) {
    $node_path = 'node/' . $nid;
    $url = url($node_path);
    $marker = strpos($url, '/?q=');
    if ($marker !== FALSE) {
      $url = substr($url, $marker + 4);
    }
    if ($url == $node_path) {
      $nids[$node_path] = 'page' . $nid . '.html';
    }
    else {
      // Drop a fake extension if the alias has one, then turn every run of
      // characters that is not a letter, digit or space into a single dash.
      $name = str_replace(array('.html', '.htm', '.shtml', '.php', '.asp'), '', $url);
      $name = preg_replace('/[^0-9a-z ]+/i', '-', $name);
      $nids[$url] = $name . '.html';
      $nids[$node_path] = $name . '.html';
    }
  }

  // Every way a link to one of those paths can appear in rendered markup.
  $link_map = array();
  foreach ($nids as $path => $file) {
    foreach (array('index.php/?q=', 'index.php?q=', '/?q=', '?q=') as $prefix) {
      $link_map[$prefix . $path] = $file;
    }
  }

  // Render every published node and save it under its static file name.
  foreach ($published as $nid) {
    $page_url = $root . 'index.php?q=node/' . $nid;
    $response = drupal_http_request($page_url);
    if (!isset($response->data)) {
      drupal_set_message(t('HTML Export could not fetch %url.', array('%url' => $page_url)), 'error');
      continue;
    }
    // Strip out file paths that have the full server address in them.
    $data = str_replace($root . base_path(), '', $response->data);
    $data = str_replace($root, '', $data);
    $data = _html_export_rewrite_links($data, $link_map);
    _html_export_write_file($export_path . '/' . $nids['node/' . $nid], $data);
  }

  // Render the front page as index.html. The absolute server address is
  // deliberately left in place here, unlike for the node pages.
  $front_url = $root . 'index.php';
  $response = drupal_http_request($front_url);
  if (isset($response->data)) {
    $data = _html_export_rewrite_links($response->data, $link_map);
    _html_export_write_file($export_path . '/index.html', $data);
  }
  else {
    drupal_set_message(t('HTML Export could not fetch %url.', array('%url' => $front_url)), 'error');
  }

  // Turn clean URLs back on if they were switched off above.
  if ($clean) {
    variable_set('clean_url', 1);
  }

  // TODO: generate a list of modules and themes to copy, as well as the
  // files directory except for the html_export folder.
  $export_url = base_path() . $relative_export_path . '/index.html';
  drupal_set_message(t("If you don't see any errors the site was exported successfully! <a href='@url' target='_blank'>Click</a> here to access the export.", array('@url' => $export_url)));
}

/**
 * Rewrites the internal links of a rendered page so they work as static HTML.
 *
 * @param $data
 *   The HTML of the rendered page.
 * @param $link_map
 *   Array mapping each "?q=path" style link to the file name it is exported
 *   under.
 *
 * @return
 *   The HTML with the base path removed, known links pointing at their
 *   static files and links to the site root pointing at index.html.
 */
function _html_export_rewrite_links($data, $link_map) {
  // Get rid of a base path if there is one, so asset links become relative.
  $base_path = base_path();
  if ($base_path != '/') {
    $data = str_replace($base_path, '', $data);
  }
  // strtr() tries the longest key first and never rescans replaced text, so
  // a path that is a prefix of another one cannot corrupt the longer link.
  if (!empty($link_map)) {
    $data = strtr($data, $link_map);
  }
  // Whatever "?q=" is left over belongs to paths that were not exported.
  $data = str_replace('?q=', '', $data);
  // Links to the site root should point at the exported front page.
  $data = str_replace('<a href="/"', '<a href="index.html"', $data);
  $data = str_replace('<a href=""', '<a href="index.html"', $data);
  return $data;
}

/**
 * Writes one exported page to disk and reports a failure to the user.
 *
 * @param $filepath
 *   Path of the file to create or overwrite.
 * @param $data
 *   The contents to write.
 *
 * @return
 *   TRUE if the file was written, FALSE otherwise.
 */
function _html_export_write_file($filepath, $data) {
  $handle = fopen($filepath, 'w');
  if ($handle === FALSE) {
    drupal_set_message(t('HTML Export could not write %file.', array('%file' => $filepath)), 'error');
    return FALSE;
  }
  $written = fwrite($handle, $data);
  fclose($handle);
  return $written !== FALSE;
}
|
| 193 |
|
| 194 |
/**
 * Recursively copies a file or directory, leaving out server-side files.
 *
 * Entries are skipped when they are directory pointers, documentation or
 * housekeeping files (README.txt, LICENSE.txt, .DS_Store, .htaccess,
 * Thumbs.db), or when their name contains one of the code/metadata
 * extensions .engine, .php, .inc, .info, .install or .module. The match is
 * on any occurrence in the name, so "settings.php.bak" is skipped too.
 *
 * A directory whose path contains "html_export" is created at the
 * destination but its contents are not copied, which keeps earlier exports
 * out of a new one.
 *
 * @param $source
 *   Path of the file or directory to copy.
 * @param $dest
 *   Path to copy to.
 *
 * @return
 *   TRUE on success; FALSE if $source is neither a file nor a directory, if
 *   the destination directory cannot be created, or if copying any entry
 *   fails.
 */
function _html_export_copyr($source, $dest) {
  // Simple copy for a file.
  if (is_file($source)) {
    return copy($source, $dest);
  }

  // A missing path or broken symlink cannot be read as a directory; dir()
  // would return FALSE and the read() call below would be a fatal error.
  if (!is_dir($source)) {
    return FALSE;
  }

  // Make the destination directory.
  if (!is_dir($dest) && !mkdir($dest)) {
    return FALSE;
  }

  // Do not descend into html_export folders.
  if (strpos($source, 'html_export') !== FALSE) {
    return TRUE;
  }

  $dir = dir($source);
  if (!$dir) {
    return FALSE;
  }

  $skipped_names = array('.', '..', 'README.txt', 'LICENSE.txt', '.DS_Store', '.htaccess', 'Thumbs.db');
  // '.inc' also covers '.include'.
  $skipped_fragments = array('.engine', '.php', '.inc', '.info', '.install', '.module');

  $success = TRUE;
  while (FALSE !== ($entry = $dir->read())) {
    $skip = in_array($entry, $skipped_names, TRUE);
    if (!$skip) {
      foreach ($skipped_fragments as $fragment) {
        if (strpos($entry, $fragment) !== FALSE) {
          $skip = TRUE;
          break;
        }
      }
    }
    // Also never copy the destination into itself.
    if ($skip || $dest === "$source/$entry") {
      continue;
    }
    if (!_html_export_copyr("$source/$entry", "$dest/$entry")) {
      $success = FALSE;
    }
  }

  // Clean up.
  $dir->close();
  return $success;
}
|