/[drupal]/contributions/modules/import_html/import_html_ui.inc
ViewVC logotype

Contents of /contributions/modules/import_html/import_html_ui.inc

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.12 - (show annotations) (download) (as text)
Sat Mar 14 04:30:48 2009 UTC (8 months, 2 weeks ago) by dman
Branch: MAIN
CVS Tags: HEAD
Changes since 1.11: +12 -9 lines
File MIME type: text/x-php
Trying to move everything into a fresh branch, tagged 6--1
1 <?php
2 /**
3 * @file
4 * Forms and UI functions for import_html.
5 *
6 * In preparation for D6, UI, call-once funcs are in this include file,
7 * although for D5, there is no win.
8 *
9 * @ingroup import_html Import HTML
10 * @author Dan Morrison http://coders.co.nz/
11 * @version $Id: import_html_ui.inc,v 1.11.2.2 2009/03/14 03:18:51 dman Exp $
12 */
13
14 ///////////////////////////////////////////
15 // SETTINGS and PROFILES
16
/**
 * Settings menu callback: build the main import_html settings form.
 *
 * The form is assembled from:
 * - the HTML Tidy settings widget from tidy-functions.inc,
 * - a warning when PHP's open_basedir restriction is active,
 * - the settings form of the currently selected profile,
 * - a link to start the import, and links to the other profiles.
 *
 * Submission and validation are delegated to the profile settings handlers.
 *
 * @return
 *   A Forms API array.
 */
function import_html_admin_settings() {
  $form = array();

  // Check htmltidy is present and correct.
  include_once 'tidy-functions.inc';
  $form['HTMLTidy'] = html_tidy_settings();

  // Warn about known problem with open_basedir.
  if ($open_basedir = ini_get('open_basedir')) {
    // The URL goes into an href attribute, so it must use the '!' (verbatim)
    // placeholder. A '%' placeholder is wrapped in emphasis markup by t(),
    // which would produce a broken link.
    $form['open_basedir'] = array(
      '#value' => t("<p><strong>Warning:</strong> This server has open_basedir restrictions set [%open_basedir]. It's possible that external source directories may be inaccessible. <a href='!help_url'>Check the help for more details</a>.</p>", array('%open_basedir' => $open_basedir, '!help_url' => url('admin/help/import_html', '', 'open_basedir'))),
    );
  }

  // Embed the settings of the active profile and reuse its handlers.
  $profile_id = import_html_current_profile_id();
  $form['default_profile'] = import_html_profile_settings($profile_id);
  $form['#submit'] = array('import_html_profile_settings_submit' => array());
  $form['#validate'] = array('import_html_profile_settings_validate' => array());

  $form['import_html_proceed'] = array(
    '#value' => '<h2>'. l(t("Proceed to import"), IMPORT_HTML_ADMIN_PATH .'/import_site') .'</h2><br/>',
  );

  // Links to other profiles.
  $form['other_profiles'] = array(
    '#type' => 'markup',
    '#value' => '<h3>'. t('Other Profiles:') .'</h3>'. import_html_profiles_page(),
  );

  return $form;
}
54
55
56
/**
 * Page callback: list the stored profiles, or edit one of them.
 *
 * @param $profile_id
 *   Optional profile ID. When given, the settings form for that profile is
 *   returned. When empty, a themed list of links to every stored profile
 *   (plus a 'New' link) is returned instead.
 *
 * @return
 *   Rendered HTML.
 */
function import_html_profiles_page($profile_id = null) {
  // A specific profile was requested: render its settings form.
  if ($profile_id) {
    return drupal_get_form('import_html_profile_settings', $profile_id);
  }

  // Otherwise just return links to all known profiles.
  $profile_base = IMPORT_HTML_ADMIN_PATH .'/profile/';
  $stored_profiles = variable_get('import_html_profiles', array());

  $links = array();
  foreach ($stored_profiles as $stored_id => $unused_profile) {
    $links[] = array(
      'title' => $stored_id,
      'href' => $profile_base . $stored_id,
    );
  }

  // Always offer a way to create a fresh profile.
  $links[] = array(
    'title' => t('New'),
    'href' => $profile_base .'new',
  );

  return theme('links', $links);
}
80
81
/**
 * Form builder: the settings form for one import_html profile.
 *
 * The options are grouped into three collapsed fieldsets:
 * - 'extraction': how content is pulled out of the source documents,
 * - 'replication': how and where the content is re-created on this site,
 * - 'advanced': problem handling and debugging.
 *
 * A warning message is set while building the form if the path module is
 * not enabled.
 *
 * @param $profile_id
 *   ID of the profile to edit. The special value 'new' produces an empty,
 *   editable Profile ID field; any other value shows a summary of the
 *   profile and locks the Profile ID field.
 *
 * @return
 *   A Forms API array that redirects to the profile list after submission.
 */
function import_html_profile_settings($profile_id) {

  $profile = import_html_profile($profile_id);
  if (! $profile) {
    // Fill in defaults (should only be needed the first time).
    $profile = import_html_profile_defaults();
  }

  $form = array();

  if ($profile_id != 'new') {
    $form['summary'] = array('#value' => import_html_profile_summary($profile));
  }
  $form['profile_id'] = array(
    '#title' => t("Profile ID"),
    '#type' => 'textfield',
    '#required' => true,
    '#default_value' => '',
  );
  if ($profile_id != 'new') {
    // Existing profiles keep their ID; only new ones may be named.
    $form['profile_id']['#default_value'] = $profile_id;
    $form['profile_id']['#disabled'] = true;
  }

  $form['extraction'] = array(
    '#type' => 'fieldset',
    '#title' => t('Import and Content Analysis Options'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );
  $form['extraction'][] = array(
    '#value' => t("How the content is extracted from the source documents."),
  );

  // This is abstracted so it can be re-used in other places (wrapper.module)
  $form['extraction']['translation_template'] = import_html_template_selector($profile['translation_template']);

  $form['extraction']['content_tag_id'] = array(
    '#type' => 'textfield',
    '#title' => t('Content Tag ID'),
    '#default_value' => $profile['content_tag_id'],
    '#size' => 20,
    '#maxlength' => 100,
    '#description' => t("
      If your source document is structured well enough to have the actual
      content in a named div or something (&lt;div id='main'&gt;...content )
      what is that name?
      <br/>
      The default template will search for 'main', 'copy', 'story', 'text', and
      'bodyCopy'. If your input uses another label, set it here.
      <br/>
      This only takes effect if the selected XSL template uses this parameter,
      and may be overridden or ignored by custom templates.
    "),
  );


  $form['extraction']['content_type'] = array(
    '#type' => 'select',
    '#title' => t("Node Type for new pages"),
    '#options' => node_get_types('names'),
    '#default_value' => $profile['content_type'],
    '#description' => t("
      Note, if importing into a CCK content type, or anything that doesn't
      store its body in the node 'body', some customization will have to be
      done on your XSL template.
    ")
  );

  // Just a select, the filter radios are much too bulky for here.
  $form['extraction']['preferred_filter'] = array(
    '#type' => 'select',
    '#title' => t("Default Input Filter"),
    '#default_value' => $profile['preferred_filter'],
    '#options' => import_html_get_preferred_filter(TRUE),
    '#description' => t("
      After importing HTML, the end result may still have a standard Drupal
      filter applied to it internally.
      Use 'Unfiltered HTML' if you want the content verbatim.
      'Full HTML' <em>may</em> be correct here, although it may create
      unwanted hard breaks if the input is already word-wrapped internally.
      Modify only if you want to strip the source right down, execute php,
      or do other filter magic on the input.
    "),
  );

  $form['extraction']['default_document'] = array(
    '#type' => 'textfield',
    '#title' => t("Default Document"),
    '#default_value' => $profile['default_document'],
    '#size' => 40,
    '#maxlength' => 40,
    '#description' => t("
      What is the default document name for pages served in directories on
      the current site (index.htm, index.html, default.asp, index.php)?
      When documents of this name are imported, they can be given both
      the old name (my/dir/index.htm) and a friendly directory name (my/dir) .
      When the navigation menu is built, this file is served when the
      directory link is clicked.
      This should end up emulating normal website navigation behaviour.
      <br/>
      May be a comma-separated list, the first option is the default.
    "),
  );


  $form['extraction']['file_exclusions'] = array(
    '#type' => 'textarea',
    '#title' => t("File Exclusion Pattern"),
    '#default_value' => $profile['file_exclusions'],
    '#description' => t("
      When listing files to import, some sources should probably be left behind.
      Enter a set of regular expressions to check against the file <b>path</b>
      that should <em>not</em> show up on the import screen.
      <ul><li>
      <code>^_</code> = begins with '_'
      </li><li>
      <code>/_</code> = in a dir that begins with '_'
      </li><li>
      <code>CVS/?</code> = called CVS, or has CVS in the path
      </li></ul>
    "),
  );

  $form['extraction']['strip_tables'] = array(
    '#type' => 'checkbox',
    '#title' => t("Remove table markup"),
    '#return_value' => TRUE,
    '#default_value' => $profile['strip_tables'],
    '#description' => t("
      A common task when updating old sites is removing the
      legacy formatting tables. While we are going through the
      pages, this process can do it all for you.
      Beware - enabling this will remove ALL tables, so don't
      try it if there are genuine data tables you wish to retain!
    "),
  );
  $form['extraction']['strip_scripts'] = array(
    '#type' => 'checkbox',
    '#title' => t("Remove old javascripts"),
    '#return_value' => TRUE,
    '#default_value' => $profile['strip_scripts'],
    '#description' => t("
      There are many reasons why importing old inline javascripts
      may be a bad idea. Doing so will remove some functionality
      but also clean up some crap. Up to you.
    "),
  );
  $form['extraction']['force_tidy'] = array(
    '#type' => 'checkbox',
    '#title' => t("Force HTML Tidy-up"),
    '#return_value' => TRUE,
    '#default_value' => $profile['force_tidy'],
    '#description' => t("
      Run HTML Tidy with all strict options over the input before
      parsing it as XML. Normally you want this on BUT turn it off
      if trying to read from non-HTML sources (like arbitrary XML)
      or it will mutilate it.
      You can feed in any valid XML and use XSL template to massage
      it into simple XHTML, but in that case do NOT use HTML tidy.
    "),
  );


  $form['replication'] = array(
    '#type' => 'fieldset',
    '#title' => t("Replication Options"),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );
  $form['replication'][] = array(
    '#value' => t("How and where the imported content is reproduced on this site.")
  );

  if (!module_exists('path')) {
    drupal_set_message(t("The <b>path</b> module is not enabled! If you try to import pages without paths, I can do it, but links won't work and you'll find it hard to find the pages again."), 'error');
  }

  $form['replication']['trim_suffixes'] = array(
    '#type' => 'checkbox',
    '#title' => t("Trim Suffixes"),
    '#return_value' => TRUE,
    '#default_value' => $profile['trim_suffixes'],
    '#description' => t("
      When importing files, new url_aliases will be generated based on the
      old path. Optionally, the new primary alias can have the suffix
      chopped of. So <code>old/path/file.htm</code> wil be accessed as just
      <code>old/path/file</code> and <code>old/section/index.htm</code>
      will become <code>old/section</code>.
      Uncheck this if you do NOT want this tidy behaviour.
      <br/>Note, this aliasing is needed to detect when an import is
      <em>replacing</em> a previously imported page. Disabling it may
      cause you to end up with duplicates if importing the same file again.
      <br/>This just defines the name given to the page, it does not
      automatically update any links <em>to</em> the page (see below) .
    "),
  );

  $form['replication']['legacy_aliases'] = array(
    '#type' => 'checkbox',
    '#title' => t("Legacy Aliases"),
    '#return_value' => TRUE,
    '#default_value' => $profile['legacy_aliases'],
    '#description' => t("
      Optionally, a secondary alias can be made that DOES have the old
      full suffix. This supports old links and hrefs.
      <b>The new node may end up with two aliases</b> (thats OK) but the tidy
      one will be used internally by default.
      Uncheck this if you do NOT want old URL support.
      <br/>
      These two options are independant of each other. If both are off
      the new page will have no url_alias.
    "),
  );

  $form['replication']['relink_files'] = array(
    '#type' => 'checkbox',
    '#title' => t("Try to Relink Files by discarding suffixes"),
    '#return_value' => TRUE,
    '#default_value' => $profile['relink_files'],
    // Note: inside a double-quoted string \' is NOT an escape sequence, so
    // the apostrophe in "You'll" below must be written unescaped.
    '#description' => t("
      <b>If</b> you choose not to support the Legacy Aliases suffixes
      on your new pages, <em>existing embedded links to those pages will break</em>.
      This includes inline links from within the content we are importing.
      <br/>If a page linked to /home/about.htm and you have renamed it
      to home/about, without legacy support, the old link will be broken.
      This tool is a batch job process, not an entire site spider and
      relinker.
      <br/>
      <b>But</b> if you understand exactly what is going on,
      you can choose to try and rewrite all local links to their shortname
      equivalent (basically dropping off the suffixes of linked files)
      during import.
      This will convert any existing <code>href=\"local/path/file.htm\"</code>
      into just <code>href=\"local/path/file\"</code>. Use with care, but
      this option plus \"Trim Suffixes\" should be able to work together to
      rename all files. You'll want to run a link checker afterwards!
    "),
  );

  $form['replication']['allow_bad_urls'] = array(
    '#type' => 'checkbox',
    '#title' => t("Allow bad URLs and filenames"),
    '#return_value' => TRUE,
    '#default_value' => $profile['allow_bad_urls'],
    '#description' => t("
      It's bad mojo to have spaces or non-alphanumeric characters
      in your filenames and in your URLs. Normally I try to fix this
      by replacing potentially damaging characters with an underscore.
      If, however you have to support legacy stuff like
      'Random filename #3 - & about what?.pdf' then we can try to leave them alone.
      May not always work the way you expect.
    "),
  );


  $form['replication']['file_storage_path'] = array(
    '#type' => 'textfield',
    '#title' => t("Extra File Storage Path"),
    '#default_value' => ensure_trailing_slash($profile['file_storage_path']),
    '#description' => t("
      Where, beneath siteroot, should extra imported <strong>files</strong>
      be stored. Images and suchlike will be put into a heirachy mirroring
      their old location, but all under this directory.
      <br/>
      Effectively, this moves file storage towards the Drupal CMS way of
      storing user files, although it does not enter them as 'attachments'.
    "),
  );
  $form['replication']['rewrite_links'] = array(
    '#type' => 'checkbox',
    '#title' => t("Relink references to resources to the new file storage location"),
    '#return_value' => TRUE,
    '#default_value' => $profile['rewrite_links'],
    '#description' => t("
      As the actual files are being stored elsewhere, the HTML links need to be
      rewritten to allow for that. This should be on by default.
      Turn off only if you are intending to re-create a
      messy site with files in old places and have set the file storage path to blank.
      See the transformation file
      <code>rewrite_href_and_src.xsl</code> for fine-tuning if there is trouble.
      <small>Only relative and site-relative links are rewritten.
      Hard-coded, fully-justified (external) URLS are not messed with,
      even if they used to resolve to the same host.</small>
    ")
  );

  $form['replication']['import_site_prefix'] = array(
    '#type' => 'textfield',
    '#title' => t("Import Site Prefix"),
    '#default_value' => ensure_trailing_slash($profile['import_site_prefix']),
    '#size' => 20,
    '#maxlength' => 100,
    '#description' => t("
      Where, beneath siteroot, should extra imported <strong>pages</strong>
      <em>appear to be</em> served from?
      This is a prefix that is applied to the generated URL-aliases.
      <br/>
      If you wish to keep the imported
      pages separate from the main site, setting this value to look like a
      subdirectory will make the imported site act like it's in a subsection.
      Otherwise leave it blank and new page URLs will be added under the top
      level navigation.
    "),
  );

  // Even more settings. These ones are able to be overridden during the process also,
  // so they are often slightly more transient preferences
  $form['replication']['subsettings'] = import_html_subsettings_form($profile);

  $form['advanced'] = array(
    '#type' => 'fieldset',
    '#title' => t("Advanced Import Tuning"),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );

  $form['advanced']["help"] = array(
    '#value' => t("How to handle problems when importing"),
  );

  $form['advanced']['handle_duplicates'] = array(
    '#type' => 'select',
    '#title' => t("Duplicate Handling"),
    '#default_value' => $profile['handle_duplicates'],
    '#options' => array(IMPORT_HTML_MERGE => 'overwrite/merge' , IMPORT_HTML_SKIP => 'skip'),
    '#description' => t("
      If attempting to import a file into a path that already exists
      in the system (such as by repeating an import process), do what?
    "),
  );

  $form['advanced']['handle_no_title'] = array(
    '#type' => 'select',
    '#title' => t("No Title"),
    '#default_value' => $profile['handle_no_title'],
    '#options' => array(
      IMPORT_HTML_GUESS => 'use the document path',
      IMPORT_HTML_SKIP => 'skip, do not import',
      IMPORT_HTML_DEFAULT => 'set to a placeholder value'
    ),
    '#description' => t("
      If parsing the input document fails to return any title at all, do what?"
    ),
  );

  $form['advanced']['debug_level'] = array(
    '#type' => 'select',
    '#title' => t("Debug Level"),
    '#default_value' => $profile['debug_level'],
    '#options' => array(0, 1, 2, 3),
    '#description' => t("
      To trace and test the import process, increasing the debug level here
      will produce a huge amount of process tracing,
      including dumps of the raw versions of the input files
      as it gets massaged into the end result.
      <br/>
      Note that enabling debug can itself introduce errors as the trace logs
      and pretty-printer can cause memory overages.
    "),
  );
  $form['advanced']['keep_temp_files'] = array(
    '#type' => 'checkbox',
    '#title' => t("Keep Temp Files"),
    '#return_value' => TRUE,
    '#default_value' => $profile['keep_temp_files'],
    '#description' => t("
      When processing, files are copied temporarily into a temp directory.
      These are usually deleted immediately after tidying and parsing,
      but if you want to trace problems, enable this option and check the
      files/import directory.
    "),
  );


  $form['advanced']['import_html_other_logic'] = array(
    '#value' => t("<p>
      Other logic, like whether to use the TITLE tag or the H1 tag found in a
      file is defined in the XSL template, which you can tune yourself.
      </p>"
    ),
  );

  $form['save'] = array(
    '#type' => 'submit',
    '#value' => t('Save Settings'),
  );
  $form['delete'] = array(
    '#type' => 'submit',
    '#value' => t('Delete Profile'),
  );
  $form['#redirect'] = IMPORT_HTML_ADMIN_PATH .'/profile';

  return $form;
}
481
482
483
/**
 * Returns a widget for selecting an import template from the ones provided by
 * import_html.
 *
 * The module's /templates directory is scanned recursively for XSL files.
 * The select options are keyed the way file_scan_directory() keys its result
 * and labelled with each file's name.
 *
 * @param $current_template
 *   The currently selected template, used as the default value. When empty,
 *   the bundled html2simplehtml.xsl is preselected.
 *
 * @return
 *   A Forms API 'select' element.
 */
function import_html_template_selector($current_template) {
  $template_dir = drupal_get_path('module', 'import_html') .'/templates';

  // The mask is a regular expression: escape the dot and anchor the suffix so
  // that editor backups such as "foo.xsl~" or "foo.xsl.orig" are not offered.
  $templates = file_scan_directory($template_dir, '\.xslt?$', array('.', '..'), 0, TRUE);

  // Plain loop instead of create_function(), which compiles a new anonymous
  // function on every call and no longer exists in current PHP versions.
  $options = array();
  foreach ($templates as $key => $template) {
    $options[$key] = $template->name;
  }

  if (! $current_template) {
    $current_template = $template_dir ."/html2simplehtml.xsl";
  }

  return array(
    '#type' => 'select',
    '#title' => t('Import template to use'),
    '#options' => $options,
    '#default_value' => $current_template,
    '#prefix' => '<div id="'. _WRAPPER_IMPORT_TEMPLATE .'-selector" class="sub-option">',
    '#suffix' => '</div>',
    '#description' => t("
      This file must be a valid XSL Template that can convert from your source
      HTML to the simplified semantic XHTML schema (see docs and examples).
      Templates should be placed in the import_html module <code>/templates/</code> directory.
      <br/>
      Sample import templates can be found in %module_dir.
    ", array('%module_dir' => $template_dir)),
  );
}
514
/**
 * FAPI validate callback for the profile settings form.
 *
 * Flags the 'translation_template' field when the chosen XSL file cannot be
 * loaded by _import_html_get_xsl_doc().
 */
function import_html_profile_settings_validate($form_id, $form_values) {
  // Initializing the XSL doc just to check is inefficient, but this is just a
  // config screen.
  $template_ok = _import_html_get_xsl_doc($form_values['translation_template']);
  if ($template_ok) {
    return;
  }

  $message_args = array('%translation_template' => $form_values['translation_template']);
  form_set_error('translation_template', t("XSL File %translation_template unable to be parsed", $message_args));
}
524
/**
 * FAPI submit callback for the profile settings form.
 *
 * - A profile ID of 'new' is refused with a message.
 * - The 'Delete Profile' button removes the profile.
 * - Otherwise the submitted values whose keys appear in
 *   import_html_profile_defaults() are collected and stored under the
 *   submitted profile ID.
 */
function import_html_profile_settings_submit($form_id, $form_values) {
  if ($form_values['profile_id'] == 'new') {
    drupal_set_message(t('Not saving details called "new"', array()));
    return;
  }
  if ($form_values['op'] == t('Delete Profile')) {
    import_html_profile($form_values['profile_id'], FALSE);
    drupal_set_message(t('%profile_id Profile Deleted', array('%profile_id' => $form_values['profile_id'])));
    return;
  }

  // Save only the variables I care about. Start from an explicit array so
  // the profile is well-defined even if the defaults list is empty, and
  // read unsubmitted keys as NULL without raising undefined-index notices.
  $profile = array();
  foreach (array_keys(import_html_profile_defaults()) as $key) {
    $profile[$key] = isset($form_values[$key]) ? $form_values[$key] : NULL;
  }

  // Save it as a blob.
  import_html_profile($form_values['profile_id'], $profile);
  drupal_set_message(t('Import HTML %profile_id Profile Updated', array('%profile_id' => $form_values['profile_id'])));
}
548
549
/**
 * This is a subset of the general settings - these options are also reproduced
 * on later action pages where they can be tweaked closer to the action. So
 * the form is saved in a block of its own.
 *
 * Elements produced: 'import_category', 'import_status', 'import_promote',
 * 'import_user', 'create_menus' and, when the menu module is enabled,
 * 'menu_parent_id'.
 *
 * @param $profile
 *   Profile array providing the default values.
 *
 * @return
 *   A Forms API array fragment.
 */
function import_html_subsettings_form($profile) {
  $form = array();

  // Taxonomy box:
  // note, this doesn't show up for vocabs with only one term. Sorta annoying.
  if ($taxonomy = module_invoke('taxonomy', 'form_all', 1)) {
    $form['import_category'] = array(
      '#type' => 'select',
      '#title' => t('Add imported nodes to the following category(s)'),
      '#default_value' => $profile['import_category'],
      '#prefix' => '<div class="criterion">',
      '#size' => 10,
      '#suffix' => '</div>',
      '#options' => $taxonomy,
      '#multiple' => TRUE,
    );
  }
  else {
    $form['import_category'] = array(
      '#value' => t("<p>There are no vocabularies available to tag the imported content with. <a href='!add_vocab'>Create one</a> if you like.</p>", array('!add_vocab' => url('admin/content/taxonomy'))),
    );
  }

  // Status selection:
  $form['import_status'] = array(
    '#type' => 'checkbox',
    '#title' => t('Imported nodes are Published?'),
    '#default_value' => $profile['import_status'],
    '#description' => t("
      Sets the node status. Check to have nodes published.
    "),
  );

  // Promote selection: added by L0rne.
  // The element key must be 'import_promote': that is the key the default
  // value is read from and the key import_html_subsettings_submit() copies
  // back into the profile. Under any other key the choice is never saved.
  $form['import_promote'] = array(
    '#type' => 'checkbox',
    '#title' => t('Imported Nodes are Promoted to front page?'),
    '#default_value' => $profile['import_promote'],
    '#description' => t("
      Sets whether or not imported nodes are promoted to the front page.
    "),
  );

  $form['import_user'] = array(
    '#type' => 'textfield',
    '#title' => t('User to create nodes as'),
    '#maxlength' => 60,
    '#autocomplete_path' => 'user/autocomplete',
    '#default_value' => $profile['import_user'],
  );

  if (module_exists('menu')) {
    $form["create_menus"] = array(
      '#type' => 'checkbox',
      '#title' => t("Add each page to menu"),
      '#return_value' => TRUE,
      '#default_value' => $profile['create_menus'],
      // This branch only runs when the menu module is present, so the text
      // is a fixed literal rather than a string assembled inside t().
      '#description' => t('requires menu.module(installed)'),
    );
    $options = menu_parent_options(0);
    $form["menu_parent_id"] = array(
      '#type' => 'select',
      '#title' => t('Menu Parent'),
      '#default_value' => $profile['menu_parent_id'],
      '#options' => $options,
      '#description' => t("
        Where in the menu system should the new heirachy
        of pages be built?
        <br/>
        If you leave this as 'Navigation' the new pages
        may start to collide with your Admin menus,
        so consider making a placeholder menu first.
      "),
    );
  }
  else {
    $form["create_menus"] = array(
      '#type' => 'markup',
      '#value' => t("
        Menu is not enabled, so the heirachical structure
        of the imported files will NOT be retained.
      "),
      '#description' => t("
        <p>If you enable the menu.module,
        the structure of the imported files can be imported as well.</p>
        <p>Note, that in order for this to work, all the apparent parents
        of a page must have menu items as well. If importing a subsection,
        Placeholders for the higher sections will be created, but they may be
        hollow shells.</p>
      ")
    );
  }
  return $form;
}
651
652
/**
 * Handle incidental settings when submitted from other pages - like during
 * the import process.
 *
 * Loads the profile named by $edit['profile_id'], copies across each known
 * sub-setting that was submitted with a value differing from the stored one,
 * and stores the profile again.
 *
 * @return
 *   Always FALSE.
 */
function import_html_subsettings_submit($form_id, $edit) {
  $profile_id = $edit['profile_id'];
  $profile = import_html_profile($profile_id);

  // The only keys this handler is allowed to touch.
  $overridable = array(
    'import_html_siteroot',
    'import_html_current_subsection',
    'import_site_prefix',
    'import_category',
    'import_status',
    'import_promote',
    'import_user',
    'create_menus',
    'menu_parent_id',
  );

  foreach ($overridable as $setting) {
    if (!isset($edit[$setting])) {
      // Not part of this submission; leave the stored value alone.
      continue;
    }
    if ($profile[$setting] != $edit[$setting]) {
      $profile[$setting] = $edit[$setting];
    }
  }

  import_html_profile($profile_id, $profile);
  return FALSE;
}
678
/**
 * Returns a text summary of all the current settings.
 *
 * @param $profile
 *   Profile array to describe.
 *
 * @return
 *   A themed box containing an item list of human-readable statements about
 *   the profile, followed by a link to the settings page.
 */
function import_html_profile_summary($profile) {
  $dest_file_root = url($profile['file_storage_path'], NULL, NULL, TRUE);
  $dest_virtual_path = url($profile['import_site_prefix'], NULL, NULL, TRUE);
  $menu_parent = menu_get_item($profile['menu_parent_id']);

  $substitutions = array(
    '%import_html_siteroot' => $profile['siteroot'] ,
    '%import_html_current_subsection' => $profile['current_subsection'],
    '!dest_virtual_path' => l($dest_virtual_path, $dest_virtual_path ),
    '!dest_file_root' => l($dest_file_root, $dest_file_root),
    '!settings_url' => url(IMPORT_HTML_ADMIN_PATH .'/settings'),
    '!translation_template' => l(basename($profile['translation_template']), $profile['translation_template']),
    '%import_html_default_document' => $profile['default_document'],
    '%import_html_create_menus' => $profile['create_menus']?'will':'will not',
    '%menu_parent_name' => $menu_parent['title'],
    '%import_html_content_type' => $profile['content_type'],
    '%import_html_import_site_prefix' => $profile['import_site_prefix'],
    '%import_html_import_status' => $profile['import_status']?'will':'will not',
    '%import_html_import_promote' => $profile['import_promote']?'will':'will not',
  );

  $summary = array();
  if ($profile['siteroot']) {
    $summary[] = t("
      Source files will be scanned from
      <br/><code>%import_html_siteroot%import_html_current_subsection</code>", $substitutions);
  }
  $summary[] = t("
    Pages will be re-created underneath
    <br/><code>!dest_virtual_path%import_html_current_subsection</code>
    <br> as <strong>%import_html_content_type</strong> type nodes.", $substitutions);

  $summary[] = t("
    Import template for semantic data extraction is <code>!translation_template</code>
    ", $substitutions);

  $summary[] = $profile['rewrite_links']
    ? t("Links found within the sources <b>will</b> be rewritten to try and allow for the new paths")
    : t("Links from within the imported pages will <b>not</b> be rewritten, and may get lost.");

  // The settings form stores this option as 'strip_scripts'. The older
  // 'strip_script_tags' key is still honoured in case a stored profile has it.
  if (!empty($profile['strip_scripts']) || !empty($profile['strip_script_tags'])) {
    $summary[] = t("All script tags found in the source will be discarded");
  }

  $summary[] = t("Non-page files imported will be saved beneath <code>!dest_file_root %import_html_current_subsection</code> ", $substitutions);

  $summary[] = t("Default document <code>%import_html_default_document</code> will be used to represent folders.", $substitutions);

  $summary[] = t("New pages %import_html_create_menus be added to the menu underneath %menu_parent_name.", $substitutions);
  if ($profile['menu_parent_id'] == 1) {
    $summary[] = t("<strong>Warning:</strong> The parent menu is set to the default 'Navigation' menu. This is OK, but will place new pages directly into the top of your menu which can be messy. You may instead wish to create a unique menu holder or menu item and use that as the import menu root instead. 'Primary Links' is also a good choice.", $substitutions);
  }
  $summary[] = t("Imported pages %import_html_import_status be published by default and %import_html_import_promote be promoted to the front page.", $substitutions);

  $output = t("<p>These preferences and more are changeable <a href='!settings_url'>in the settings</a>.</p>", $substitutions);

  $title = t('%profile_label import profile:', array('%profile_label' => $profile['profile_id']));
  return theme('box', $title, theme("item_list", $summary) . $output );
}
740
741
742
743 ///////////////////////////////////////////
744 // IMPORT PROCESS
745
/**
 * A multi-part 'wizard' style form.
 * Step 1 - intro and set parameters
 * Step 2 - display files and select them
 *        - submit that to run the import
 *
 * The current step number travels in a hidden 'step' element. Step 2 also
 * stores the normalised source root and subsection with variable_set().
 *
 * @param $form_values
 *   Values submitted from the previous step, or NULL on the first display.
 *
 * @return
 *   A multistep Forms API array.
 */
function import_html_process_form($form_values = NULL) {
  $form = array();

  // Keep track of the steps.
  if (!isset($form_values)) {
    $step = 1;
  }
  else {
    $step = $form_values['step'] + 1;
  }

  $form['step'] = array(
    '#type' => 'hidden',
    '#value' => $step,
  );

  $profile_id = import_html_current_profile_id();
  $profile = import_html_profile($profile_id);

  switch ($step) {
    case 1:
      // Display current options and path prior to filesystem scan.
      $form['step1'] = import_html_select_source_form($profile);
      $form['summary'] = array('#value' => import_html_profile_summary($profile));

      if (!module_exists('path')) {
        drupal_set_message(t("The <b>path</b> module is not enabled! If you try to import pages without paths, I can do it, but links won't work and you'll find it hard to find the pages again."), 'warning');
      }
      break;

    case 2:
      // Tidy and analyse the data submitted from step 1.
      $base_path = ensure_trailing_slash(foreslash($form_values['source_siteroot'])); // win32 safe.
      variable_set('import_html_siteroot', $base_path);
      $form_values['base_path'] = $base_path;

      // The subsection is kept relative: trailing slash, no leading slash.
      $current_subsection = ensure_trailing_slash($form_values['import_html_current_subsection']);
      $current_subsection = preg_replace('|^/|', '', $current_subsection);
      variable_set('import_html_current_subsection', $current_subsection);
      $form_values['current_subsection'] = $current_subsection;

      // Create the file selection form.
      // TODO can I avoid doing this file list again when I'm already submitting what I know?
      $form['step2'] = import_html_list_filesystem($form_values, $profile);
      break;

    case 3:
      // Files have been selected. Nothing further is added to the form here.
      break;
  }

  // This part is important!
  $form['#multistep'] = TRUE;
  $form['#redirect'] = FALSE;

  return $form;
}
809
/**
 * Validate each step of the import process by handing the submission down to
 * the per-page validator.
 *
 * Before dispatching, 'base_path' and 'current_subsection' are derived from
 * the raw input and added to the values passed on.
 *
 * @return
 *   Whatever the step's validator returns; nothing for other steps.
 */
function import_html_process_form_validate($form_id, $form_values) {

  // Massage the input to be forgiving and figure what we really mean.
  // foreslash() keeps this win32 safe.
  $form_values['base_path'] = ensure_trailing_slash(foreslash($form_values['source_siteroot']));

  // Subsection: trailing slash added, one leading slash removed.
  $subsection = ensure_trailing_slash($form_values['import_html_current_subsection']);
  $form_values['current_subsection'] = preg_replace('|^/|', '', $subsection);

  $step = $form_values['step'];
  if ($step == 1) {
    // Check the step 1 submissions.
    return import_html_select_source_form_validate($form_id, $form_values);
  }
  if ($step == 2) {
    // Check the step 2 submissions - all the files have been chosen.
    return import_html_list_filesystem_validate($form_id, $form_values);
  }
}
831
/**
 * Submit dispatcher for the multistep import form.
 *
 * Step 1 needs no action. Step 2 collects the ticked file checkboxes and
 * either queues or imports them, depending on which button was pressed.
 *
 * @param $form_id
 *   The id of the form being submitted.
 * @param $form_values
 *   The submitted values. Reads 'step', 'op' and 'file_rel_path'.
 */
function import_html_process_form_submit($form_id, $form_values) {
  switch ($form_values['step']) {
    case 1:
      // Process the step 1 submissions (no action just yet)
      break;

    case 2:
      // Received the step 2 submissions - Queue the selected files.
      // The checkboxes come back as one value per file: the relative path
      // when ticked, an empty value when not. Keep only the ticked ones.
      // (Dropping index 0 blindly would lose the first file whenever its
      // box was ticked.)
      $selected_files = array();
      if (isset($form_values['file_rel_path']) && is_array($form_values['file_rel_path'])) {
        foreach ($form_values['file_rel_path'] as $rel_path) {
          if (!empty($rel_path)) {
            $selected_files[] = $rel_path;
          }
        }
      }
      $selected_files = array_values(array_unique($selected_files));

      // Debug output is getting in the way of large imports!
      // Read the level on its own line: "$x = a && b" would store the
      // boolean result of the && rather than the level.
      $debug_level = import_html_variable('debug_level');
      $debug_suspended = ($debug_level && (count($selected_files) > 5));
      if ($debug_suspended) {
        drupal_set_message("Temporarily disabling debug level for large imports", 'warning');
        import_html_variable('debug_level', 0);
      }

      switch ($form_values['op']) {
        case t('Queue Files') :
          import_html_queue_files($selected_files, $form_values);
          break;

        case t('Import Files') :
          $results = import_html_import_files($selected_files, $form_values);
          // Only report an import count when an import actually ran.
          drupal_set_message(t("Imported %count items", array('%count' => count($results))));
          break;
      }

      // Put the debug level back, but only if it was changed above.
      if ($debug_suspended) {
        import_html_variable('debug_level', $debug_level);
      }
      break;

    case 3:
      // Should be unreachable. dsm() is not defined in this file, so do not
      // let its absence turn this into a fatal error.
      if (function_exists('dsm')) {
        dsm('How did I get here?');
      }
      break;
  }
}
869
870
/**
 * Form definition for choosing where to import from.
 *
 * Asks for the site root on the server and an optional subsection to list,
 * and embeds the profile's sub-settings in a collapsed fieldset.
 *
 * @param $profile
 *   The import profile. 'profile_id' is read here; the whole profile is
 *   passed on to import_html_subsettings_form().
 *
 * @return
 *   Drupal form array.
 */
function import_html_select_source_form($profile) {
  // The source selection fieldset and its two text fields.
  $source = array(
    '#type' => 'fieldset',
    '#title' => t("Select HTML file source"),
  );

  $source['source_siteroot'] = array(
    '#type' => 'textfield',
    '#title' => t("Site Root on the Server"),
    '#default_value' => variable_get('import_html_siteroot', "/var/www/htdocs/"),
    '#description' => t("
Where to read from.
<br/><b>If the files are on the server</b> (local to Drupal)
Enter the absolute or drupal-root-relative location of the site to import.
You must have access permissions.
Relative paths and aliases will be calculated from here.
<br/>eg: <code>/var/www/old_site/htdocs</code>, or <code>sites/default/files/copy-of-old-site</code>
"),
  );

  $source['import_html_current_subsection'] = array(
    '#type' => 'textfield',
    '#title' => t("Subsection to list"),
    '#default_value' => variable_get('import_html_current_subsection', '' ),
    '#size' => 20,
    '#description' => t("
For large sites, its more convenient to just list and process sections.
If a subdirectory is specified, only that will be displayed."
    ),
  );

  // Assemble the form. Elements are added in display order.
  $form = array();
  $form['htmlsource'] = $source;

  $form[] = array(
    '#type' => 'submit',
    '#value' => t('Next'),
  );

  // Allow the user to change some of the big settings here also.
  // A submit callback is registered so these settings get saved
  // generically and persistently.
  $form['profile_id'] = array(
    '#type' => 'hidden',
    '#value' => $profile['profile_id'],
  );
  $form['#submit']['import_html_subsettings_submit'] = array();

  // Present the sub-settings as a collapsed "More Settings" fieldset.
  $settings = import_html_subsettings_form($profile);
  $settings['#type'] = 'fieldset';
  $settings['#collapsible'] = TRUE;
  $settings['#collapsed'] = TRUE;
  $settings['#title'] = t("More Settings");
  $form['import_html_settings'] = $settings;

  return $form;
}
925
/**
 * Validate the source selection: the chosen directory must exist.
 *
 * @param $form_id
 *   The id of the form being validated.
 * @param $form_values
 *   The submitted values. Reads 'base_path' and 'current_subsection',
 *   which the caller is expected to have normalised already.
 */
function import_html_select_source_form_validate($form_id, $form_values) {
  $working_path = $form_values['base_path'] . $form_values['current_subsection'];
  if (! is_dir($working_path)) {
    // Flag the error against the name of the actual subsection textfield
    // ('import_html_current_subsection') so the field is highlighted.
    form_set_error('import_html_current_subsection', t("Directory %dir does not exist or is unreadable.", array('%dir' => $working_path)));
  }
}
933
934
935
/**
 * Given a local filepath, build a form listing all the files found in it.
 *
 * The directory is scanned recursively, filtered by the profile's file
 * exclusions, converted into a tree and then into nested checkbox elements.
 * Presentation enhancements (multi-selection helpers) are attached later by
 * the theme function and its javascript.
 *
 * A form api form def used in step 2 of the import process.
 *
 * @param $form_values
 *   Values carried over from the previous step. Reads 'base_path',
 *   'current_subsection' and 'profile_id'.
 * @param $profile
 *   The import profile. Reads 'file_exclusions'.
 *
 * @return
 *   Drupal form array.
 */
function import_html_list_filesystem($form_values, $profile) {

  $base_path = $form_values['base_path'];
  $current_subsection = $form_values['current_subsection'];

  debug("Listing contents of $base_path [$current_subsection]", 2);
  $working_path = $base_path . $current_subsection;

  // Meh, list everything and filter them using user prefs
  $dir_structure = file_scan_directory(trim_trailing_slash($working_path), ".*");

  // file_scan_directory returns a flat array. Convert it into a tree structure, then render it.
  $tree = import_html_sort_list_into_tree($dir_structure, $base_path, $profile['file_exclusions']);

  $form = array();

  // Note this info in the form for the rendering function to display later.
  // The count is of everything scanned, before exclusions are applied.
  $form['file_count'] = array('#type' => 'value', '#value' => count($dir_structure));

  // Note the current context as the submit process needs to know
  $form['current_subsection'] = array('#type' => 'value', '#value' => $current_subsection);
  $form['base_path'] = array('#type' => 'value', '#value' => $base_path);
  $form['profile_id'] = array('#type' => 'value', '#value' => $form_values['profile_id']);

  // UI thingie. This is a placeholder.
  // Actual functionality gets added via js post-load.
  // @see filetype_selectors.js
  $form['selectors'] = array(
    '#weight' => -2,
    '#value' => "<p id='import-html-selectors'>Select Files to import</p>",
  );

  // Need to convert the structured tree into a structured form
  $form['filesystem'] = _import_html_tree_to_form($tree, "", "/");
  $form['#theme'] = 'import_html_list_filesystem';

  $form['action'] = array(
    '#value' => '<p>'. t("Either run the import now, or queue the selected files for processing later - may be required for large jobs") .'</p>',
  );
  $form['queue'] = array(
    '#type' => 'submit',
    '#value' => t('Queue Files'),
  );
  $form['import'] = array(
    '#type' => 'submit',
    '#value' => t('Import Files'),
  );

  return $form;
}
1007
1008
/**
 * Validate the file selection step: at least one file must be ticked.
 *
 * @param $form_id
 *   The id of the form being validated.
 * @param $form_values
 *   The submitted values. Reads 'file_rel_path', one entry per file
 *   checkbox: the relative path when ticked, an empty value otherwise.
 */
function import_html_list_filesystem_validate($form_id, $form_values) {
  // Collect the ticked checkboxes. Empty values are unticked boxes.
  // (Dropping index 0 blindly would discard the first file whenever its
  // box was ticked, and leave a stray empty value behind.)
  $selected_files = array();
  if (isset($form_values['file_rel_path']) && is_array($form_values['file_rel_path'])) {
    foreach ($form_values['file_rel_path'] as $rel_path) {
      if (!empty($rel_path)) {
        $selected_files[] = $rel_path;
      }
    }
  }

  if (empty($selected_files)) {
    form_set_error('file_rel_path', t("No Files Selected. Nothing to import"));
  }
}
1020
/**
 * From a flat list of file defs, build a tree structure, and annotate it like
 * the forms API does.
 *
 * Every directory level becomes a nested array keyed by the directory name
 * and marked '#type' => 'disk_folder'; every file becomes a leaf marked
 * '#type' => 'disk_file'. Each node also carries '#value' (path relative to
 * $base_path), '#description' (base name) and '#filename' (full path).
 *
 * todo DEPRECATE and include the formtree extension
 *
 * @param $dir_structure
 *   Array keyed by full file path; each value is an object with a
 *   'basename' property.
 * @param $base_path
 *   Leading part of every file path, stripped to get the relative path.
 * @param $file_exclusions
 *   Regular expressions, one per line. A file is skipped when any of them
 *   matches its relative path. Blank lines are ignored, so an empty string
 *   excludes nothing.
 *
 * @return
 *   The nested tree array.
 */
function import_html_sort_list_into_tree($dir_structure, $base_path, $file_exclusions = "^_\nCVS\n^\\.") {

  // One pattern per line, tolerant of windows line endings. Blank lines
  // must be dropped: an empty pattern matches every path and would
  // exclude every file.
  $exclusions = array();
  $ex = trim(str_replace("\r", '', (string) $file_exclusions));
  foreach (explode("\n", $ex) as $regexp) {
    if ($regexp !== '') {
      $exclusions[] = $regexp;
    }
  }

  // Use a control character as the regexp delimiter, so that patterns may
  // freely contain '|' (alternation), '/' or any other printable character.
  $delimiter = chr(1);

  $tree = array();
  foreach ($dir_structure as $file_path => $file_info) {
    $rel_path = substr($file_path, strlen($base_path));

    // skip if it's a hidden/excluded file
    foreach ($exclusions as $regexp) {
      if (preg_match($delimiter . $regexp . $delimiter, $rel_path)) {
        continue 2;
      }
    }

    // Split into path components, ignoring empty ones (leading, trailing or
    // doubled separators).
    $ancestors = preg_split("/[\/\:]+/", $rel_path, -1, PREG_SPLIT_NO_EMPTY);
    if (empty($ancestors)) {
      // Nothing below the base path: not a file we can place in the tree.
      continue;
    }

    $twig = & $tree; // find the current twig, starting from the top
    $path = '';
    // Walk down the path. A foreach (rather than testing the shifted value
    // for truth) keeps going through components named "0".
    foreach ($ancestors as $dad) {
      if (!array_key_exists($dad, $twig)) {
        // Add new branch. Anything that gains a child is a folder.
        $twig['#type'] = 'disk_folder';
        $twig['#value'] = $path;
        $twig['#description'] = basename($path);
        $twig['#filename'] = $base_path . $path;

        $twig[$dad] = array();
      }
      $path .= $dad .'/';
      $twig = & $twig[$dad];
    }

    // The last component is the file itself.
    $twig['#type'] = 'disk_file';
    $twig['#description'] = $file_info->basename;
    $twig['#filename'] = $file_path;
    $twig['#value'] = $rel_path;
  }
  // Break the reference so nothing later can write into the tree by accident.
  unset($twig);

  return $tree;
}
1066
1067
/**
 * Recursive form construction function to format a tree-like structure of
 * hierarchical items.
 *
 * The function is self-referentially recursive - depth-first formatting of a
 * tree menu. Folders become fieldsets holding a toggle checkbox plus a
 * 'children' container; files become single checkboxes.
 *
 * @param $tree
 *   array The (sub)tree to convert, annotated with '#type', '#value',
 *   '#description' and '#filename'.
 * @param $name
 *   string The name of the item being added right now.
 * @param $prefix
 *   string The path so far.
 * @param $url
 *   string Not used by this function; kept so existing calls stay valid.
 *
 * @return
 *   Form element array.
 */
function _import_html_tree_to_form($tree, $name, $prefix = "", $url = "?") {
  $path = (($prefix != "/") ? $prefix ."/" : "") . $name;
  // $id is a safe name - a unique id derived from the item path
  // -- OK, these are becoming stupidly long - any reason not to just use a counter?
  $id = "ID_" . preg_replace("/[^\w]/", "", $path);

  // To sorta-but-not-really 'tree' and allow multiple values for identically named checkboxes,
  // we keep a running list of numbers. FAPI doesn't read in multiples unless we told it to.
  // We cheat by setting the '#parents' manually before rendering, and using a counter.
  // Start from 0 explicitly so the first index used is not NULL.
  static $checkbox_count = 0;

  $is_folder = (isset($tree['#type']) && $tree['#type'] == 'disk_folder');

  // special case: if there is no value (we are at root) show the working dir
  if (empty($tree['#value'])) {
    $tree['#value'] = "<big>". variable_get('import_html_siteroot', "") ."</big>";
  }

  $element = array();
  $element['#return_value'] = $tree['#value'];
  $item_type = $is_folder ? "container" : import_html_guess_file_class(isset($tree['#filename']) ? $tree['#filename'] : NULL);
  $element['#item-type'] = $item_type; // made-up attribute to pass to theme
  $element['#item-path'] = $path; // made-up attribute to pass to theme
  $element['#attributes']['class'] = $item_type;

  if ($is_folder) {
    $element['#type'] = 'fieldset';
    $element['#title'] = $tree['#value'];
    $element['#attributes']['class'] = "tree-branch";
    $element['#theme'] = 'import_html_filesystem_tree_container';
    // checkbox toggle for the children group
    $element['file_rel_dir'] = array(
      '#type' => 'checkbox',
      '#title' => $tree['#value'],
      '#id' => $id .'-toggle',
      '#item-type' => 'container',
      // NOTE(review): '#return-value' (hyphen) is not the FAPI property
      // '#return_value', so this looks like a typo. Left as is because
      // correcting it would change the value rendered for folder toggles.
      '#return-value' => $tree['#value'],
      // Need to allow multiples, but they are essentially flat, not tree-d
      '#parents' => array('file_rel_dir', $checkbox_count++),
    );
    $element['file_rel_dir']['#attributes']['class'] = "tree-branch-toggle";
  }
  else {
    $element['#type'] = 'checkbox';
    $element['#title'] = isset($tree['#description']) ? $tree['#description'] : NULL;
    $element['#theme'] = 'import_html_filesystem_tree_item';
    $element['#id'] = $id .'-checkbox';
    $element['#attributes']['class'] = "tree-leaf";

    // Need to allow multiples, but they are essentially flat, not tree-d
    $element['#parents'] = array('file_rel_path', $checkbox_count++);
  }

  // Construct the content

  if (element_children($tree)) {
    // This node has content to be recursed into. Add it as a child
    ksort($tree);
    // Just a container array. Themed later
    $element['children'] = array();
    $element['children']['#id'] = $id .'-content';
    $element['children']['#attributes']['class'] = "tree-content";
    foreach ($tree as $twigname => $twig) {
      // Skip the '#...' annotations, only real children are recursed into.
      if (element_property($twigname)) {
        continue;
      }
      $element['children']['file_rel_path'][] = _import_html_tree_to_form($twig, $twigname, $path);
    }
  }
  return $element;
}
1146
/**
 * Theme the file listing form.
 *
 * Attaches the tree view behaviour, its stylesheet and the file type
 * selectors script, then renders the form as usual.
 * (Should the selectors magic be put here?)
 *
 * @param $element
 *   The form to render.
 *
 * @return
 *   Rendered HTML.
 */
function theme_import_html_list_filesystem($element) {
  $module_dir = drupal_get_path('module', 'import_html');

  drupal_add_js($module_dir .'/toggle_treeview.js');
  drupal_add_css($module_dir .'/treeview.css');
  drupal_add_js($module_dir .'/filetype_selectors.js');

  return drupal_render($element);
}
1156
/**
 * Basically the theme_checkbox, but without form-item div cruft.
 *
 * Renders one checkbox wrapped in its label.
 *
 * @param $element
 *   A checkbox form element. Reads '#name', '#id', '#return_value',
 *   '#value', '#attributes', '#item-type' and '#title'.
 *
 * @return
 *   Rendered HTML.
 */
function theme_import_html_filesystem_tree_item($element) {
  // Build the input through drupal_attributes() so that every attribute
  // value is escaped. File paths may contain quotes or markup characters,
  // which would break a hand-assembled tag and the value it submits.
  $input_atts = array(
    'type' => 'checkbox',
    'name' => $element['#name'],
    'id' => $element['#id'],
    'value' => $element['#return_value'],
  );
  if (!empty($element['#value'])) {
    $input_atts['checked'] = 'checked';
  }
  if (!empty($element['#attributes']) && is_array($element['#attributes'])) {
    // Extra attributes (class etc.) never override the essentials above.
    $input_atts += $element['#attributes'];
  }
  $checkbox = '<input'. drupal_attributes($input_atts) .' />';

  $label_atts = array(
    'class' => $element['#item-type'] .'-item file-label',
    'title' => $element['#return_value'],
    'for' => $element['#id'],
  );
  if ($element['#item-type'] == 'container') {
    $label_atts['class'] = 'tree-branch-label';
  }

  // The title is output as is: it can legitimately contain markup (the
  // root folder title is wrapped in <big>).
  $checkbox = '<label '. drupal_attributes($label_atts) .'>'. $checkbox .' '. $element['#title'] .'</label>';
  return $checkbox;
}
1177
/**
 * Theme a filesystem tree container.
 *
 * The container is a fieldset holding one toggle checkbox ('file_rel_dir')
 * and a collection of 'children'. Only those two parts are output; if you
 * want the normal rendering back (description, value) put it in yourself.
 *
 * @param $element
 *   The container form element.
 *
 * @return
 *   Rendered HTML.
 */
function theme_import_html_filesystem_tree_container($element) {
  // The whole-group checkbox comes first
  $toggle = theme('import_html_filesystem_tree_item', $element['file_rel_dir']);

  // followed by the children, wrapped in a div carrying their attributes.
  $wrapper_atts = drupal_attributes($element['children']['#attributes']);
  $content = drupal_render($element['children']);

  return $toggle .'<div'. $wrapper_atts .'>'. $content .'</div>';
}
1192
1193
1194 /////////////////////////////////////////////
1195 // DEMO
1196 /////////////////////////////////////////////
1197
1198
1199
1200 /**
1201 * Show an interface to import just one file
1202 */
1203 function import_html_demo_form() {
1204 $form = array();
1205
1206 $form = array(
1207 '#method' => 'post',
1208 );
1209
1210 $form['Select'] = array(
1211 '#type' => 'fieldset',
1212 '#title' => t("Select HTML file source"),
1213 );
1214 $form['Select']['source_url'] = array(
1215 '#type' => 'textfield',
1216 '#title' => t("URL to import"),
1217 '#default_value' => drupal_get_path('module', 'import_html') . '/templates/input_example_03.htm',
1218 '#size' => 80,