/[drupal]/contributions/modules/wordpress_import/wordpress_import.module
ViewVC logotype

Contents of /contributions/modules/wordpress_import/wordpress_import.module

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.4 - (show annotations) (download) (as text)
Sat Dec 29 03:41:19 2007 UTC (22 months, 4 weeks ago) by yrocq
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +14 -2 lines
File MIME type: text/x-php
#204535 by yrocq : Wordpress posts with 'draft' or 'private' state are imported unpublished.
1 <?php
2 /* $Id: wordpress_import.module,v 1.3 2007/12/29 01:19:24 yrocq Exp $ */
3 /*
4 Wordpress Import Drupal module : Import a Wordpress WXR file into Drupal
5 Copyright (C) 2007 Yann Rocq <yann@rocq.net> - http://www.rocq.net/yann/
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write to the Free Software Foundation, Inc.,
19 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
/**
 * Implementation of hook_help().
 *
 * @param string $section path of the help page being requested
 * @return string help text as HTML, or an empty string for any other section
 */

function wordpress_import_help($section='') {
  // Only the module help page gets a text; everything else stays empty.
  if ($section == "admin/help#wordpress_import") {
    return '<p>'. t("This module import a WXR file generated by Worpdress blog software into Drupal"). '</p>';
  }

  return '';
}
37
/**
 * Implementation of hook_menu().
 *
 * Declares the single administration page of the module.
 *
 * @return array menu items
 */

function wordpress_import_menu() {
  global $user;

  $import_page = array();
  $import_page['path'] = 'admin/content/wordpress_import';
  $import_page['title'] = t('Wordpress import');
  $import_page['description'] = t('Import Wordpress WXR file');
  $import_page['callback'] = 'wordpress_import_home';
  // Access is granted only to the account whose uid is 1.
  $import_page['access'] = ($user->uid == 1);
  $import_page['type'] = MENU_NORMAL_ITEM;

  return array($import_page);
}
57
/**
 * Form builder for the multistep Wordpress import form.
 *
 * Step 1 asks for the WXR file, step 2 asks for the user mapping and the
 * import options, step 3 only displays a confirmation message.
 *
 * @param array $form_values values submitted for the previous step, NULL on first display
 * @return array form definition
 */

function wordpress_import_form($form_values=NULL)
{
  // This is a multistep form: the step to build is the one that follows the
  // step that has just been submitted.
  $step = isset($form_values) ? $form_values['step'] + 1 : 1;

  $form = array();
  $form['step'] = array(
    '#type' => 'hidden',
    '#value' => $step,
  );

  switch ($step)
  {
    // Step 1 : Upload form

    case 1:
      // Forget everything kept from a previous import
      unset($_SESSION['wordpress_import']);
      $form['informations'] = array(
        '#value' => t('This module will import a WXR file generated by Wordpress. You MUST make a backup copy of your Drupal Database before proceeding')
      );
      $form['upload'] = array(
        '#type' => 'file',
        '#title' => t('Upload your WXR file'),
        '#size' => 40
      );
      break;

    // Step 2 : Options setting

    case 2:
      $wordpress = wordpress_import_items($_SESSION['wordpress_import']['file_path']);
      // The author list may be empty (or not an array at all) when the file
      // contains no post, so it is normalised before being iterated.
      $users = (array) wordpress_import_get_users($wordpress);

      // Users mapping

      $form['users_map'] = array(
        '#title' => t('Users mapping'),
        '#type' => 'fieldset',
        '#description' => t('Map each wordpress user to a drupal user'),
        '#collapsible' => TRUE,
        '#tree' => TRUE
      );

      // Every select offers the same choices: query the Drupal users once
      // instead of once per Wordpress author.
      $drupal_users = wordpress_import_drupal_users_list();

      foreach ($users as $users_value)
      {
        $form['users_map'][$users_value] = array(
          '#type' => 'select',
          '#title' => $users_value,
          '#options' => $drupal_users
        );
      }

      // Options

      $form['options'] = array(
        '#type' => 'fieldset',
        '#title' => t('Options'),
        '#collapsible' => TRUE
      );

      // The blog root is only known when the file contained a <link> line
      $wordpress_root = isset($wordpress['link']) ? $wordpress['link'] : '';

      $form['options']['alias'] = array(
        '#type' => 'checkbox',
        '#title' => t('Create path aliases'),
        '#description' => t('This option try to preserve the path of the wordpress original posts. It is useful only if the url of your drupal site root is the same as the worpdress site (!root) and if clean urls are activated', array('!root' => $wordpress_root))
      );
      break;

    // Step 3 : Confirmation message

    case 3:
      $form['submit'] = array('#value' => t('The Wordpress blog has been successfully imported'));
      break;
  }

  // Form settings

  // The last step has nothing left to submit, so it gets no "Next" button
  if ($step < 3)
  {
    $form['submit'] = array('#type' => 'submit', '#value' => t('Next'));
  }

  $form['#attributes']['enctype'] = 'multipart/form-data';
  $form['#multistep'] = TRUE;
  $form['#redirect'] = FALSE;

  return $form;
}
153
/**
 * Validation handler of the import form.
 *
 * Only the first step is checked: it must come with an uploaded file.
 *
 * @param string $form_id form identifier
 * @param array $form_values submitted values
 */

function wordpress_import_form_validate($form_id, $form_values)
{
  $is_upload_step = ($form_values['step'] == 1);

  if ($is_upload_step && !file_check_upload()) {
    form_set_error('upload', t('You must provide a WXR file.'));
  }
}
169
/**
 * Submit handler of the import form.
 *
 * Step 1 stores the uploaded WXR file and remembers its path in the session;
 * step 2 stores the chosen options in the session and runs the import.
 *
 * @param string $form_id form identifier
 * @param array $form_values submitted values
 */

function wordpress_import_form_submit($form_id, $form_values)
{
  switch ($form_values['step']) {
    // Step 1 : upload file

    case 1:
      $wordpress_path = file_create_path('wordpress');
      // NOTE(review): the second argument presumably asks Drupal to create
      // the directory when it is missing - confirm against file_check_directory()
      file_check_directory($wordpress_path, TRUE);
      $file_info = file_save_upload('upload', $wordpress_path.'/import.xml');

      // Saving the upload may fail; only a usable file object is trusted.
      if (is_object($file_info) && !empty($file_info->filepath)) {
        $_SESSION['wordpress_import']['file_path'] = $file_info->filepath;
      }
      else {
        drupal_set_message(t('The WXR file could not be saved.'), 'error');
      }
      break;

    // Step 2 : import blog

    case 2:
      $_SESSION['wordpress_import']['users_map'] = $form_values['users_map'];
      $_SESSION['wordpress_import']['alias'] = $form_values['alias'];

      wordpress_import_import_blog($_SESSION['wordpress_import']);
      break;
  }
}
195
/**
 * Page callback: display the Wordpress import form
 *
 * @return string whatever drupal_get_form() produces for 'wordpress_import_form'
 */

function wordpress_import_home()
{
  return drupal_get_form('wordpress_import_form');
}
205
/**
 * Run the whole import: categories first, then the tags vocabulary, then
 * the posts.
 *
 * @param array $params parameters; 'file_path' is read here, the rest is handed to the post import
 */

function wordpress_import_import_blog($params)
{
  $blog = wordpress_import_items($params['file_path']);

  // Enrich the parameters with the categories mapping and vocabulary id
  $categories_info = wordpress_import_import_categories($blog);
  $settings = array_merge($params, $categories_info);

  // ... and with the id of the vocabulary that will hold the tags
  $settings['tags_vocabulary'] = wordpress_import_create_tags_vocabulary();

  wordpress_import_posts($blog, $settings);
}
218
/**
 * Import categories
 *
 * Creates a vocabulary for the Wordpress categories and one term per
 * category found in the file.
 *
 * @param array $wordpress wordpress data; the optional 'categories' entry lists the category snippets
 * @return array 'categories_map' (category name => term id) and 'categories_vocabulary' (vocabulary id)
 */

function wordpress_import_import_categories($wordpress)
{
  // Create new taxonomy for wordpress categories

  $category_vocabulary = array(
    'name' => t('Wordpress category'),
    'hierarchy' => 2,
    'nodes' => array('page' => 'page', 'story' => 'story') // TODO: Adapt to chosen types
  );

  // NOTE(review): 'vid' is read right after this call, so the function is
  // expected to fill it in on the array it receives - confirm against the
  // taxonomy API.
  taxonomy_save_vocabulary($category_vocabulary);

  // Import categories

  // Always return an array for the mapping, even when the file has no category
  $category_mapping = array();
  $categories = isset($wordpress['categories']) ? $wordpress['categories'] : array();

  // TODO: parent categories (wp:category_parent) are not imported yet
  foreach ($categories as $value) {
    $category_name = wordpress_import_get_tag($value, 'wp:cat_name');
    $category_term = array(
      'name' => $category_name,
      'vid' => $category_vocabulary['vid']
    );

    // Same expectation as above: 'tid' is read from the saved array
    taxonomy_save_term($category_term);

    // Save mapping between Wordpress and Drupal categories

    $category_mapping[$category_name] = $category_term['tid'];
  }

  return array (
    'categories_map' => $category_mapping,
    'categories_vocabulary' => $category_vocabulary['vid']
  );
}
259
/**
 * Create the vocabulary that will receive the Wordpress tags
 *
 * @return integer id ('vid') of the vocabulary, as found on the saved array
 */

function wordpress_import_create_tags_vocabulary()
{
  $vocabulary = array();
  $vocabulary['name'] = t('Wordpress tag');
  $vocabulary['tags'] = 1;
  // TODO: Adapt to chosen types
  $vocabulary['nodes'] = array('page' => 'page', 'story' => 'story');

  taxonomy_save_vocabulary($vocabulary);

  return $vocabulary['vid'];
}
277
/**
 * Import posts and create path aliases if required
 *
 * An alias is the part of the post link that follows the blog root link. It
 * is only created when the post was imported, the 'alias' option is set and
 * the post link really starts with the blog root.
 *
 * @param array $wordpress wordpress data ('posts' and 'link' are read)
 * @param array $params parameters
 */

function wordpress_import_posts($wordpress, $params)
{
  $params['format'] = wordpress_import_create_format();

  $posts = isset($wordpress['posts']) ? $wordpress['posts'] : array();
  $root = isset($wordpress['link']) ? $wordpress['link'] : '';
  $root_length = strlen($root);
  $create_alias = !empty($params['alias']);

  foreach ($posts as $post_value) {
    $post_info = wordpress_import_post($post_value, $params);

    if (!$post_info || !$create_alias) {
      continue;
    }

    $link = wordpress_import_get_tag($post_value, 'link');

    // Cutting a fixed number of characters is only meaningful when the link
    // begins with the blog root; otherwise the alias would be garbage.
    if ($root_length == 0 || strncmp($link, $root, $root_length) != 0) {
      continue;
    }

    $alias = rtrim((string) substr($link, $root_length), '/');

    // Nothing is left when the link is the blog root itself
    if ($alias != '') {
      path_set_alias('node/'.$post_info['nid'], $alias);
    }
  }
}
300
/**
 * Import a post
 *
 * Creates a node from a Wordpress item ('post' becomes a promoted story,
 * 'page' becomes a page), attaches its categories and tags, then imports
 * its comments in chronological order.
 *
 * @param string $post xml snippet of the item
 * @param array $params parameters: 'users_map', 'categories_map',
 *   'categories_vocabulary', 'tags_vocabulary' and 'format' are read
 * @return array post infos ('nid'), or FALSE when the post is not imported
 */

function wordpress_import_post($post, $params)
{
  $creator = wordpress_import_get_tag($post, 'dc:creator');
  $uid = isset($params['users_map'][$creator]) ? $params['users_map'][$creator] : 0;

  // Don't import the post if the user hasn't been selected

  if (!$uid) {
    return FALSE;
  }

  // Get post type : blog entry (post) or page
  // Posts are promoted in front page but not the pages

  switch (wordpress_import_get_tag($post, 'wp:post_type'))
  {
    case 'post':
      $type = 'story';
      $promote = '1';
      break;
    case 'page':
      $type = 'page';
      $promote = '0';
      break;
    default:
      // Any other item type is ignored
      return FALSE;
  }

  // Parsing categories
  // TODO : support multilevel categories

  $drupal_categories = array();
  $drupal_tags = array();
  $categories_vid = $params['categories_vocabulary'];

  preg_match_all('|<category>(.*?)</category>|is', $post, $categories);

  foreach ($categories[1] as $value) {
    $category = str_replace(array ('<![CDATA[', ']]>'), '', $value);
    $drupal_category = isset($params['categories_map'][$category]) ? $params['categories_map'][$category] : NULL;

    // A category that was not imported as a term is kept as a tag
    if (empty($drupal_category)) {
      $drupal_tags[] = $category;
      continue;
    }

    // Accumulate the terms (a mapping entry may hold one id or several)
    // instead of replacing the ones found for the previous categories.
    foreach ((array) $drupal_category as $tid) {
      $drupal_categories[$categories_vid][$tid] = $tid;
    }
  }

  // Parsing tags

  preg_match_all('|<category domain=\"tag\">(.*?)</category>|is', $post, $tags);

  foreach ($tags[1] as $value) {
    $drupal_tags[] = str_replace(array ('<![CDATA[', ']]>'), '', $value);
  }

  if ($drupal_tags) {
    $drupal_categories['tags'][$params['tags_vocabulary']] = join(',', $drupal_tags);
  }

  // Defining status : drafts and private posts are imported unpublished

  switch (wordpress_import_get_tag($post, 'wp:status'))
  {
    case 'draft':
    case 'private':
      $status = FALSE;
      break;
    default:
      $status = TRUE;
  }

  // Inserting node

  $content = wordpress_import_get_tag($post, 'content:encoded');

  $node = array(
    'type' => $type,
    'teaser' => $content,
    'uid' => $uid,
    'title' => html_entity_decode(wordpress_import_get_tag($post,'title'), ENT_COMPAT, 'UTF-8'),
    'body' => $content,
    'format' => $params['format'],
    'status' => $status,
    'promote' => $promote,
    'created' => strtotime(wordpress_import_get_tag($post,'wp:post_date')),
    'comment' => wordpress_import_get_tag($post,'wp:comment_status')=='open'?COMMENT_NODE_READ_WRITE:COMMENT_NODE_READ_ONLY,
    'taxonomy'=> $drupal_categories
  );

  $node = (object)$node;
  node_save($node);
  $return['nid'] = $node->nid;

  // Importing comments

  preg_match_all('|<wp:comment>(.*?)</wp:comment>|is', $post, $comments);

  // Three parallel lists: the sort keys (timestamp, then position in the
  // file) and the comments themselves. Keying the comments by timestamp
  // would drop every comment sharing its timestamp with another one.
  $timestamps = array();
  $positions = array();
  $drupal_comments = array();

  foreach ($comments[1] as $comment)
  {
    $approved = wordpress_import_get_tag($comment, 'wp:comment_approved');

    // Spam is not imported
    if ($approved == 'spam') {
      continue;
    }

    // Only explicitly approved comments are published; any other value is
    // imported unpublished rather than inheriting an unrelated status.
    $comment_status = ($approved == '1') ? COMMENT_PUBLISHED : COMMENT_NOT_PUBLISHED;

    $timestamp = (int) strtotime(wordpress_import_get_tag($comment,'wp:comment_date'));
    $comment_content = wordpress_import_get_tag($comment,'wp:comment_content');

    $timestamps[] = $timestamp;
    $positions[] = count($positions);
    $drupal_comments[] = array(
      'nid' => $node->nid,
      'pid' => 0,
      'comment' => $comment_content,
      'subject' => trim(truncate_utf8(decode_entities(strip_tags($comment_content)), 29, TRUE)),
      'name' => decode_entities(strip_tags(wordpress_import_get_tag($comment,'wp:comment_author'))),
      'mail' => wordpress_import_get_tag($comment,'wp:comment_author_email'),
      'homepage' => wordpress_import_get_tag($comment,'wp:comment_author_url'),
      'timestamp' => $timestamp,
      'hostname' => wordpress_import_get_tag($comment,'wp:comment_author_IP'),
      'status' => $comment_status,
      'format' => $params['format']
    );
  }

  // Sorting comments by timestamp so the thread won't be messed up in Drupal.
  // The position breaks ties, keeping the file order for equal timestamps.

  if ($drupal_comments) {
    array_multisort($timestamps, SORT_ASC, SORT_NUMERIC, $positions, SORT_ASC, SORT_NUMERIC, $drupal_comments);
  }

  // Saving comments

  foreach ($drupal_comments as $drupal_comment_value)
  {
    $comment_id = comment_save($drupal_comment_value);

    // Nothing to update when the comment could not be saved
    if (!$comment_id) {
      continue;
    }

    // Updating fields that haven't been set by comment_save

    db_query("UPDATE {comments} SET hostname= '%s', timestamp= '%d', status= '%d' WHERE cid= %d", $drupal_comment_value['hostname'], $drupal_comment_value['timestamp'], $drupal_comment_value['status'], $comment_id);
  }

  return $return;
}
466
467 // Drupal help functions
468
/**
 * Get the path of the directory used by the module
 *
 * @return mixed result of file_create_path() for the 'wordpress' entry of the files directory
 */

function wordpress_import_directory()
{
  $directory = file_directory_path().'/'.'wordpress';

  return file_create_path($directory);
}
477
/**
 * Build the choices offered for each Wordpress author in the import form
 *
 * @return array user names keyed by user id; key 0 is the "Do not import" choice
 */

function wordpress_import_drupal_users_list()
{
  $rows = db_query("SELECT * FROM {users}");

  $choices = array(0 => t('Do not import'));

  while ($account = db_fetch_array($rows))
  {
    // Rows without a user id are left out: key 0 is taken by "Do not import"
    if (empty($account['uid']))
    {
      continue;
    }

    $choices[$account['uid']] = $account['name'];
  }

  return $choices;
}
499
/**
 * Fetch the input format named 'Wordpress', creating it when it does not exist.
 *
 * @return integer format id
 */

function wordpress_import_create_format()
{
  $filter_name = 'Wordpress';

  // Reuse the format left by a previous import, if any
  $existing = db_fetch_object(db_query("SELECT format FROM {filter_formats} WHERE name='%s'", $filter_name));
  if ($existing) {
    return $existing->format;
  }

  // Otherwise create it; its id is taken to be the highest one in the table
  db_query("INSERT INTO {filter_formats} (name,cache) VALUES ('%s',1)", $filter_name);
  $new_format = db_result(db_query("SELECT MAX(format) AS format FROM {filter_formats}"));

  // Attach one filter (module 'filter', delta 2) to the new format
  db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (%d, '%s', %d, %d)", $new_format, 'filter', 2, 0);

  return $new_format;
}
521
522 // Wordpress importing functions
523 // Adapted from wordpress code (http://wordpress.org/) released under GNU GENERAL PUBLIC LICENSE
524
/**
 * Parse a WXR file into an array
 *
 * The file is read line by line. The result may contain:
 * - 'categories': content of each <wp:category> element found on one line
 * - 'tags': content of each <wp:tag> element found on one line
 * - 'posts': the lines found between <item> and </item>, one string per
 *   item, indexed from 0
 * - 'link': content of the last <link> element found outside the items,
 *   with a trailing slash
 *
 * @param string $file file path
 * @return array wordpress data, empty when the file cannot be opened
 */

function wordpress_import_items($file)
{
  $wordpress = array();

  // Nothing to parse without a path
  if (!is_string($file) || $file === '') {
    return $wordpress;
  }

  $fp = fopen($file, 'r');
  if (!$fp) {
    return $wordpress;
  }

  // Index of the item being read, and whether we are inside an item
  $num = 0;
  $doing_entry = false;

  while ( !feof($fp) ) {
    $line = fgets($fp);
    if ($line === false) {
      break;
    }
    $importline = rtrim($line);

    if ( false !== strpos($importline, '<wp:category>') ) {
      // The element is only kept when it is complete on this line
      if (preg_match('|<wp:category>(.*?)</wp:category>|is', $importline, $category)) {
        $wordpress['categories'][] = $category[1];
      }
      continue;
    }
    if ( false !== strpos($importline, '<wp:tag>') ) {
      if (preg_match('|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag)) {
        $wordpress['tags'][] = $tag[1];
      }
      continue;
    }
    if ( false !== strpos($importline, '<item>') ) {
      $wordpress['posts'][$num] = '';
      $doing_entry = true;
      continue;
    }
    if ( false !== strpos($importline, '</item>') ) {
      $num++;
      $doing_entry = false;
      continue;
    }

    if ( $doing_entry ) {
      // Make sure the entry exists even if the opening line was missed
      if (!isset($wordpress['posts'][$num])) {
        $wordpress['posts'][$num] = '';
      }
      $wordpress['posts'][$num] .= $importline . "\n";
    }
    elseif ( false !== strpos($importline, '<link>') ) {
      // Outside the items, <link> is taken as the address of the blog
      if (preg_match('|<link>(.*?)</link>|is', $importline, $link)) {
        $wordpress['link'] = $link[1];
        if (substr($wordpress['link'], -1) != '/') {
          $wordpress['link'] .= '/';
        }
      }
    }
  }
  fclose($fp);

  return $wordpress;
}
581
/**
 * Get the authors of the posts found in the wordpress data
 *
 * @param array $wordpress wordpress data; the optional 'posts' entry lists the post snippets
 * @return array author names, keyed by themselves (empty when there is no post)
 */

function wordpress_import_get_users($wordpress)
{
  // Always hand back an array so that callers can iterate over the result
  $author = array();

  if (empty($wordpress['posts']) || !is_array($wordpress['posts'])) {
    return $author;
  }

  foreach ($wordpress['posts'] as $post) {
    // Blank entries carry no author
    if (trim($post) !== '') {
      $author_name = wordpress_import_get_tag( $post, 'dc:creator' );
      // Using the name as key lists each author only once
      $author[$author_name] = $author_name;
    }
  }

  return $author;
}
604
/**
 * Extract data from a xml tag
 *
 * Returns the content of the first element named exactly $tag (attributes
 * are allowed on the opening tag), without its CDATA wrapper if it has one.
 *
 * @param string $string xml snippet
 * @param string $tag tag to extract
 * @return string content of the tag, or an empty string when the tag is absent
 */

function wordpress_import_get_tag($string, $tag)
{
  // The tag name is escaped so that it is matched literally
  $name = preg_quote($tag, '|');

  // The name must be followed by ">" or by whitespace and attributes, so
  // that a longer tag name starting with the same letters is not matched.
  if (!preg_match("|<{$name}(?:\s[^>]*)?>(.*?)</{$name}>|is", $string, $matches)) {
    return '';
  }

  // Drop the CDATA wrapper when the whole content is wrapped in one
  return preg_replace('|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $matches[1]);
}

  ViewVC Help
Powered by ViewVC 1.1.2