| 1 |
<?php
|
| 2 |
// $Id: transliteration.module,v 1.7 2009/10/09 12:19:44 smk Exp $
|
| 3 |
|
| 4 |
/**
|
| 5 |
* @file
|
| 6 |
* Provides transliteration for UTF-8 text input and sanitizes file names.
|
| 7 |
*
|
| 8 |
* Uses data from the Text::Unidecode Perl library.
|
| 9 |
* @see http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm
|
| 10 |
*/
|
| 11 |
|
| 12 |
/**
 * Sanitize a file path.
 *
 * Transliterates the name to ASCII, replaces spaces with underscores, strips
 * every character other than digits, ASCII letters, '_', '.', '/' and '-',
 * and finally lowercases the result. Because '/' is kept, the file name may
 * include sub-directories.
 *
 * @param $filename
 *   A file name, or an array of file names (arrays may be nested). Arrays are
 *   cleaned element by element and their keys are preserved.
 * @param $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input.
 *   Used to apply language-specific variations and defaults to the current
 *   display language. If transliteration takes place during output (instead
 *   of creation) and the source language is not known at that time, it is
 *   recommended to set this argument to 'en' to produce consistent results
 *   for all enabled languages.
 * @return
 *   Cleaned file name, or an array of cleaned file names if an array was
 *   passed in.
 */
function transliteration_clean_filename($filename, $source_langcode = NULL) {
  // PHP reports the names of uploads from nested or multi-value form fields
  // as arrays. Clean each entry on its own rather than handing an array to
  // the string functions below.
  if (is_array($filename)) {
    foreach ($filename as $key => $value) {
      $filename[$key] = transliteration_clean_filename($value, $source_langcode);
    }
    return $filename;
  }

  // Transliterate. The empty replacement string drops characters that have no
  // suitable ASCII equivalent.
  $filename = transliteration_get($filename, '', $source_langcode);
  // Replace whitespace.
  $filename = str_replace(' ', '_', $filename);
  // Remove remaining unsafe characters.
  $filename = preg_replace('![^0-9A-Za-z_./-]!', '', $filename);
  // Force lowercase to prevent issues on case-insensitive file systems.
  $filename = strtolower($filename);

  return $filename;
}
|
| 42 |
|
| 43 |
/**
 * Transliterate UTF-8 text to ASCII.
 *
 * Tries to represent the input in ASCII characters by conveying, in Roman
 * letters, the pronunciation expressed by the text in some other writing
 * system. This function is a thin front end: it loads the module's include
 * file and hands all arguments to transliteration_process().
 *
 * @param $input
 *   UTF-8 text input.
 * @param $unknown
 *   Replacement string for characters that do not have a suitable ASCII
 *   equivalent. Defaults to '?'.
 * @param $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input.
 *   Used to apply language-specific variations and defaults to the current
 *   display language. If transliteration takes place during output (instead
 *   of creation) and the source language is not known at that time, it is
 *   recommended to set this argument to 'en' to produce consistent results
 *   for all enabled languages.
 * @return
 *   Transliterated text.
 */
function transliteration_get($input, $unknown = '?', $source_langcode = NULL) {
  // transliteration_process() is not defined in this file; pull in the
  // module's include file before calling it.
  module_load_include('inc', 'transliteration');

  $transliterated = transliteration_process($input, $unknown, $source_langcode);
  return $transliterated;
}
|
| 69 |
|
| 70 |
/**
 * Implementation of hook_init().
 *
 * Transliterate and clean the names of files currently being uploaded.
 *
 * For every entry in $_FILES['files']['name'] the submitted name is copied to
 * $_FILES['files']['orig_name'] under the same key and then replaced by the
 * result of transliteration_clean_filename().
 */
function transliteration_init() {
  // Request data is untrusted: only proceed when the upload names arrived in
  // the keyed-array shape (files[<field>]) that the loop below iterates over.
  if (empty($_FILES['files']['name']) || !is_array($_FILES['files']['name'])) {
    return;
  }
  module_load_include('inc', 'transliteration');

  // Figure out language, which is available for node form submits.
  $langcode = NULL;
  // A crafted request can submit language[] as an array, which is not a valid
  // array offset for the lookup below, so insist on a string.
  if (!empty($_POST['language']) && is_string($_POST['language'])) {
    $languages = language_list();
    if (isset($languages[$_POST['language']])) {
      $langcode = $_POST['language'];
    }
  }

  foreach ($_FILES['files']['name'] as $field => $filename) {
    // Keep a copy of the unaltered file name.
    $_FILES['files']['orig_name'][$field] = $filename;
    $_FILES['files']['name'][$field] = transliteration_clean_filename($filename, $langcode);
  }
}
|
| 94 |
|