/[drupal]/contributions/modules/transliteration/transliteration.inc
ViewVC logotype

Diff of /contributions/modules/transliteration/transliteration.inc

Parent Directory | Revision Log | View Revision Graph | View Patch

revision 1.2.2.3, Sat Sep 5 22:25:01 2009 UTC revision 1.2.2.4, Fri Oct 9 12:11:06 2009 UTC
# Line 1  Line 1 
1  <?php  <?php
2  // $Id: transliteration.inc,v 1.2.2.2 2009/06/07 19:05:50 smk Exp $  // $Id: transliteration.inc,v 1.2.2.3 2009/09/05 22:25:01 smk Exp $
3    
4  /**  /**
5   * Sanitize a file name.   * Sanitize a file name.
6   *   *
7   * Transliterates the file name and removes all problematic characters.   * Transliterates the file name and removes invalid characters.
8   *   *
9   * @param string $filename   * @param $filename
10   *   A file name.   *   A file name.
11   * @param string $langcode   * @param $source_langcode
12   *   Optional ISO 639 language code used to import language specific   *   Optional ISO 639 language code that denotes the language of the input.
13   *   replacements. Defaults to the current display language.   *   Used to apply language-specific variations and defaults to the current
14   *   *   display language. If transliteration takes place during output (instead
15   * @return string   *   of creation) and the source language is not known at that time, it is
16     *   recommended to set this argument to 'en' to produce consistent results
17     *   for all enabled languages.
18     * @return
19   *   Cleaned file name.   *   Cleaned file name.
20   */   */
21  function transliteration_clean_filename($filename, $langcode = NULL) {  function transliteration_clean_filename($filename, $source_langcode = NULL) {
22    // Trim any leading/trailing dots.    // Trim any leading/trailing dots.
23    $filename = trim($filename, '.');    $filename = trim($filename, '.');
24    // Transliterate to ASCII.    // Transliterate to ASCII.
25    $filename = transliteration_process($filename, '', $langcode);    $filename = transliteration_process($filename, '', $source_langcode);
26    // Replace whitespace.    // Replace whitespace.
27    $filename = str_replace(' ', '_', $filename);    $filename = str_replace(' ', '_', $filename);
28    // Remove any remaining non-safe characters.    // Remove any remaining non-safe characters.
# Line 31  function transliteration_clean_filename( Line 34  function transliteration_clean_filename(
34  }  }
35    
36  /**  /**
37   * Transliterate UTF-8 input to plain ASCII.   * Transliterate UTF-8 text to ASCII.
38   *   *
39   * Based on Mediawiki's UtfNormal::quickIsNFCVerify().   * Based on Mediawiki's UtfNormal::quickIsNFCVerify().
40   *   *
41   * @param string $string   * @param $string
42   *   UTF-8 text input.   *   UTF-8 text input.
43   * @param string $unknown   * @param $unknown
44   *   Replacement for unknown characters and illegal UTF-8 sequences.   *   Replacement string for characters that do not have a suitable ASCII
45   * @param string $langcode   *   equivalent.
46   *   Optional ISO 639 language code used to import language specific   * @param $source_langcode
47   *   replacements. Defaults to the current display language.   *   Optional ISO 639 language code that denotes the language of the input.
48   *   *   Used to apply language-specific variations and defaults to the current
49   * @return string   *   display language. If transliteration takes place during output (instead
50   *   Plain ASCII output.   *   of creation) and the source language is not known at that time, it is
51   * @see transliteration_get()   *   recommended to set this argument to 'en' to produce consistent results
52     *   for all enabled languages.
53     * @return
54     *   Transliterated text.
55   */   */
56  function transliteration_process($string, $unknown = '?', $langcode = NULL) {  function transliteration_process($string, $unknown = '?', $source_langcode = NULL) {
57    // Screen out some characters that eg won't be allowed in XML.    // Screen out some characters that eg won't be allowed in XML.
58    $string = preg_replace('/[\x00-\x08\x0b\x0c\x0e-\x1f]/', $unknown, $string);    $string = preg_replace('/[\x00-\x08\x0b\x0c\x0e-\x1f]/', $unknown, $string);
59    
# Line 164  function transliteration_process($string Line 170  function transliteration_process($string
170          else if ($n <= 0xfd) {          else if ($n <= 0xfd) {
171            $ord = ($n - 252) * 1073741824 + (ord($sequence{1}) - 128) * 16777216 + (ord($sequence{2}) - 128) * 262144 + (ord($sequence{3}) - 128) * 4096 + (ord($sequence{4}) - 128) * 64 + (ord($sequence{5}) - 128);            $ord = ($n - 252) * 1073741824 + (ord($sequence{1}) - 128) * 16777216 + (ord($sequence{2}) - 128) * 262144 + (ord($sequence{3}) - 128) * 4096 + (ord($sequence{4}) - 128) * 64 + (ord($sequence{5}) - 128);
172          }          }
173          $result .= _transliteration_replace($ord, $unknown, $langcode);          $result .= _transliteration_replace($ord, $unknown, $source_langcode);
174          $head = '';          $head = '';
175        }        }
176        elseif ($c < "\x80") {        elseif ($c < "\x80") {
# Line 189  function transliteration_process($string Line 195  function transliteration_process($string
195  }  }
196    
197  /**  /**
198   * Lookup and replace a character from the transliteration database.   * Load the transliteration database and replace a Unicode character.
  *  
  * @param integer $ord  
  *   A unicode ordinal character code.  
  * @param string $unknown  
  *   Replacement for unknown characters.  
  * @param string $langcode  
  *   Optional ISO 639 language code used to import language specific  
  *   replacements. Defaults to the current display language.  
199   *   *
200   * @return string   * @param $ord
201   *   Plain ASCII replacement character.   *   An ordinal Unicode character code.
202   * @see transliteration_get()   * @param $unknown
203     *   Replacement string for characters that do not have a suitable ASCII
204     *   equivalent.
205     * @param $langcode
206     *   Optional ISO 639 language code that denotes the language of the input.
207     *   Used to apply language-specific optimizations.  Defaults to the current
208     *   display language.
209     * @return
210     *   ASCII replacement character.
211   */   */
212  function _transliteration_replace($ord, $unknown = '?', $langcode = NULL) {  function _transliteration_replace($ord, $unknown = '?', $langcode = NULL) {
213    if (!isset($langcode)) {    if (!isset($langcode)) {

Legend:
Removed from v.1.2.2.3  
changed lines
  Added in v.1.2.2.4

  ViewVC Help
Powered by ViewVC 1.1.2