/[drupal]/contributions/translations/is/extractor.php
ViewVC logotype

Contents of /contributions/translations/is/extractor.php

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1 - (show annotations) (download) (as text)
Wed Nov 9 21:09:11 2005 UTC (4 years ago) by pjetur
Branch: MAIN
CVS Tags: HEAD
Branch point for: DRUPAL-5, DRUPAL-4-6
File MIME type: text/x-php
Adding Icelandic (is) translation directory
1 #!/usr/local/bin/php
2 <?php
3 // $Id: extractor.php,v 1.7 2005/01/08 19:17:33 goba Exp $
4
5 /*
6 Extracts translatable strings from t(), t(,array()) and format_plural()
7 function calls, plus adds some file specific strings. Only literal strings
8 with no embedded variables can be extracted. Outputs a POT file on
9 STDOUT, errors on STDERR
10
11 Copyright (c) 2003, 2004 Alfa21 Outsourcing
12 Created by Jacobo Tarrio <jtarrio [at] alfa21.com>
13 with contributions from Gabor Hojtsy <goba [at] php.net>
14 Licensed under the terms of the GNU General Public License
15 */
16
// Extraction over many files can take a while, so remove the execution time limit.
set_time_limit(0);

// Make sure a STDERR stream constant exists for the error reporting below.
if (!defined("STDERR")) {
  define("STDERR", fopen("php://stderr", "w"));
}

// Work on a copy of the argument vector with the script name dropped, so that
// only the files to be parsed remain. Fall back to an empty list when the
// interpreter did not populate argv at all.
$argv = isset($GLOBALS['argv']) && is_array($GLOBALS['argv']) ? $GLOBALS['argv'] : array();
array_shift($argv);
if (!count($argv)) {
  print "Usage: extractor.php file1 [file2 [...]]\n\n";
  // A top-level "return 1" merely ends the script and leaves the process exit
  // status at 0; exit(1) lets calling shells and scripts detect the failure.
  exit(1);
}
28
29
// $strings collects msgid => file => list of line numbers (filled by the
// find_*() and add_*() helpers); $file_versions collects file => version string
// (filled by find_version_number()).
$strings = $file_versions = array();

foreach ($argv as $file) {
  $code = file_get_contents($file);
  if ($code === FALSE) {
    // Report unreadable input and carry on with the remaining files instead of
    // feeding FALSE to the tokenizer.
    fwrite(STDERR, "Unable to read $file\n");
    continue;
  }

  // Extract raw tokens
  $raw_tokens = token_get_all($code);

  // Remove whitespace and HTML, remembering the line each kept token starts on
  $tokens = array();
  $lineno = 1;
  foreach ($raw_tokens as $tok) {
    $is_complex = is_array($tok);
    if (!$is_complex || (($tok[0] != T_WHITESPACE) && ($tok[0] != T_INLINE_HTML))) {
      if ($is_complex) {
        $tok[] = $lineno;
      }
      $tokens[] = $tok;
    }
    // Advance the line counter by the newlines inside this token's text.
    // substr_count() replaces count(split("\n", ...)) - 1; split() no longer
    // exists in current PHP versions.
    $lineno += substr_count($is_complex ? $tok[1] : $tok, "\n");
  }

  // Token based extraction
  find_t_calls($tokens, $file);
  find_watchdog_calls($tokens, $file);
  find_format_plural_calls($tokens, $file);

  // Pattern based extraction on the raw source text
  find_perm_hook($code, $file);
  find_node_types_hook($code, $file);
  find_module_name($code, $file);
  find_language_names($code, $file);
  find_version_number($code, $file);

  // Strings added purely because of the file's name
  add_date_strings($file);
  add_format_interval_strings($file);
}
68
69
// Turn every collected string into a PO entry and queue it for the template it
// belongs to: the single file it came from, or 'general' when it was seen in
// more than one file.
foreach ($strings as $str => $fileinfo) {
  $references = array();
  $versions = array();
  foreach ($fileinfo as $file => $lines) {
    $references[] = "$file:" . join(";", $lines);
    if (isset($file_versions[$file])) {
      $versions[] = $file_versions[$file];
    }
  }

  // With exactly one reference, $file still holds that one file name here.
  $target = (count($references) > 1) ? 'general' : $file;

  $entry = "#: " . join(" ", $references) . "\n";
  if (strpos($str, "\0") !== FALSE) {
    // A NUL byte separates the singular and plural forms of a plural string.
    list($singular, $plural) = explode("\0", $str);
    $entry .= "msgid \"$singular\"\n"
      . "msgid_plural \"$plural\"\n"
      . "msgstr[0] \"\"\n"
      . "msgstr[1] \"\"\n";
  }
  else {
    $entry .= "msgid \"$str\"\n"
      . "msgstr \"\"\n";
  }
  $entry .= "\n";

  store($target, $entry, $versions);
}

write_files();
99
/*
 Writes everything collected through store() to .pot files in the current
 directory.

 Pass 1 folds every template that holds at most 11 elements (header, version
 list and up to nine entries) into the 'general' template. Pass 2 derives the
 output file name from the template name (leading lowercase directory
 components removed, dots turned into dashes, ".pot" appended), replaces the
 --VERSIONS-- placeholder of the header with the version list and writes the
 file. Files that cannot be opened are reported on STDERR and skipped.
*/
function write_files() {
  $output = store(0, 0, array(), 1);

  // Pass 1: merge small templates into 'general'. foreach iterates over a copy,
  // so changing $output inside the loop is safe.
  foreach ($output as $file => $content) {
    if ((string) $file === 'general' || count($content) > 11) {
      continue;
    }
    if (!isset($output['general'])) {
      // No string was shared between files, so 'general' does not exist yet.
      // Create it with its own header and an empty version list; merging into
      // a missing list would hand a non-array to array_merge().
      $output['general'] = array(write_header('general'), array());
    }
    $output['general'][1] = array_unique(array_merge($output['general'][1], $content[1]));
    unset($content[0], $content[1]);
    foreach ($content as $msgid) {
      $output['general'][] = $msgid;
    }
    unset($output[$file]);
  }

  // Pass 2: write one .pot file per remaining template.
  foreach ($output as $file => $content) {
    $tmp = preg_replace('<[/]?([a-z]*/)*>', '', $file);
    $file = str_replace('.', '-', $tmp) .'.pot';

    // Element 1 is the version list; take it out so only header and entries
    // are concatenated below.
    $filelist = $content[1];
    unset($content[1]);
    if (count($filelist) > 1) {
      $filelist = "Generated from files:\n# " . join("\n# ", $filelist);
    }
    elseif (count($filelist) == 1) {
      $filelist = "Generated from file: " . join("", $filelist);
    }
    else {
      $filelist = "No version information was available in the source files.";
    }

    $fp = fopen($file, 'w');
    if ($fp === FALSE) {
      fwrite(STDERR, "Unable to open $file for writing\n");
      continue;
    }
    fwrite($fp, str_replace("--VERSIONS--", $filelist, join("", $content)));
    fclose($fp);
  }
}
134
/*
 Accumulates output per template in a function-static array.

 Store mode ($get is false): appends $input to the template named $file and
 merges $filelist into that template's version list. A template seen for the
 first time is created as array(0 => header, 1 => version list, 2 => first
 entry). Nothing is returned.

 Get mode ($get is true): returns the whole storage array; the other
 parameters are ignored.
*/
function store($file = 0, $input = 0, $filelist = array(), $get = 0) {
  static $storage = array();

  if ($get) {
    return $storage;
  }

  if (!isset($storage[$file])) {
    $storage[$file] = array(
      0 => write_header($file),
      1 => $filelist,
      2 => $input,
    );
    return;
  }

  $storage[$file][1] = array_unique(array_merge($storage[$file][1], $filelist));
  $storage[$file][] = $input;
}
153
/*
 Builds the header block of a POT file for the template named $file.

 The result consists of the comment preamble (including the --VERSIONS--
 placeholder that write_files() fills in), the empty msgid/msgstr pair and the
 quoted header fields, followed by one blank line. POT-Creation-Date is the
 current time; all other fields are placeholders for the translator.
*/
function write_header($file) {
  $preamble = array(
    '# LANGUAGE translation of Drupal (' . $file . ')',
    '# Copyright YEAR NAME <EMAIL@ADDRESS>',
    '# --VERSIONS--',
    '#',
    '#, fuzzy',
    'msgid ""',
    'msgstr ""',
  );

  $fields = array(
    'Project-Id-Version: PROJECT VERSION',
    'POT-Creation-Date: ' . date("Y-m-d H:iO"),
    'PO-Revision-Date: YYYY-mm-DD HH:MM+ZZZZ',
    'Last-Translator: NAME <EMAIL@ADDRESS>',
    'Language-Team: LANGUAGE <EMAIL@ADDRESS>',
    'MIME-Version: 1.0',
    'Content-Type: text/plain; charset=utf-8',
    'Content-Transfer-Encoding: 8bit',
    'Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;',
  );

  $header = implode("\n", $preamble) . "\n";
  foreach ($fields as $field) {
    // Each field is a quoted PO string ending in a literal backslash-n.
    $header .= '"' . $field . '\n"' . "\n";
  }

  return $header . "\n";
}
174
/*
 Converts a PHP string literal, given as source text including its surrounding
 quotes, into text that can be placed between the double quotes of a PO string.

 The quotes are cut off, the escape sequences of the literal are resolved
 (stripcslashes() for double-quoted literals; only \' and \\ for single-quoted
 ones), and finally control characters, backslashes and double quotes are
 escaped again.
*/
function format_quoted_string($str) {
  $delimiter = substr($str, 0, 1);
  $inner = substr($str, 1, -1);

  $decoded = ($delimiter == '"')
    ? stripcslashes($inner)
    : strtr($inner, array("\\'" => "'", "\\\\" => "\\"));

  return addcslashes($decoded, "\0..\37\\\"");
}
185
/*
 Reports an unusable marker call on STDERR.

 $tokens must start at the marker's name token. The first two tokens (name and
 opening parenthesis) are skipped, then the text of every following token is
 echoed up to and including the parenthesis that closes the call, or up to the
 end of the token list if it is never closed.
*/
function marker_error($file, $line, $marker, $tokens) {
  fwrite(STDERR, "Invalid marker content in $file:$line\n* $marker(");

  // Number of parentheses still open; the marker's own "(" counts as one.
  $open = 1;
  foreach (array_slice($tokens, 2) as $tok) {
    if (is_array($tok)) {
      fwrite(STDERR, $tok[1]);
      continue;
    }

    fwrite(STDERR, $tok);
    if ($tok == "(") {
      $open++;
    }
    if ($tok == ")") {
      $open--;
    }
    if (!$open) {
      break;
    }
  }

  fwrite(STDERR, "\n\n");
}
206
/*
 Detect all occurrences of one of these sequences:
 T_STRING("t") + "(" + T_CONSTANT_ENCAPSED_STRING + ")"
 T_STRING("t") + "(" + T_CONSTANT_ENCAPSED_STRING + ","

 $tokens is the filtered token list of $file; array tokens are expected to
 carry their line number at index 2. Every literal found is recorded in the
 global $strings as $strings[msgid][$file][] = line. A t( that is followed by
 anything else is reported through marker_error().

 The list is walked by index rather than consumed with array_shift(), which
 re-indexes the whole array on every step.
*/
function find_t_calls($tokens, $file) {
  global $strings;

  $tokens = array_values($tokens);
  $total = count($tokens);

  // A match spans four tokens, so stop once fewer than four remain.
  for ($pos = 0; $pos + 3 < $total; $pos++) {
    $ctok = $tokens[$pos];
    if (!is_array($ctok)) {
      continue;
    }
    list($type, $string, $line) = $ctok;

    if ($type != T_STRING || $string !== "t" || $tokens[$pos + 1] !== "(") {
      continue;
    }

    $mid = $tokens[$pos + 2];
    $rig = $tokens[$pos + 3];

    if (($rig === ")" || $rig === ",")
        && is_array($mid) && ($mid[0] == T_CONSTANT_ENCAPSED_STRING)) {
      $strings[format_quoted_string($mid[1])][$file][] = $line;
    }
    // t() found, but inside is something which is not a string literal
    else {
      marker_error($file, $line, "t", array_slice($tokens, $pos));
    }
  }
}
240
/*
 Detect all occurrences of this sequence:
 T_STRING("format_plural") + "(" + ..anything (might be more tokens).. +
 "," + T_CONSTANT_ENCAPSED_STRING +
 "," + T_CONSTANT_ENCAPSED_STRING + ")"

 $tokens is the filtered token list of $file; array tokens are expected to
 carry their line number at index 2. A match is recorded in the global
 $strings under the key "singular\0plural" as $strings[key][$file][] = line.
 Every other format_plural( is reported through marker_error().

 The first argument is skipped by looking for the first comma that is not
 nested inside parentheses. The search stops at the end of the token list and
 at the parenthesis closing the call, so a call without further arguments is
 reported instead of being scanned past (or looped on forever).
*/
function find_format_plural_calls($tokens, $file) {
  global $strings;

  $tokens = array_values($tokens);
  $total = count($tokens);

  // The shortest match spans eight tokens, so stop once fewer remain.
  for ($pos = 0; $pos + 7 < $total; $pos++) {
    $ctok = $tokens[$pos];
    if (!is_array($ctok)) {
      continue;
    }
    list($type, $string, $line) = $ctok;

    if ($type != T_STRING || $string !== "format_plural" || $tokens[$pos + 1] !== "(") {
      continue;
    }

    // Eat up everything that is used as the first parameter
    $cur = $pos + 2;
    $depth = 0;
    while ($cur < $total && $depth >= 0 && !($depth == 0 && $tokens[$cur] === ",")) {
      if ($tokens[$cur] === "(") {
        $depth++;
      }
      elseif ($tokens[$cur] === ")") {
        $depth--;
      }
      $cur++;
    }

    // Get further parameters: "," singular "," plural ")" starting at $cur.
    // $depth == 0 together with $cur inside the list means the loop stopped
    // on the separating comma.
    $valid = ($depth == 0) && ($cur + 4 < $total);
    if ($valid) {
      $singular = $tokens[$cur + 1];
      $comma2 = $tokens[$cur + 2];
      $plural = $tokens[$cur + 3];
      $par2 = $tokens[$cur + 4];

      $valid = ($comma2 === ",") && ($par2 === ")")
        && is_array($singular) && ($singular[0] == T_CONSTANT_ENCAPSED_STRING)
        && is_array($plural) && ($plural[0] == T_CONSTANT_ENCAPSED_STRING);
    }

    if ($valid) {
      $key = format_quoted_string($singular[1]) . "\0" . format_quoted_string($plural[1]);
      $strings[$key][$file][] = $line;
    }
    // format_plural() found, but the parameters are not correct
    else {
      marker_error($file, $line, "format_plural", array_slice($tokens, $pos));
    }
  }
}
295
/*
 Detect all occurrences of this sequence:
 T_STRING("watchdog") + "(" + T_CONSTANT_ENCAPSED_STRING + ","

 $tokens is the filtered token list of $file; array tokens are expected to
 carry their line number at index 2. The literal first argument is recorded in
 the global $strings as $strings[msgid][$file][] = line. A watchdog( that is
 followed by anything else is reported through marker_error().

 The list is walked by index rather than consumed with array_shift(), which
 re-indexes the whole array on every step.
*/
function find_watchdog_calls($tokens, $file) {
  global $strings;

  $tokens = array_values($tokens);
  $total = count($tokens);

  // A match spans four tokens, so stop once fewer than four remain.
  for ($pos = 0; $pos + 3 < $total; $pos++) {
    $ctok = $tokens[$pos];
    if (!is_array($ctok)) {
      continue;
    }
    list($type, $string, $line) = $ctok;

    if ($type != T_STRING || $string !== "watchdog" || $tokens[$pos + 1] !== "(") {
      continue;
    }

    $mid = $tokens[$pos + 2];
    $rig = $tokens[$pos + 3];

    if (($rig === ",")
        && is_array($mid) && ($mid[0] == T_CONSTANT_ENCAPSED_STRING)) {
      $strings[format_quoted_string($mid[1])][$file][] = $line;
    }
    // watchdog() found, but inside is something which is not a string literal
    else {
      marker_error($file, $line, "watchdog", array_slice($tokens, $pos));
    }
  }
}
328
/*
 Collects the quoted items returned by the first *_perm() function in $code.

 Only quoted strings made of lowercase letters and spaces are picked up. Each
 one is recorded in the global $strings with the line number of the hook's
 "return" keyword. Works on the raw source text, so a similar pattern inside a
 comment will confuse it.
*/
function find_perm_hook($code, $file) {
  global $strings;

  $found = preg_match('!^(.+function \\w+_perm\\(\\) \\{\s+return)([^\\}]+)\\}!Us', $code, $match);
  if (!$found) {
    return;
  }

  // $match[1] is everything up to and including "return", $match[2] the rest
  // of the function body up to its first closing brace.
  $lineno = 1 + substr_count($match[1], "\n");
  preg_match_all('!(["\'])([a-z ]+)\1!', $match[2], $quoted, PREG_PATTERN_ORDER);

  foreach ($quoted[2] as $permission) {
    $strings[$permission][$file][] = $lineno;
  }
}
341
/*
 Collects the quoted items returned by the first *_node_types() function in
 $code.

 Only quoted strings made of digits, lowercase letters and dashes are picked
 up. Each one is recorded in the global $strings with the line number of the
 hook's "return" keyword. Works on the raw source text, so a similar pattern
 inside a comment will confuse it.
*/
function find_node_types_hook($code, $file) {
  global $strings;

  $found = preg_match('!^(.+function \\w+_node_types\\(\\) \\{\s+return)([^\\}]+)\\}!Us', $code, $match);
  if (!$found) {
    return;
  }

  // $match[1] is everything up to and including "return", $match[2] the rest
  // of the function body up to its first closing brace.
  $lineno = 1 + substr_count($match[1], "\n");
  preg_match_all('!(["\'])([0-9a-z-]+)\1!', $match[2], $quoted, PREG_PATTERN_ORDER);

  foreach ($quoted[2] as $node_type) {
    $strings[$node_type][$file][] = $lineno;
  }
}
354
/*
 Records the module name, taken from the first *_help( function definition in
 $code, in the global $strings with line number 0.

 A match on menu_get_active_help() is ignored. Works on the raw source text,
 so a similar pattern inside a comment will confuse it.
*/
function find_module_name($code, $file) {
  global $strings;

  if (!preg_match('!function (\\w+)_help\\(!', $code, $match)) {
    return;
  }

  $module = $match[1];
  if ($module != 'menu_get_active') {
    $strings[$module][$file][] = 0;
  }
}
364
/*
 Collects language names from _locale_get_iso639_list() when the parsed file
 name ends in locale.inc.

 Inside the function body, the first quoted string directly following each
 "array(" is recorded in the global $strings, with the line number on which
 the function's opening brace sits.
*/
function find_language_names($code, $file) {
  global $strings;

  if (!preg_match("!locale\\.inc$!", $file)) {
    return;
  }
  if (!preg_match("!^(.+function _locale_get_iso639_list\\(\\) {)([^\\}]+)\\}!Us", $code, $match)) {
    return;
  }

  // $match[1] runs up to and including the opening brace, $match[2] is the
  // body up to its first closing brace.
  $lineno = 1 + substr_count($match[1], "\n");
  preg_match_all('!array\\((["\'])([^\'"]+)\1!', $match[2], $names, PREG_PATTERN_ORDER);

  foreach ($names[2] as $language) {
    $strings[$language][$file][] = $lineno;
  }
}
377
/*
 Gets the exact version information from the file's CVS Id keyword, so it can
 be pushed into the pot. The text between the keyword name and the trailing
 "Exp" is stored in the global $file_versions under $file; nothing is stored
 when no such keyword is found.
*/
function find_version_number($code, $file) {
  global $file_versions;

  // The pattern is assembled from two pieces to prevent CVS from replacing
  // it with actual info.
  $pattern = '!\\$I' . 'd: ([^\\$]+) Exp \\$!';

  if (!preg_match($pattern, $code, $match)) {
    return;
  }
  $file_versions[$file] = $match[1];
}
387
/*
 Add date strings if locale.module is parsed.

 When $file is locale.module (optionally preceded by a directory path), the
 full and abbreviated English names of the twelve months and the seven
 weekdays are recorded in the global $strings with line number 0. Each name
 is recorded once per call, including "May", which is both the full and the
 abbreviated form.
*/
function add_date_strings($file) {
  global $strings;

  // The dot is escaped so that only a literal "locale.module" matches.
  if (!preg_match('!(^|/)locale\.module$!', $file)) {
    return;
  }

  $names = array();

  // First day of every month of one year yields all twelve month names.
  for ($month = 1; $month <= 12; $month++) {
    $stamp = mktime(0, 0, 0, $month, 1, 1971);
    $names[] = date("F", $stamp);
    $names[] = date("M", $stamp);
  }

  // Seven consecutive days cover every weekday exactly once; an eighth day
  // would only repeat the first one.
  for ($day = 0; $day < 7; $day++) {
    $stamp = $day * 86400;
    $names[] = date("D", $stamp);
    $names[] = date("l", $stamp);
  }

  foreach (array_unique($names) as $name) {
    $strings[$name][$file][] = 0;
  }
}
406
/*
 Add format_interval special strings if common.inc is parsed.

 When $file is common.inc (optionally preceded by a directory path), six
 singular/plural pairs for time units are recorded in the global $strings
 with line number 0. Singular and plural are joined with a NUL byte, the same
 key layout find_format_plural_calls() uses.
*/
function add_format_interval_strings($file) {
  global $strings;

  // The dot is escaped so that only a literal "common.inc" matches.
  if (!preg_match('!(^|/)common\.inc$!', $file)) {
    return;
  }

  $components = array(
    '1 year' => '%count years',
    '1 week' => '%count weeks',
    '1 day' => '%count days',
    '1 hour' => '%count hours',
    '1 min' => '%count min',
    '1 sec' => '%count sec',
  );

  foreach ($components as $singular => $plural) {
    $strings[$singular . "\0" . $plural][$file][] = 0;
  }
}
425
426 return; // ends the main script here, so none of the statements below are ever run
427
428 // Self-test fixtures: these are never executed; they only exist so that extractor.php can be run on itself and its output checked by hand.
429 $a = t("Test string 1" ); // plain double-quoted literal; the space before ")" is dropped with the whitespace tokens
430 $b = t("Test string 2 %string", array("%string" => "how do you do")); // literal followed by "," (the t(,array()) form)
431 $c = t('Test string 3'); // plain single-quoted literal
432 $d = t("Special\ncharacters"); // double quotes: the escape is decoded, then written back as \n
433 $e = t('Special\ncharacters'); // single quotes: the backslash is literal and gets escaped itself
434 $f = t("Embedded $variable"); // embedded variable: presumably not a constant string token, so expected to be reported as an invalid marker
435 $g = t('Embedded $variable'); // single quotes: no interpolation, treated as a plain literal
436 $h = t("more \$special characters"); // escaped dollar sign inside double quotes
437 $i = t('even more \$special characters'); // backslash stays literal inside single quotes
438 $j = t("Mixed 'quote' \"marks\""); // both quote kinds inside a double-quoted literal
439 $k = t('Mixed "quote" \'marks\''); // both quote kinds inside a single-quoted literal
440 $l = t('This is some repeating text'); // same text as the next line ...
441 $m = t("This is some repeating text"); // ... so both should end up under one msgid with two line references
442 $n = t(embedded_function_call()); // argument is not a string literal: expected to be reported as an invalid marker
443 $o = format_plural($days, "one day", "%count days"); // simple first argument
444 $p = format_plural(embedded_function_call($count), "one day", "%count days"); // first argument contains nested parentheses
445
446 function embedded_function_call() { return 12; }
447
448 function extractor_perm() {
449 return array("access extrator data", 'administer extractor data');
450 } // fixture for find_perm_hook(): both quoted items above should be collected
451
452 function extractor_help($section = 'default') {
453 watchdog('help', t('Help called')); // fixture for find_watchdog_calls() ('help') with a nested t() call ('Help called')
454 return t('This is some help');
455 } // the function name above is the fixture for find_module_name()
456
457 function extractor_node_types() {
458 return array("extractor-cooltype", "extractor-evencooler");
459 } // fixture for find_node_types_hook(): both quoted items above should be collected
460
461 ?>

  ViewVC Help
Powered by ViewVC 1.1.2