/[drupal]/contributions/modules/geo/includes/shp2sql.inc
ViewVC logotype

Contents of /contributions/modules/geo/includes/shp2sql.inc

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.2 - (show annotations) (download) (as text)
Thu Sep 25 21:51:33 2008 UTC (14 months ago) by vauxia
Branch: MAIN
CVS Tags: DRUPAL-6--1-0-ALPHA1, HEAD
Branch point for: DRUPAL-6--1
Changes since 1.1: +10 -4 lines
File MIME type: text/x-php
Restore PostGIS functinality:
  - Return PostgreSQL-supported column definitions for db_create_table in
    shp2sql code
	- Account for SRID's now that we have a database that notices
  - Remove no-longer-supported db_* functions from postgis.inc
  - Abstract field_query code in views handlers, and implement an indexed
    ( postgis ) and non-indexed ( mysql ) version in the backend code.
1 <?php // $Id: shp2sql.inc,v 1.1 2008/08/20 15:06:39 vauxia Exp $
2
3 /**
4 * CREDITS
5 * This shp and dbf parsing code is based on the following documentation:
6 * http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
7 * http://en.wikipedia.org/wiki/SHP_file
8 * http://www.phpdig.net/ref/rn45re878.html
9 * http://www.clicketyclick.dk/databases/xbase/format/dbf.html
10 *
11 * I also borrowed heavily from existing code by Juan Carlos Gonzalez Ulloa:
12 * http://sourceforge.net/projects/shp2mysql-php
13 */
14 function geo_shp2sql($filename, $table_name = '', $create_table = TRUE, $srid = GEO_SRID_DEFAULT) {
15 if (!$table_name) {
16 $parts = pathinfo($filename);
17 $table_name = $parts['filename'];
18 }
19 $table_name = preg_replace('/[^a-z0-9_]/', '', strtolower($table_name));
20
21 if (!is_readable($filename) || (!is_resource($zip = zip_open($filename)))) {
22 drupal_set_message(t('zip file nonexistant or unreadable.'), 'error');
23 return FALSE;
24 }
25
26 // Catalog the contents of the zip file.
27 $files = array();
28 while ($res = zip_read($zip)) {
29 $ext = strtolower(substr(strrchr(zip_entry_name($res), '.'), 1));
30 $files[$ext] = $res;
31 }
32
33 // The zip file must minimally contain a shp, dbf and shx file.
34 if (!isset($files['shp']) || !isset($files['dbf']) || !isset($files['shx'])) {
35 drupal_set_message(t('This does not appear to be an archive that contains valid shp data.'), 'error');
36 return FALSE;
37 }
38
39 // See if we can get the projection information from the prj file.
40 if (isset($files['prj'])) {
41 // TODO Parse srid from prj file, for conversion if necessary.
42 }
43
44 if (isset($files['dbt'])) {
45 drupal_set_message(t('This database contains longtext/memo fields, which are unsupported. Proceeding to enter other data anyway.'), 'warning');
46 }
47
48 // Get headers and table definition from the dbf file.
49 $schema = array('description' => t('Geo data table'), 'fields' => array());
50 $headers = _dbf_headers($files['dbf'], $schema);
51
52 // Shape file headers are stored in the first 100 bytes of the shp/shx files.
53 $headers += _shp_headers(zip_entry_read($files['shp'], 100));
54
55 // Create a new table based on the data we've obtained thus far.
56 if ($create_table) {
57 $ret = array();
58 if (db_table_exists($table_name)) {
59 db_drop_table($ret, $table_name);
60 }
61 db_create_table($ret, $table_name, $schema);
62 }
63
64 // Add the geo column that will contain shape data.
65 $spec = array(
66 'type' => strtolower($headers['geo_type']),
67 'not null' => TRUE,
68 'description' => t('Geometry field'),
69 'srid' => $srid,
70 );
71 geo('add_field', $ret, $table_name, 'geo', $spec);
72 $fields = join(', ', array_keys($headers['field_lengths'])) .', geo';
73
74 $row_count = 0;
75 while ($shp = _shp_get_record($files['shp'])) {
76 $values = array();
77 $dbf_data = zip_entry_read($files['dbf'], $headers['record_size']);
78 $dbf_data = substr($dbf_data, 1); // Remove "record deleted" flag.
79 foreach($headers['field_lengths'] as $name => $length) {
80 $value = substr($dbf_data, 0, $length);
81 $dbf_data = substr($dbf_data, $length);
82
83 // Use a predefined function to filter and process each value.
84 $process = $headers['field_filters'][$name];
85 $values[] = $process($value);
86 }
87
88 // Add the geometry text.
89 $values[] = "GeomFromText('". $shp['data']['wkt'] ."', $srid)";
90
91 // We've already handled sanitization of table_name, column names and
92 // values, so we're going to skip the substitutions.
93 db_query("INSERT INTO {$table_name} ( $fields )
94 VALUES (". join(', ', $values) .")");
95
96 $row_count++;
97 }
98
99 drupal_set_message(t('Import complete. @count rows imported into @table', array('@count' => $row_count, '@table' => $table_name)));
100 }
101
102 function _shp_geo_types($key = null) {
103 $geo_types = array(
104 0 => 'none',
105 1 => 'point',
106 3 => 'linestring', //TODO this is specified as 'polyline' in the standard.
107 5 => 'polygon',
108 8 => 'multipoint',
109 11 => 'pointz',
110 13 => 'polylinez',
111 15 => 'polygonz',
112 18 => 'multipointz',
113 21 => 'pointm',
114 23 => 'polylinem',
115 25 => 'polygonm',
116 28 => 'multipointm',
117 31 => 'multipatch',
118 );
119
120 if ($key) return $geo_types[$key];
121 return $geo_types;
122 }
123
124 function _dbf_headers(&$fp, &$schema) {
125
126 // Crack open the dbf file for processing.
127 $data = zip_entry_read($fp, 32);
128
129 // Set basic headers.
130 $headers = unpack('H2db_id/Cy/Cm/Cd/Lcount/Sheader_size/Srecord_size', $data);
131 $date = $headers['m'] .'/'. $headers['d'] .'/'. $headers['y'] + 1900;
132 $headers['date'] = strtotime($date);
133
134 // Get the remainder of the dbf headers.
135 $field_data = zip_entry_read($fp, $headers['header_size'] - 32);
136
137 $type_map = array(
138 'C' => 'char',
139 'N' => 'int',
140 'L' => 'int',
141 'D' => 'date',
142 'M' => 'text',
143 'F' => 'float',
144 'B' => 'blob',
145 'Y' => 'decimal',
146 'P' => 'blob',
147 'I' => 'int',
148 'Y' => 'decimal',
149 'T' => 'datetime',
150 'V' => 'varchar',
151 'X' => 'varchar',
152 '@' => 'timestamp',
153 '0' => 'decimal',
154 '+' => 'serial',
155 );
156
157 // Gather the column definitions from the field headers.
158 $mask = 'A11name/Atype/x4/Clength/Cprecision';
159 while (strlen($field_data) >= 32) {
160 $d = unpack($mask, $field_data);
161 $field_data = substr($field_data, 32);
162
163 // Field name.
164 $name = drupal_convert_to_utf8(strtolower(trim($d['name'])), 'ascii');
165 $name = preg_replace('/[^a-z0-9_]/', '', $name);
166
167 // Datatype.
168 $type = isset($type_map[$d['type']]) ? $type_map[$d['type']] : 'varchar';
169 if ($type == 'char' && $d['length'] > 3) $type = 'varchar';
170
171 // Default data processing function, which will escape and UTF8 convert.
172 $filter = '_shp_data_text';
173
174 // Use all-purpose PHP functions for standard numeric types.
175 if (in_array($type, array('float', 'decimal'))) $filter = 'floatval';
176 if (in_array($type, array('int', 'timestamp', 'serial'))) $filter = 'intval';
177 if ($type == 'date') $filter = '_shp_data_date';
178
179 // Special case for boolean, convert to int.
180 if ($d['type'] == 'L') $filter = '_shp_data_bool';
181
182 $headers['field_filters'][$name] = $filter;
183
184 // Datatype futzing.
185 $schema['fields'][$name] = array('type' => $type, 'length' => $d['length']);
186 if ($d['precision']) {
187 $schema['fields'][$name]['not null'] = TRUE;
188 $schema['fields'][$name]['default'] = 0.0;
189 }
190
191 // Remove precision from simple types.
192 if (in_array($type, array('int', 'float', 'real', 'timestamp'))) {
193 unset($schema['fields'][$name]['length']);
194 }
195
196 // Store field length for processing.
197 $headers['field_lengths'][$name] = $d['length'];
198 }
199
200 unset($headers['y'], $headers['m'], $headers['d'], $headers['header_size']);
201 return $headers;
202 }
203
204 function _shp_data_text($value) {
205 $value = db_escape_string(trim(drupal_convert_to_utf8($value, 'ascii')));
206 return "'$value'";
207 }
208
209 function _shp_data_bool($value) {
210 return (int) in_array($value, array('t', 'T', 'y', 'Y'));
211 }
212
213 function _shp_data_date($value) {
214 // Per the spec, we get the data in YYYYMMDD format.
215 $y = (int) substr($value, 0, 4);
216 $m = (int) substr($value, 4, 2);
217 $d = (int) substr($value, 6, 2);
218 return "'$y-$m-$d'";
219 }
220
221 function _shp_headers($data) {
222 $mask = 'x28/iversion/igeo_type/dmin_x/dmax_x/dmin_y/dmax_y/dmin_z/dmax_z/dmin_m/dmax_m';
223 $headers = unpack($mask, $data);
224 $headers['geo_type'] = _shp_geo_types($headers['geo_type']);
225 return $headers;
226 }
227
228 function _shp_get_record(&$fp) {
229 if (!$row_header = zip_entry_read($fp, 12)) return FALSE;
230
231 $row = unpack('Nnumber/Nlength/igeo_type', $row_header);
232 $row['geo_type'] = _shp_geo_types($row['geo_type']);
233
234 // Look for _shp_get_point(), _shp_get_linestring(), etc.
235 if (!function_exists($func = '_shp_get_'. $row['geo_type'])) {
236 drupal_set_message(t('Unsupported geo type %type found in file.', array('%type' => $row['geo_type'])), 'error');
237 return FALSE;
238 }
239
240 $row['data'] = $func($fp);
241 return $row;
242 }
243
244 function _shp_get_point_data(&$fp) {
245 $data = unpack('dx/dy', zip_entry_read($fp, 16));
246 return join(' ', $data);
247 }
248
249 function _shp_get_point(&$fp) {
250 return 'POINT('. _shp_get_point_data($fp) .')';
251 }
252
253 function _shp_get_linestring(&$fp) {
254 $data = array(
255 'bbox' => unpack('dmin_x/dmin_y/dmax_x/dmax_y', zip_entry_read($fp, 32)),
256 'point_count' => current(unpack('i', zip_entry_read($fp, 4))),
257 );
258
259 $wkt = 'LINESTRING(';
260 for ($i = 1; $i <= $data['point_count']; $i++) {
261 $wkt .= _shp_get_point_data($fp) .',';
262 }
263 $wkt = substr($wkt, 0, -1) . ')';
264
265 return $data;
266 }
267
268 function _shp_get_polygon(&$fp) {
269 $data = array(
270 'bbox' => array(
271 'min_x' => _shp_data_fetch('d', $fp),
272 'min_y' => _shp_data_fetch('d', $fp),
273 'max_x' => _shp_data_fetch('d', $fp),
274 'max_y' => _shp_data_fetch('d', $fp),
275 ),
276 'part_count' => _shp_data_fetch('i', $fp),
277 'point_count' => _shp_data_fetch('i', $fp),
278 );
279
280 $wkt = '';
281
282 $points_counted = 0; $point_count; $last_offset = 0;
283 $parts = array();
284 for ($i = 0; $i < ($data['part_count']); $i++) {
285 $parts[] = _shp_data_fetch('i', $fp);
286 }
287
288 $points_counted = 0;
289 foreach ($parts as $i => $offset) {
290 if ($next_offset = $parts[$i+1]) {
291 $points_counted += $point_count = ($next_offset - $offset);
292 }
293 else {
294 $point_count = $data['point_count'] - $points_counted;
295 }
296
297 if (!$point_count) continue;
298
299 // Gather the point data for each part segment.
300 $wkt .= '(';
301 for ($j = 1; $j <= $point_count; $j++) {
302 $wkt .= _shp_get_point_data($fp) .',';
303 }
304 $wkt = substr($wkt, 0, -1) . '),';
305 }
306 $data['wkt'] = 'POLYGON('. substr($wkt, 0, -1) .')';
307 return $data;
308 }
309
310 function _shp_data_fetch($type, &$fp, $offset = null) {
311 if (in_array($type, array('i', 'N'))) {
312 $length = 4;
313 }
314 elseif (in_array($type, array('d' ))) {
315 $length = 8;
316 }
317 if (is_string($fp)) {
318 return current(unpack($type, substr($fp, $offset, $length)));
319 }
320 return current(unpack($type, zip_entry_read($fp, $length)));
321 }

  ViewVC Help
Powered by ViewVC 1.1.2