| 1 |
<?php // $Id: shp2sql.inc,v 1.1 2008/08/20 15:06:39 vauxia Exp $
|
| 2 |
|
| 3 |
/**
|
| 4 |
* CREDITS
|
| 5 |
* This shp and dbf parsing code is based on the following documentation:
|
| 6 |
* http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
|
| 7 |
* http://en.wikipedia.org/wiki/SHP_file
|
| 8 |
* http://www.phpdig.net/ref/rn45re878.html
|
| 9 |
* http://www.clicketyclick.dk/databases/xbase/format/dbf.html
|
| 10 |
*
|
| 11 |
* I also borrowed heavily from existing code by Juan Carlos Gonzalez Ulloa:
|
| 12 |
* http://sourceforge.net/projects/shp2mysql-php
|
| 13 |
*/
|
| 14 |
function geo_shp2sql($filename, $table_name = '', $create_table = TRUE, $srid = GEO_SRID_DEFAULT) {
|
| 15 |
if (!$table_name) {
|
| 16 |
$parts = pathinfo($filename);
|
| 17 |
$table_name = $parts['filename'];
|
| 18 |
}
|
| 19 |
$table_name = preg_replace('/[^a-z0-9_]/', '', strtolower($table_name));
|
| 20 |
|
| 21 |
if (!is_readable($filename) || (!is_resource($zip = zip_open($filename)))) {
|
| 22 |
drupal_set_message(t('zip file nonexistant or unreadable.'), 'error');
|
| 23 |
return FALSE;
|
| 24 |
}
|
| 25 |
|
| 26 |
// Catalog the contents of the zip file.
|
| 27 |
$files = array();
|
| 28 |
while ($res = zip_read($zip)) {
|
| 29 |
$ext = strtolower(substr(strrchr(zip_entry_name($res), '.'), 1));
|
| 30 |
$files[$ext] = $res;
|
| 31 |
}
|
| 32 |
|
| 33 |
// The zip file must minimally contain a shp, dbf and shx file.
|
| 34 |
if (!isset($files['shp']) || !isset($files['dbf']) || !isset($files['shx'])) {
|
| 35 |
drupal_set_message(t('This does not appear to be an archive that contains valid shp data.'), 'error');
|
| 36 |
return FALSE;
|
| 37 |
}
|
| 38 |
|
| 39 |
// See if we can get the projection information from the prj file.
|
| 40 |
if (isset($files['prj'])) {
|
| 41 |
// TODO Parse srid from prj file, for conversion if necessary.
|
| 42 |
}
|
| 43 |
|
| 44 |
if (isset($files['dbt'])) {
|
| 45 |
drupal_set_message(t('This database contains longtext/memo fields, which are unsupported. Proceeding to enter other data anyway.'), 'warning');
|
| 46 |
}
|
| 47 |
|
| 48 |
// Get headers and table definition from the dbf file.
|
| 49 |
$schema = array('description' => t('Geo data table'), 'fields' => array());
|
| 50 |
$headers = _dbf_headers($files['dbf'], $schema);
|
| 51 |
|
| 52 |
// Shape file headers are stored in the first 100 bytes of the shp/shx files.
|
| 53 |
$headers += _shp_headers(zip_entry_read($files['shp'], 100));
|
| 54 |
|
| 55 |
// Create a new table based on the data we've obtained thus far.
|
| 56 |
if ($create_table) {
|
| 57 |
$ret = array();
|
| 58 |
if (db_table_exists($table_name)) {
|
| 59 |
db_drop_table($ret, $table_name);
|
| 60 |
}
|
| 61 |
db_create_table($ret, $table_name, $schema);
|
| 62 |
}
|
| 63 |
|
| 64 |
// Add the geo column that will contain shape data.
|
| 65 |
$spec = array(
|
| 66 |
'type' => strtolower($headers['geo_type']),
|
| 67 |
'not null' => TRUE,
|
| 68 |
'description' => t('Geometry field'),
|
| 69 |
'srid' => $srid,
|
| 70 |
);
|
| 71 |
geo('add_field', $ret, $table_name, 'geo', $spec);
|
| 72 |
$fields = join(', ', array_keys($headers['field_lengths'])) .', geo';
|
| 73 |
|
| 74 |
$row_count = 0;
|
| 75 |
while ($shp = _shp_get_record($files['shp'])) {
|
| 76 |
$values = array();
|
| 77 |
$dbf_data = zip_entry_read($files['dbf'], $headers['record_size']);
|
| 78 |
$dbf_data = substr($dbf_data, 1); // Remove "record deleted" flag.
|
| 79 |
foreach($headers['field_lengths'] as $name => $length) {
|
| 80 |
$value = substr($dbf_data, 0, $length);
|
| 81 |
$dbf_data = substr($dbf_data, $length);
|
| 82 |
|
| 83 |
// Use a predefined function to filter and process each value.
|
| 84 |
$process = $headers['field_filters'][$name];
|
| 85 |
$values[] = $process($value);
|
| 86 |
}
|
| 87 |
|
| 88 |
// Add the geometry text.
|
| 89 |
$values[] = "GeomFromText('". $shp['data']['wkt'] ."', $srid)";
|
| 90 |
|
| 91 |
// We've already handled sanitization of table_name, column names and
|
| 92 |
// values, so we're going to skip the substitutions.
|
| 93 |
db_query("INSERT INTO {$table_name} ( $fields )
|
| 94 |
VALUES (". join(', ', $values) .")");
|
| 95 |
|
| 96 |
$row_count++;
|
| 97 |
}
|
| 98 |
|
| 99 |
drupal_set_message(t('Import complete. @count rows imported into @table', array('@count' => $row_count, '@table' => $table_name)));
|
| 100 |
}
|
| 101 |
|
| 102 |
function _shp_geo_types($key = null) {
|
| 103 |
$geo_types = array(
|
| 104 |
0 => 'none',
|
| 105 |
1 => 'point',
|
| 106 |
3 => 'linestring', //TODO this is specified as 'polyline' in the standard.
|
| 107 |
5 => 'polygon',
|
| 108 |
8 => 'multipoint',
|
| 109 |
11 => 'pointz',
|
| 110 |
13 => 'polylinez',
|
| 111 |
15 => 'polygonz',
|
| 112 |
18 => 'multipointz',
|
| 113 |
21 => 'pointm',
|
| 114 |
23 => 'polylinem',
|
| 115 |
25 => 'polygonm',
|
| 116 |
28 => 'multipointm',
|
| 117 |
31 => 'multipatch',
|
| 118 |
);
|
| 119 |
|
| 120 |
if ($key) return $geo_types[$key];
|
| 121 |
return $geo_types;
|
| 122 |
}
|
| 123 |
|
| 124 |
function _dbf_headers(&$fp, &$schema) {
|
| 125 |
|
| 126 |
// Crack open the dbf file for processing.
|
| 127 |
$data = zip_entry_read($fp, 32);
|
| 128 |
|
| 129 |
// Set basic headers.
|
| 130 |
$headers = unpack('H2db_id/Cy/Cm/Cd/Lcount/Sheader_size/Srecord_size', $data);
|
| 131 |
$date = $headers['m'] .'/'. $headers['d'] .'/'. $headers['y'] + 1900;
|
| 132 |
$headers['date'] = strtotime($date);
|
| 133 |
|
| 134 |
// Get the remainder of the dbf headers.
|
| 135 |
$field_data = zip_entry_read($fp, $headers['header_size'] - 32);
|
| 136 |
|
| 137 |
$type_map = array(
|
| 138 |
'C' => 'char',
|
| 139 |
'N' => 'int',
|
| 140 |
'L' => 'int',
|
| 141 |
'D' => 'date',
|
| 142 |
'M' => 'text',
|
| 143 |
'F' => 'float',
|
| 144 |
'B' => 'blob',
|
| 145 |
'Y' => 'decimal',
|
| 146 |
'P' => 'blob',
|
| 147 |
'I' => 'int',
|
| 148 |
'Y' => 'decimal',
|
| 149 |
'T' => 'datetime',
|
| 150 |
'V' => 'varchar',
|
| 151 |
'X' => 'varchar',
|
| 152 |
'@' => 'timestamp',
|
| 153 |
'0' => 'decimal',
|
| 154 |
'+' => 'serial',
|
| 155 |
);
|
| 156 |
|
| 157 |
// Gather the column definitions from the field headers.
|
| 158 |
$mask = 'A11name/Atype/x4/Clength/Cprecision';
|
| 159 |
while (strlen($field_data) >= 32) {
|
| 160 |
$d = unpack($mask, $field_data);
|
| 161 |
$field_data = substr($field_data, 32);
|
| 162 |
|
| 163 |
// Field name.
|
| 164 |
$name = drupal_convert_to_utf8(strtolower(trim($d['name'])), 'ascii');
|
| 165 |
$name = preg_replace('/[^a-z0-9_]/', '', $name);
|
| 166 |
|
| 167 |
// Datatype.
|
| 168 |
$type = isset($type_map[$d['type']]) ? $type_map[$d['type']] : 'varchar';
|
| 169 |
if ($type == 'char' && $d['length'] > 3) $type = 'varchar';
|
| 170 |
|
| 171 |
// Default data processing function, which will escape and UTF8 convert.
|
| 172 |
$filter = '_shp_data_text';
|
| 173 |
|
| 174 |
// Use all-purpose PHP functions for standard numeric types.
|
| 175 |
if (in_array($type, array('float', 'decimal'))) $filter = 'floatval';
|
| 176 |
if (in_array($type, array('int', 'timestamp', 'serial'))) $filter = 'intval';
|
| 177 |
if ($type == 'date') $filter = '_shp_data_date';
|
| 178 |
|
| 179 |
// Special case for boolean, convert to int.
|
| 180 |
if ($d['type'] == 'L') $filter = '_shp_data_bool';
|
| 181 |
|
| 182 |
$headers['field_filters'][$name] = $filter;
|
| 183 |
|
| 184 |
// Datatype futzing.
|
| 185 |
$schema['fields'][$name] = array('type' => $type, 'length' => $d['length']);
|
| 186 |
if ($d['precision']) {
|
| 187 |
$schema['fields'][$name]['not null'] = TRUE;
|
| 188 |
$schema['fields'][$name]['default'] = 0.0;
|
| 189 |
}
|
| 190 |
|
| 191 |
// Remove precision from simple types.
|
| 192 |
if (in_array($type, array('int', 'float', 'real', 'timestamp'))) {
|
| 193 |
unset($schema['fields'][$name]['length']);
|
| 194 |
}
|
| 195 |
|
| 196 |
// Store field length for processing.
|
| 197 |
$headers['field_lengths'][$name] = $d['length'];
|
| 198 |
}
|
| 199 |
|
| 200 |
unset($headers['y'], $headers['m'], $headers['d'], $headers['header_size']);
|
| 201 |
return $headers;
|
| 202 |
}
|
| 203 |
|
| 204 |
function _shp_data_text($value) {
|
| 205 |
$value = db_escape_string(trim(drupal_convert_to_utf8($value, 'ascii')));
|
| 206 |
return "'$value'";
|
| 207 |
}
|
| 208 |
|
| 209 |
function _shp_data_bool($value) {
|
| 210 |
return (int) in_array($value, array('t', 'T', 'y', 'Y'));
|
| 211 |
}
|
| 212 |
|
| 213 |
function _shp_data_date($value) {
|
| 214 |
// Per the spec, we get the data in YYYYMMDD format.
|
| 215 |
$y = (int) substr($value, 0, 4);
|
| 216 |
$m = (int) substr($value, 4, 2);
|
| 217 |
$d = (int) substr($value, 6, 2);
|
| 218 |
return "'$y-$m-$d'";
|
| 219 |
}
|
| 220 |
|
| 221 |
function _shp_headers($data) {
|
| 222 |
$mask = 'x28/iversion/igeo_type/dmin_x/dmax_x/dmin_y/dmax_y/dmin_z/dmax_z/dmin_m/dmax_m';
|
| 223 |
$headers = unpack($mask, $data);
|
| 224 |
$headers['geo_type'] = _shp_geo_types($headers['geo_type']);
|
| 225 |
return $headers;
|
| 226 |
}
|
| 227 |
|
| 228 |
function _shp_get_record(&$fp) {
|
| 229 |
if (!$row_header = zip_entry_read($fp, 12)) return FALSE;
|
| 230 |
|
| 231 |
$row = unpack('Nnumber/Nlength/igeo_type', $row_header);
|
| 232 |
$row['geo_type'] = _shp_geo_types($row['geo_type']);
|
| 233 |
|
| 234 |
// Look for _shp_get_point(), _shp_get_linestring(), etc.
|
| 235 |
if (!function_exists($func = '_shp_get_'. $row['geo_type'])) {
|
| 236 |
drupal_set_message(t('Unsupported geo type %type found in file.', array('%type' => $row['geo_type'])), 'error');
|
| 237 |
return FALSE;
|
| 238 |
}
|
| 239 |
|
| 240 |
$row['data'] = $func($fp);
|
| 241 |
return $row;
|
| 242 |
}
|
| 243 |
|
| 244 |
function _shp_get_point_data(&$fp) {
|
| 245 |
$data = unpack('dx/dy', zip_entry_read($fp, 16));
|
| 246 |
return join(' ', $data);
|
| 247 |
}
|
| 248 |
|
| 249 |
function _shp_get_point(&$fp) {
|
| 250 |
return 'POINT('. _shp_get_point_data($fp) .')';
|
| 251 |
}
|
| 252 |
|
| 253 |
function _shp_get_linestring(&$fp) {
|
| 254 |
$data = array(
|
| 255 |
'bbox' => unpack('dmin_x/dmin_y/dmax_x/dmax_y', zip_entry_read($fp, 32)),
|
| 256 |
'point_count' => current(unpack('i', zip_entry_read($fp, 4))),
|
| 257 |
);
|
| 258 |
|
| 259 |
$wkt = 'LINESTRING(';
|
| 260 |
for ($i = 1; $i <= $data['point_count']; $i++) {
|
| 261 |
$wkt .= _shp_get_point_data($fp) .',';
|
| 262 |
}
|
| 263 |
$wkt = substr($wkt, 0, -1) . ')';
|
| 264 |
|
| 265 |
return $data;
|
| 266 |
}
|
| 267 |
|
| 268 |
function _shp_get_polygon(&$fp) {
|
| 269 |
$data = array(
|
| 270 |
'bbox' => array(
|
| 271 |
'min_x' => _shp_data_fetch('d', $fp),
|
| 272 |
'min_y' => _shp_data_fetch('d', $fp),
|
| 273 |
'max_x' => _shp_data_fetch('d', $fp),
|
| 274 |
'max_y' => _shp_data_fetch('d', $fp),
|
| 275 |
),
|
| 276 |
'part_count' => _shp_data_fetch('i', $fp),
|
| 277 |
'point_count' => _shp_data_fetch('i', $fp),
|
| 278 |
);
|
| 279 |
|
| 280 |
$wkt = '';
|
| 281 |
|
| 282 |
$points_counted = 0; $point_count; $last_offset = 0;
|
| 283 |
$parts = array();
|
| 284 |
for ($i = 0; $i < ($data['part_count']); $i++) {
|
| 285 |
$parts[] = _shp_data_fetch('i', $fp);
|
| 286 |
}
|
| 287 |
|
| 288 |
$points_counted = 0;
|
| 289 |
foreach ($parts as $i => $offset) {
|
| 290 |
if ($next_offset = $parts[$i+1]) {
|
| 291 |
$points_counted += $point_count = ($next_offset - $offset);
|
| 292 |
}
|
| 293 |
else {
|
| 294 |
$point_count = $data['point_count'] - $points_counted;
|
| 295 |
}
|
| 296 |
|
| 297 |
if (!$point_count) continue;
|
| 298 |
|
| 299 |
// Gather the point data for each part segment.
|
| 300 |
$wkt .= '(';
|
| 301 |
for ($j = 1; $j <= $point_count; $j++) {
|
| 302 |
$wkt .= _shp_get_point_data($fp) .',';
|
| 303 |
}
|
| 304 |
$wkt = substr($wkt, 0, -1) . '),';
|
| 305 |
}
|
| 306 |
$data['wkt'] = 'POLYGON('. substr($wkt, 0, -1) .')';
|
| 307 |
return $data;
|
| 308 |
}
|
| 309 |
|
| 310 |
function _shp_data_fetch($type, &$fp, $offset = null) {
|
| 311 |
if (in_array($type, array('i', 'N'))) {
|
| 312 |
$length = 4;
|
| 313 |
}
|
| 314 |
elseif (in_array($type, array('d' ))) {
|
| 315 |
$length = 8;
|
| 316 |
}
|
| 317 |
if (is_string($fp)) {
|
| 318 |
return current(unpack($type, substr($fp, $offset, $length)));
|
| 319 |
}
|
| 320 |
return current(unpack($type, zip_entry_read($fp, $length)));
|
| 321 |
}
|