2021-08-22 17:04:49 (UTC-03:00)
Marcel Rodrigues <marcelgmr@gmail.com>
add shapefile reader
diff --git a/shp.lua b/shp.lua new file mode 100644 index 0000000..f786a2d --- /dev/null +++ b/shp.lua @@ -0,0 +1,374 @@ +local ffi = require "ffi" +local bit = require "bit" + +local function rtrim(s, c) + local i = #s + while s:sub(i, i) == c do + i = i - 1 + end + return s:sub(1, i) +end + +local function read_byte(fp) + return fp:read(1):byte(1) +end + +local function read_leu16(fp) + return read_byte(fp) + bit.lshift(read_byte(fp), 8) +end + +local function read_leu32(fp) + return read_leu16(fp) + bit.lshift(read_leu16(fp), 16) +end + +local function read_beu16(fp) + return bit.lshift(read_byte(fp), 8) + read_byte(fp) +end + +local function read_beu32(fp) + return bit.lshift(read_beu16(fp), 16) + read_beu16(fp) +end + +local function signed(u, nbits) + local p = 2^(nbits-1) + if u >= p then u = u - p - p end + return u +end + +local function read_lei16(fp) + return signed(read_leu16(fp), 16) +end + +local function read_lei32(fp) + return signed(read_leu32(fp), 32) +end + +local function read_bei16(fp) + return signed(read_beu16(fp), 16) +end + +local function read_bei32(fp) + return signed(read_beu32(fp), 32) +end + +local function read_led64(fp) + return ffi.cast("double *", ffi.new("char[8]", fp:read(8)))[0] +end + +local SF = {} +SF.__index = SF + +function SF:read_dbf() + local fp = io.open(self.path .. ".dbf", "rb") + local version = bit.band(read_byte(fp), 0x07) + if version ~= 3 then + error("only DBF version 5 is supported") + end + self.year = 1900 + read_byte(fp) + self.month = read_byte(fp) + self.day = read_byte(fp) + self.nrecs = read_leu32(fp) + fp:seek("cur", 24) + local reclen = 0 + local fields = {} + local byte = read_byte(fp) + while byte ~= 0x0D do + local field_name = rtrim(string.char(byte) .. fp:read(10), "\000") + local field_type = fp:read(1) + fp:seek("cur", 4) + local field_length = read_byte(fp) + reclen = reclen + field_length + local field_dec_count = read_byte(fp) + local field = { + name=field_name, + type=field_type, + length=field_length, + dec_count=field_dec_count + } + table.insert(fields, field) + fp:seek("cur", 14) + byte = read_byte(fp) + end + local tab = {} + for i = 1, self.nrecs do + if fp:read(1) == "*" then + fp:seek("cur", reclen) + else + local row = {} + for j = 1, #fields do + local field = fields[j] + local cell = rtrim(fp:read(field.length), " ") + if field.type == "F" or field.type == "N" then + cell = tonumber(cell) + end + table.insert(row, cell) + end + table.insert(tab, row) + end + end + self.fields = fields + self.tab = tab + io.close(fp) +end + +function SF:search(field_name, value) + local col + for i = 1, #self.fields do + if self.fields[i].name == field_name then + col = i + break + end + end + if col ~= nil then + for i = 1, self.nrecs do + if self.tab[i][col] == value then + return i + end + end + end +end + +function SF:tab2csv(sep, fp) + -- TODO: refactor to use table.concat(str_list, sep) + sep = sep or ":" + fp = fp or io.output() + for i = 1, #self.fields do + if i > 1 then + fp:write(sep) + end + fp:write(self.fields[i].name) + end + fp:write("\n") + for i = 1, #self.tab do + for j = 1, #self.fields do + if j > 1 then + fp:write(sep) + end + local cell = self.tab[i][j] + fp:write(cell) + end + fp:write("\n") + end +end + +local shape_name = { + [ 0] = "null", [31] = "multipatch", + [ 1] = "point", [ 3] = "polyline", [ 5] = "polygon", [ 8] = "multipoint", + [11] = "point-z", [13] = "polyline-z", [15] = "polygon-z", [18] = "multipoint-z", + [21] = "point-m", [23] = "polyline-m", [25] = "polygon-m", [28] = "multipoint-m", +} + +local function startswith(s1, s2) + return s1:sub(1, #s2) == s2 +end + +local function read_bbox(fp) + local bbox = {} + bbox.xmin = read_led64(fp) + bbox.ymin = read_led64(fp) + bbox.xmax = read_led64(fp) + bbox.ymax = read_led64(fp) + return bbox +end + +-- SHX and SHP share the same header structure! +local function read_sf_header(fp) + local file_code = read_bei32(fp) + if file_code ~= 9994 then + error("does not seem to be a shapefile (invalid FileCode)") + end + fp:seek("cur", 20) -- unused, always all-zero + local file_len = read_bei32(fp) + local version = read_lei32(fp) + if version ~= 1000 then + error("invalid shapefile version") + end + local header = {} + header.shape = shape_name[read_lei32(fp)] + header.bbox = read_bbox(fp) + header.zmin = read_led64(fp) + header.zmax = read_led64(fp) + header.mmin = read_led64(fp) + header.mmax = read_led64(fp) + return header +end + +function SF:read_shx() + local fp = io.open(self.path .. ".shx", "rb") + self.header = read_sf_header(fp) + self.index = {} + for i = 1, self.nrecs do + local offset = read_bei32(fp) + local length = read_bei32(fp) + table.insert(self.index, {offset=offset, length=length}) + end + io.close(fp) +end + +function SF:read_record_header(index) + local fp = self.fp + fp:seek("set", self.index[index].offset * 2) + local num = read_bei32(fp) + local len = read_bei32(fp) + local shape = shape_name[read_lei32(fp)] + return num, len, shape +end + +-- this is the old version, using coroutines +function SF:read_polygons_co(index) + assert(startswith(self.header.shape, "polygon"), "type mismatch") + local fp = self.fp + local num, len, shape = self:read_record_header(index) + if shape == "null" then + return nil + end + local bbox = read_bbox(fp) + local nparts = read_lei32(fp) + --~ print(nparts) + local npoints = read_lei32(fp) + local total = read_lei32(fp) -- first index is always zero? + local lengths = {} + for i = 2, nparts do + local index = read_lei32(fp) + table.insert(lengths, index - total) + total = index + end + table.insert(lengths, npoints - total) + return coroutine.wrap(function() + for i = 1, #lengths do + local length = lengths[i] + coroutine.yield(coroutine.wrap(function() + for i = 1, length do + local x = read_led64(fp) + local y = read_led64(fp) + coroutine.yield({x, y}) + end + end)) + end + end) +end + +function SF:read_polygons(index) + assert(startswith(self.header.shape, "polygon"), "type mismatch") + local fp = self.fp + local num, len, shape = self:read_record_header(index) + if shape == "null" then + return nil + end + local bbox = read_bbox(fp) + local nparts = read_lei32(fp) + --~ print(nparts) + local npoints = read_lei32(fp) + local total = read_lei32(fp) -- first index is always zero? + local lengths = {} + for i = 2, nparts do + local index = read_lei32(fp) + table.insert(lengths, index - total) + total = index + end + table.insert(lengths, npoints - total) + local i = 0 + return bbox, lengths, function() + i = i + 1 + if i <= #lengths then + local j = 0 + local length = lengths[i] + return function() + j = j + 1 + if j <= length then + local x = read_led64(fp) + local y = read_led64(fp) + return {x, y} + end + end + end + end +end + +function SF:close() + io.close(self.fp) +end + +local function open_shapefile(path) + local self = setmetatable({path=path}, SF) + self:read_dbf() + self:read_shx() + self.fp = io.open(path .. ".shp", "rb") + return self +end + +local function read_dbf(fp) + local version = bit.band(read_byte(fp), 0x07) + if version ~= 3 then + error("only DBF version 5 is supported") + end + local year = 1900 + read_byte(fp) + local month = read_byte(fp) + local day = read_byte(fp) + -- print(("%04d-%02d-%02d"):format(year, month, day)) + local nrecs = read_leu32(fp) + fp:seek("cur", 24) + local reclen = 0 + local fields = {} + local byte = read_byte(fp) + while byte ~= 0x0D do + local field_name = rtrim(string.char(byte) .. fp:read(10), "\000") + local field_type = fp:read(1) + fp:seek("cur", 4) + local field_length = read_byte(fp) + reclen = reclen + field_length + local field_dec_count = read_byte(fp) + local field = { + name=field_name, + type=field_type, + length=field_length, + dec_count=field_dec_count + } + table.insert(fields, field) + fp:seek("cur", 14) + byte = read_byte(fp) + end + local tab = {} + for i = 1, nrecs do + if fp:read(1) == "*" then + fp:seek("cur", reclen) + else + local row = {} + for j = 1, #fields do + local field = fields[j] + local cell = rtrim(fp:read(field.length), " ") + if field.type == "F" or field.type == "N" then + cell = tonumber(cell) + end + table.insert(row, cell) + end + table.insert(tab, row) + end + end + return fields, tab +end + +local function tab2csv(fields, tab, sep, fp) + -- TODO: refactor to use table.concat(str_list, sep) + sep = sep or ":" + fp = fp or io.output() + for i = 1, #fields do + if i > 1 then + fp:write(sep) + end + fp:write(fields[i].name) + end + fp:write("\n") + for i = 1, #tab do + for j = 1, #fields do + if j > 1 then + fp:write(sep) + end + local cell = tab[i][j] + fp:write(cell) + end + fp:write("\n") + end +end + +return {open_shapefile=open_shapefile, read_dbf=read_dbf, tab2csv=tab2csv}