login

<     >

2021-08-22 17:04:49 (UTC-03:00)

Marcel Rodrigues <marcelgmr@gmail.com>

add shapefile reader

diff --git a/shp.lua b/shp.lua
new file mode 100644
index 0000000..f786a2d
--- /dev/null
+++ b/shp.lua
@@ -0,0 +1,374 @@
+local ffi = require "ffi"
+local bit = require "bit"
+
+local function rtrim(s, c)
+    local i = #s
+    while s:sub(i, i) == c do
+        i = i - 1
+    end
+    return s:sub(1, i)
+end
+
+local function read_byte(fp)
+    return fp:read(1):byte(1)
+end
+
+local function read_leu16(fp)
+    return read_byte(fp) + bit.lshift(read_byte(fp), 8)
+end
+
+local function read_leu32(fp)
+    return read_leu16(fp) + bit.lshift(read_leu16(fp), 16)
+end
+
+local function read_beu16(fp)
+    return bit.lshift(read_byte(fp), 8) + read_byte(fp)
+end
+
+local function read_beu32(fp)
+    return bit.lshift(read_beu16(fp), 16) + read_beu16(fp)
+end
+
+local function signed(u, nbits)
+    local p = 2^(nbits-1)
+    if u >= p then u = u - p - p end
+    return u
+end
+
+local function read_lei16(fp)
+    return signed(read_leu16(fp), 16)
+end
+
+local function read_lei32(fp)
+    return signed(read_leu32(fp), 32)
+end
+
+local function read_bei16(fp)
+    return signed(read_beu16(fp), 16)
+end
+
+local function read_bei32(fp)
+    return signed(read_beu32(fp), 32)
+end
+
+local function read_led64(fp)
+    return ffi.cast("double *", ffi.new("char[8]", fp:read(8)))[0]
+end
+
+local SF = {}
+SF.__index = SF
+
+function SF:read_dbf()
+    local fp = io.open(self.path .. ".dbf", "rb")
+    local version = bit.band(read_byte(fp), 0x07)
+    if version ~= 3 then
+        error("only DBF version 5 is supported")
+    end
+    self.year = 1900 + read_byte(fp)
+    self.month = read_byte(fp)
+    self.day = read_byte(fp)
+    self.nrecs = read_leu32(fp)
+    fp:seek("cur", 24)
+    local reclen = 0
+    local fields = {}
+    local byte = read_byte(fp)
+    while byte ~= 0x0D do
+        local field_name = rtrim(string.char(byte) .. fp:read(10), "\000")
+        local field_type = fp:read(1)
+        fp:seek("cur", 4)
+        local field_length = read_byte(fp)
+        reclen = reclen + field_length
+        local field_dec_count = read_byte(fp)
+        local field = {
+          name=field_name,
+          type=field_type,
+          length=field_length,
+          dec_count=field_dec_count
+        }
+        table.insert(fields, field)
+        fp:seek("cur", 14)
+        byte = read_byte(fp)
+    end
+    local tab = {}
+    for i = 1, self.nrecs do
+        if fp:read(1) == "*" then
+            fp:seek("cur", reclen)
+        else
+            local row = {}
+            for j = 1, #fields do
+                local field = fields[j]
+                local cell = rtrim(fp:read(field.length), " ")
+                if field.type == "F" or field.type == "N" then
+                    cell = tonumber(cell)
+                end
+                table.insert(row, cell)
+            end
+            table.insert(tab, row)
+        end
+    end
+    self.fields = fields
+    self.tab = tab
+    io.close(fp)
+end
+
+function SF:search(field_name, value)
+    local col
+    for i = 1, #self.fields do
+        if self.fields[i].name == field_name then
+            col = i
+            break
+        end
+    end
+    if col ~= nil then
+        for i = 1, self.nrecs do
+            if self.tab[i][col] == value then
+                return i
+            end
+        end
+    end
+end
+
+function SF:tab2csv(sep, fp)
+    -- TODO: refactor to use table.concat(str_list, sep)
+    sep = sep or ":"
+    fp = fp or io.output()
+    for i = 1, #self.fields do
+        if i > 1 then
+            fp:write(sep)
+        end
+        fp:write(self.fields[i].name)
+    end
+    fp:write("\n")
+    for i = 1, #self.tab do
+        for j = 1, #self.fields do
+            if j > 1 then
+                fp:write(sep)
+            end
+            local cell = self.tab[i][j]
+            fp:write(cell)
+        end
+        fp:write("\n")
+    end
+end
+
+local shape_name = {
+    [ 0] = "null",          [31] = "multipatch",
+    [ 1] = "point",   [ 3] = "polyline",   [ 5] = "polygon",   [ 8] = "multipoint",
+    [11] = "point-z", [13] = "polyline-z", [15] = "polygon-z", [18] = "multipoint-z",
+    [21] = "point-m", [23] = "polyline-m", [25] = "polygon-m", [28] = "multipoint-m",
+}
+
+local function startswith(s1, s2)
+    return s1:sub(1, #s2) == s2
+end
+
+local function read_bbox(fp)
+    local bbox = {}
+    bbox.xmin = read_led64(fp)
+    bbox.ymin = read_led64(fp)
+    bbox.xmax = read_led64(fp)
+    bbox.ymax = read_led64(fp)
+    return bbox
+end
+
+-- SHX and SHP share the same header structure!
+local function read_sf_header(fp)
+    local file_code = read_bei32(fp)
+    if file_code ~= 9994 then
+        error("does not seem to be a shapefile (invalid FileCode)")
+    end
+    fp:seek("cur", 20) -- unused, always all-zero
+    local file_len = read_bei32(fp)
+    local version = read_lei32(fp)
+    if version ~= 1000 then
+        error("invalid shapefile version")
+    end
+    local header = {}
+    header.shape = shape_name[read_lei32(fp)]
+    header.bbox = read_bbox(fp)
+    header.zmin = read_led64(fp)
+    header.zmax = read_led64(fp)
+    header.mmin = read_led64(fp)
+    header.mmax = read_led64(fp)
+    return header
+end
+
+function SF:read_shx()
+    local fp = io.open(self.path .. ".shx", "rb")
+    self.header = read_sf_header(fp)
+    self.index = {}
+    for i = 1, self.nrecs do
+        local offset = read_bei32(fp)
+        local length = read_bei32(fp)
+        table.insert(self.index, {offset=offset, length=length})
+    end
+    io.close(fp)
+end
+
+function SF:read_record_header(index)
+    local fp = self.fp
+    fp:seek("set", self.index[index].offset * 2)
+    local num = read_bei32(fp)
+    local len = read_bei32(fp)
+    local shape = shape_name[read_lei32(fp)]
+    return num, len, shape
+end
+
+-- this is the old version, using coroutines
+function SF:read_polygons_co(index)
+    assert(startswith(self.header.shape, "polygon"), "type mismatch")
+    local fp = self.fp
+    local num, len, shape = self:read_record_header(index)
+    if shape == "null" then
+        return nil
+    end
+    local bbox = read_bbox(fp)
+    local nparts = read_lei32(fp)
+    --~ print(nparts)
+    local npoints = read_lei32(fp)
+    local total = read_lei32(fp) -- first index is always zero?
+    local lengths = {}
+    for i = 2, nparts do
+        local index = read_lei32(fp)
+        table.insert(lengths, index - total)
+        total = index
+    end
+    table.insert(lengths, npoints - total)
+    return coroutine.wrap(function()
+        for i = 1, #lengths do
+            local length = lengths[i]
+            coroutine.yield(coroutine.wrap(function()
+                for i = 1, length do
+                    local x = read_led64(fp)
+                    local y = read_led64(fp)
+                    coroutine.yield({x, y})
+                end
+            end))
+        end
+    end)
+end
+
+function SF:read_polygons(index)
+    assert(startswith(self.header.shape, "polygon"), "type mismatch")
+    local fp = self.fp
+    local num, len, shape = self:read_record_header(index)
+    if shape == "null" then
+        return nil
+    end
+    local bbox = read_bbox(fp)
+    local nparts = read_lei32(fp)
+    --~ print(nparts)
+    local npoints = read_lei32(fp)
+    local total = read_lei32(fp) -- first index is always zero?
+    local lengths = {}
+    for i = 2, nparts do
+        local index = read_lei32(fp)
+        table.insert(lengths, index - total)
+        total = index
+    end
+    table.insert(lengths, npoints - total)
+    local i = 0
+    return bbox, lengths, function()
+        i = i + 1
+        if i <= #lengths then
+            local j = 0
+            local length = lengths[i]
+            return function()
+                j = j + 1
+                if j <= length then
+                    local x = read_led64(fp)
+                    local y = read_led64(fp)
+                    return {x, y}
+                end
+            end
+        end
+    end
+end
+
+function SF:close()
+    io.close(self.fp)
+end
+
+local function open_shapefile(path)
+    local self = setmetatable({path=path}, SF)
+    self:read_dbf()
+    self:read_shx()
+    self.fp = io.open(path .. ".shp", "rb")
+    return self
+end
+
+local function read_dbf(fp)
+    local version = bit.band(read_byte(fp), 0x07)
+    if version ~= 3 then
+        error("only DBF version 5 is supported")
+    end
+    local year = 1900 + read_byte(fp)
+    local month = read_byte(fp)
+    local day = read_byte(fp)
+    -- print(("%04d-%02d-%02d"):format(year, month, day))
+    local nrecs = read_leu32(fp)
+    fp:seek("cur", 24)
+    local reclen = 0
+    local fields = {}
+    local byte = read_byte(fp)
+    while byte ~= 0x0D do
+        local field_name = rtrim(string.char(byte) .. fp:read(10), "\000")
+        local field_type = fp:read(1)
+        fp:seek("cur", 4)
+        local field_length = read_byte(fp)
+        reclen = reclen + field_length
+        local field_dec_count = read_byte(fp)
+        local field = {
+          name=field_name,
+          type=field_type,
+          length=field_length,
+          dec_count=field_dec_count
+        }
+        table.insert(fields, field)
+        fp:seek("cur", 14)
+        byte = read_byte(fp)
+    end
+    local tab = {}
+    for i = 1, nrecs do
+        if fp:read(1) == "*" then
+            fp:seek("cur", reclen)
+        else
+            local row = {}
+            for j = 1, #fields do
+                local field = fields[j]
+                local cell = rtrim(fp:read(field.length), " ")
+                if field.type == "F" or field.type == "N" then
+                    cell = tonumber(cell)
+                end
+                table.insert(row, cell)
+            end
+            table.insert(tab, row)
+        end
+    end
+    return fields, tab
+end
+
+local function tab2csv(fields, tab, sep, fp)
+    -- TODO: refactor to use table.concat(str_list, sep)
+    sep = sep or ":"
+    fp = fp or io.output()
+    for i = 1, #fields do
+        if i > 1 then
+            fp:write(sep)
+        end
+        fp:write(fields[i].name)
+    end
+    fp:write("\n")
+    for i = 1, #tab do
+        for j = 1, #fields do
+            if j > 1 then
+                fp:write(sep)
+            end
+            local cell = tab[i][j]
+            fp:write(cell)
+        end
+        fp:write("\n")
+    end
+end
+
+return {open_shapefile=open_shapefile, read_dbf=read_dbf, tab2csv=tab2csv}