From fc96b169206546975549292ff0dad14ae65fed44 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 29 Jun 2026 16:11:50 +0100 Subject: [PATCH] v3.1.4. Fix bug with dates supplied as strings & test all encodings in CI Test on all supported encodings in CI only Use Python 3.9 compatible except: Syntax v3.1.4. Fix bug with dates supplied as strings Remove @reproduce_failure --- README.md | 13 +++++++--- changelog.txt | 8 +++--- src/shapefile.py | 4 +-- tests/hypothesis_tests.py | 52 ++++++++++++++++----------------------- 4 files changed, 37 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index a48a338..8899510 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ The Python Shapefile Library (PyShp) reads and writes ESRI Shapefiles in pure Py - **Author**: [Joel Lawhead](https://github.com/GeospatialPython) - **Maintainers**: [James Parrott](https://github.com/JamesParrott) & [Karim Bahgat](https://github.com/karimbahgat) -- **Version**: 3.1.4.dev -- **Date**: 27th June 2026 +- **Version**: 3.1.4 +- **Date**: 29th June 2026 - **License**: [MIT](https://github.com/GeospatialPython/pyshp/blob/master/LICENSE.TXT) ## Contents @@ -93,9 +93,14 @@ part of your geospatial project. # Version Changes -## 3.1.4.dev +## 3.1.4 +### Bug fix + - Fix bug causing dates supplied as length 8 strings of digits to be encoded by the custom encoding, not ascii. + ### Testing - - Test other codecs (ascii and unicode so far). + - Test other codecs (ascii and UTF-8, UTF-16 & UTF-32 so far). + - Test all available codecs in CI (92 of them). + ## 3.1.3 - Restore faster text writing paths for single-byte Ascii encodings, and Utf-8. diff --git a/changelog.txt b/changelog.txt index 3d00825..3d46e33 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,8 +1,10 @@ -VERSION 3.1.4.dev - +VERSION 3.1.4 +2026-06-29 + * Fix bug causing dates supplied as length 8 strings to be encoded by the custom codec, not ascii 2026-06-27 - * Test other codecs (ascii and unicode so far). + * Test other codecs (ascii and UTF-8, UTF-16 & UTF-32 so far). + * Test all other codecs in CI VERSION 3.1.3 diff --git a/src/shapefile.py b/src/shapefile.py index 2a047f6..547a4c0 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -8,7 +8,7 @@ from __future__ import annotations -__version__ = "3.1.4.dev" +__version__ = "3.1.4" import abc import array @@ -4436,7 +4436,7 @@ def _record(self, record: list[RecordValue]) -> None: elif value in MISSING: str_val = "0" * 8 # QGIS NULL for date type elif isinstance(value, str) and len(value) == 8: - pass # value is already a date string + str_val = value else: raise ShapefileException( f"Could not read as date: {value}. " diff --git a/tests/hypothesis_tests.py b/tests/hypothesis_tests.py index 8c95724..8b6381b 100644 --- a/tests/hypothesis_tests.py +++ b/tests/hypothesis_tests.py @@ -4,6 +4,7 @@ import datetime import io import itertools +import os import string import warnings @@ -27,6 +28,8 @@ import shapefile as shp +IN_CI = bool(os.getenv("CI") or os.getenv("GITHUB_ACTIONS")) + @contextlib.contextmanager def ignore_warnings(category=None): with warnings.catch_warnings(): @@ -551,42 +554,29 @@ def test_shx_reader_writer_roundtrip(codes_and_shapes)-> None: } -ENCODINGS = [ +ENCODINGS = [ "ascii", "latin1", "utf-8", "utf-16-be", "utf-16-le", - "utf-16", - "utf-32-be", "utf-32-le", - "cp1252", - "cp1254", - "cp932", - "euc_kr", - "euc_jp", - "mac_iceland", - "cp932", - "shift_jis", - "iso8859_5", - "koi8_r", - "gbk", - "gb18030", - "big5", + "cp1140", ] -encodings = sampled_from(ENCODINGS) - -# from encodings.aliases import aliases -# encs = set() -# for enc in aliases.values(): -# if enc in encs: -# continue -# try: -# "".encode(enc) -# except UnicodeEncodeError, LookupError: -# continue -# encs.add(enc) -# assert encs == ['utf_16_le', 'iso8859_7', 'cp437', 'iso2022_jp_3', 'shift_jis', 'cp775', 'cp1140', + +def _encodings() -> set[str]: + from encodings.aliases import aliases + encs = set() + for enc in aliases.values(): + if enc in encs: + continue + try: + "".encode(enc) + except (UnicodeEncodeError, LookupError): + continue + encs.add(enc) + return encs +# assert _encodings() == {'utf_16_le', 'iso8859_7', 'cp437', 'iso2022_jp_3', 'shift_jis', 'cp775', 'cp1140', # 'cp861', 'iso8859_11', 'iso8859_9', 'euc_jp', 'utf_16', 'cp950', 'mac_cyrillic', 'mac_turkish', 'iso2022_jp_1', 'iso8859_10', # 'iso2022_jp_2004', 'cp866', 'mac_greek', 'hz', 'cp1257', 'cp037', 'cp863', 'iso8859_4', 'utf_16_be', 'gb18030', 'cp1250', # 'cp850', 'iso8859_5', 'shift_jisx0213', 'iso8859_8', 'cp273', 'euc_jisx0213', 'cp932', 'cp862', 'tis_620', 'cp1125', 'koi8_r', @@ -594,9 +584,9 @@ def test_shx_reader_writer_roundtrip(codes_and_shapes)-> None: # 'ascii', 'cp1254', 'cp424', 'cp855', 'hp_roman8', 'mac_latin2', 'euc_jis_2004', 'euc_kr', 'cp1256', 'shift_jis_2004', # 'utf_32_le', 'gbk', 'cp869', 'iso8859_13', 'iso8859_3', 'big5', 'cp1258', 'cp1253', 'latin_1', 'cp864', 'utf_8', # 'iso2022_kr', 'cp1251', 'cp1255', 'mac_iceland', 'kz1048', 'iso8859_14', 'utf_32_be', 'ptcp154', 'iso8859_6', 'mac_roman', -# 'utf_32', 'iso2022_jp_2', 'iso8859_16', 'mbcs', 'cp500', 'iso8859_2', 'cp949', 'cp852', 'utf_7', 'big5hkscs', 'johab'] +# 'utf_32', 'iso2022_jp_2', 'iso8859_16', 'mbcs', 'cp500', 'iso8859_2', 'cp949', 'cp852', 'utf_7', 'big5hkscs', 'johab'} -# encodings = sampled_from(list(encs)) +encodings = sampled_from(list(_encodings())) # if IN_CI else ENCODINGS) @composite