From ae7109ac7c8004340a7f034c49ad79901386b1d7 Mon Sep 17 00:00:00 2001 From: Timothy Allen Date: Sun, 12 Apr 2026 08:28:49 +0200 Subject: [PATCH] Also source dates from the filename --- piwigo_export.py | 220 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 167 insertions(+), 53 deletions(-) diff --git a/piwigo_export.py b/piwigo_export.py index 9d33230..7f34a5e 100644 --- a/piwigo_export.py +++ b/piwigo_export.py @@ -29,6 +29,7 @@ import json import math import os import pathlib +import re import shutil import subprocess import sys @@ -202,43 +203,91 @@ def _xmp_datetime(s) -> str: def _parse_datetime(s) -> datetime.datetime | None: """Parse a DB or EXIF date string into a datetime, or return None.""" s = str(s).strip() - for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d', - '%Y:%m:%d %H:%M:%S', '%Y:%m:%d'): + # Try the full string first (handles both datetime and date-only values). + for fmt in ('%Y-%m-%d %H:%M:%S', '%Y:%m:%d %H:%M:%S', '%Y-%m-%d', '%Y:%m:%d'): try: - return datetime.datetime.strptime(s[:len(fmt)], fmt) + return datetime.datetime.strptime(s, fmt) + except ValueError: + continue + # If the string has trailing timezone info or extra fields, try the prefix. + for prefix_len, fmt in ((19, '%Y-%m-%d %H:%M:%S'), (19, '%Y:%m:%d %H:%M:%S'), + (10, '%Y-%m-%d'), (10, '%Y:%m:%d')): + try: + return datetime.datetime.strptime(s[:prefix_len], fmt) except ValueError: continue return None -# EXIF/IPTC/XMP tags that carry a capture or creation date, in preference order. -_DATE_TAGS_IN_IMAGE = ( - 'EXIF:DateTimeOriginal', - 'EXIF:CreateDate', - 'XMP-xmp:CreateDate', - 'IPTC:DateCreated', - 'EXIF:ModifyDate', -) - - def _earliest_image_date(image_path: pathlib.Path) -> datetime.datetime | None: """Return the earliest datetime found in the image's embedded metadata. 
+ Scans every tag returned by exiftool whose name contains 'date' or 'time' + (case-insensitive), skipping filesystem/tool pseudo-groups (File:, + ExifTool:, Composite:) and GPS tags (which are UTC and timezone-ambiguous). + This catches EXIF:ModifyDate, PNG:ModifyDate, XMP-xmp:CreateDate, etc. + without needing a format-specific allowlist. + Returns None if exiftool is not available or no date tags are found. """ if not shutil.which('exiftool'): return None existing = _read_existing_metadata(image_path) dates = [] - for tag in _DATE_TAGS_IN_IMAGE: - val = existing.get(tag) - if val: - dt = _parse_datetime(str(val)) - if dt: - dates.append(dt) + for key, val in existing.items(): + group = key.split(':')[0] if ':' in key else '' + if group in ('File', 'ExifTool', 'Composite', 'GPS'): + continue + tag_name = key.split(':', 1)[1] if ':' in key else key + if 'date' not in tag_name.lower() and 'time' not in tag_name.lower(): + continue + dt = _parse_datetime(str(val)) + if dt and 1900 < dt.year < 2100: + dates.append(dt) return min(dates) if dates else None +# Matches 14 consecutive digits (YYYYMMDDHHMMSS) not adjacent to another digit. +_RE_DATETIME_14 = re.compile(r'(?<!\d)(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(?!\d)') +# Matches 8 consecutive digits (YYYYMMDD) not adjacent to another digit. +_RE_DATE_8 = re.compile(r'(?<!\d)(\d{4})(\d{2})(\d{2})(?!\d)') + + +def _date_from_filename(name: str) -> tuple[datetime.datetime | None, bool]: + """Try to parse a date/datetime from a filename. + + Recognises patterns such as: + 20120415142550-b05adf19.png → 2012-04-15 14:25:50 (has_time=True) + IMG-20120415142550.jpg → 2012-04-15 14:25:50 (has_time=True) + IMG-20120415.jpg → 2012-04-15 00:00:00 (has_time=False) + + Returns (datetime, has_time). has_time=False means only a date was found; + the time component is set to midnight but should not be treated as known. + Returns (None, False) if no recognisable pattern is found. + """ + stem = pathlib.Path(name).stem + + # Try 14-digit datetime first.
+ m = _RE_DATETIME_14.search(stem) + if m: + y, mo, d, h, mi, s = (int(x) for x in m.groups()) + try: + return datetime.datetime(y, mo, d, h, mi, s), True + except ValueError: + pass # invalid date/time components — fall through + + # Fall back to 8-digit date. + m = _RE_DATE_8.search(stem) + if m: + y, mo, d = (int(x) for x in m.groups()) + try: + return datetime.datetime(y, mo, d, 0, 0, 0), False + except ValueError: + pass + + return None, False + + def _build_metadata_tags(metadata: dict, fmt: str) -> dict: """ Build a dict of { 'GROUP:TagName': value } for everything we want to write. @@ -510,8 +559,17 @@ def export_image( tags = tags_by_image.get(image_id, []) cat_ids = cats_by_image.get(image_id, []) - # Collect both date sources before building the metadata dict. + # Collect all three date sources before building the metadata dict. + img_filename = pathlib.Path(image_row['path']).name date_embedded_dt = _earliest_image_date(src_file) + date_filename_dt, filename_has_time = _date_from_filename(img_filename) + if date_filename_dt is not None: + date_filename_str = ( + date_filename_dt.strftime('%Y-%m-%d %H:%M:%S') if filename_has_time + else date_filename_dt.strftime('%Y-%m-%d') + ) + else: + date_filename_str = None metadata = { 'title': image_row.get('name'), @@ -519,6 +577,7 @@ def export_image( 'date_created': str(image_row['date_creation']) if image_row.get('date_creation') else None, 'date_added': str(image_row['date_available']) if image_row.get('date_available') else None, 'date_embedded': str(date_embedded_dt) if date_embedded_dt else None, + 'date_filename': date_filename_str, 'description': image_row.get('comment'), 'tags': tags, 'albums': [category_display_path(cid, categories) for cid in cat_ids], @@ -531,11 +590,22 @@ def export_image( 'original_path': image_row['path'], } - # Print both date sources so the user can see any discrepancy. 
- piwigo_str = metadata['date_created'] or '—' + # Print all three date sources so the user can see any discrepancy. + piwigo_str = metadata['date_created'] or '—' embedded_str = metadata['date_embedded'] or '—' - filename_str = pathlib.Path(image_row['path']).name - print(f' {filename_str} piwigo: {piwigo_str} embedded: {embedded_str}') + fn_date_str = metadata['date_filename'] or '—' + print(f' {img_filename} piwigo: {piwigo_str} embedded: {embedded_str} filename: {fn_date_str}') + + # Best date for file mtime: piwigo > embedded > filename. + mtime_ts = None + for _ds in (metadata.get('date_created'), metadata.get('date_embedded'), metadata.get('date_filename')): + if _ds: + _dt = _parse_datetime(_ds) + if _dt: + mtime_ts = _dt.timestamp() + break + if mtime_ts is None: + print(f' NOTE: {img_filename}: no date found; file mtime will not be set', file=sys.stderr) dest_dirs = ( [output_dir / category_fs_path(cid, categories) for cid in cat_ids] @@ -543,7 +613,7 @@ def export_image( else [output_dir / '_unsorted'] ) - filename = pathlib.Path(image_row['path']).name + filename = img_filename stem = pathlib.Path(filename).stem written = 0 @@ -571,11 +641,20 @@ def export_image( print(f' SKIP (both files exist, use --overwrite to replace): {dest_image}') continue - # Copy image file + # Copy image file. shutil.copy2(str(src_file), str(dest_image)) written += 1 if metadata_format: - embed_metadata(dest_image, metadata, metadata_format, never_overwrite_metadata) + # If no Piwigo date, fall back to filename-derived date so that + # missing EXIF/IPTC/XMP date tags are filled in from the filename. + meta_for_embed = metadata + if not meta_for_embed.get('date_created') and meta_for_embed.get('date_filename'): + meta_for_embed = {**metadata, 'date_created': metadata['date_filename']} + embed_metadata(dest_image, meta_for_embed, metadata_format, never_overwrite_metadata) + + # Set mtime after any exiftool call so exiftool doesn't reset it. 
+ if mtime_ts is not None: + os.utime(str(dest_image), (mtime_ts, mtime_ts)) # Write/refresh the sidecar so it stays in sync with the DB. dest_sidecar.write_text( @@ -683,63 +762,98 @@ def cmd_set_dates(args): print(f'WARNING: no image file found for {sidecar.name}', file=sys.stderr) continue - # Parse the two candidate dates from the sidecar. + # Parse all three candidate dates from the sidecar. dt_piwigo = _parse_datetime(data['date_created']) if data.get('date_created') else None dt_embedded = _parse_datetime(data['date_embedded']) if data.get('date_embedded') else None + fn_date_str = data.get('date_filename') + dt_filename = _parse_datetime(fn_date_str) if fn_date_str else None + # has_time is False when the sidecar stored only a date (no space → no time part). + filename_has_time = bool(fn_date_str and ' ' in fn_date_str) + + chosen = None + chosen_source = None - # Determine which date to apply. if args.use == 'piwigo': - chosen = dt_piwigo - if chosen is None: + if dt_piwigo is None: print(f' NOTE: {image_path.name}: no piwigo date in sidecar; skipping.') noted += 1 continue + chosen, chosen_source = dt_piwigo, 'piwigo' elif args.use == 'embedded': - chosen = dt_embedded - if chosen is None: + if dt_embedded is None: print(f' NOTE: {image_path.name}: no embedded date in sidecar; skipping.') noted += 1 continue + chosen, chosen_source = dt_embedded, 'embedded' + + elif args.use == 'filename': + if dt_filename is None: + print(f' NOTE: {image_path.name}: no filename date in sidecar; skipping.') + noted += 1 + continue + chosen, chosen_source = dt_filename, 'filename' else: - # Interactive mode. - if dt_piwigo is None and dt_embedded is None: + # Interactive mode: collect available (source, datetime) pairs. 
+ options = [] + if dt_piwigo: options.append(('piwigo', dt_piwigo)) + if dt_embedded: options.append(('embedded', dt_embedded)) + if dt_filename: options.append(('filename', dt_filename)) + + if not options: print(f' NOTE: {image_path.name}: no dates available; skipping.') noted += 1 continue - if dt_piwigo == dt_embedded or (dt_piwigo and dt_embedded is None): - chosen = dt_piwigo - elif dt_embedded and dt_piwigo is None: - chosen = dt_embedded + # If all available dates are the same, or only one source, apply silently. + unique_dts = list(dict.fromkeys(o[1] for o in options)) + if len(unique_dts) == 1: + chosen, chosen_source = options[0][1], options[0][0] else: - # Both present and different — ask the user. + # Multiple different dates — ask the user. print(f'\n{image_path.name}') - print(f' [1] piwigo : {dt_piwigo}') - print(f' [2] embedded : {dt_embedded}') + for i, (src, dt) in enumerate(options, 1): + print(f' [{i}] {src:<8} : {dt}') print(f' [s] skip') while True: - choice = input('Choice [1/2/s]: ').strip().lower() + choice = input(f'Choice [1-{len(options)}/s]: ').strip().lower() if choice in ('s', 'skip', ''): - chosen = None break - if choice == '1': - chosen = dt_piwigo - break - if choice == '2': - chosen = dt_embedded - break - print(' Please enter 1, 2, or s.') + try: + idx = int(choice) - 1 + if 0 <= idx < len(options): + chosen_source, chosen = options[idx] + break + except ValueError: + pass + print(f' Please enter a number between 1 and {len(options)}, or s.') if chosen is None: skipped += 1 continue + # Set file mtime. ts = chosen.timestamp() os.utime(image_path, (ts, ts)) applied += 1 + # For filename-derived dates, also embed the date into any missing + # EXIF/IPTC/XMP tags so the image carries its own date going forward. 
+ if chosen_source == 'filename' and shutil.which('exiftool'): + date_str = ( + chosen.strftime('%Y-%m-%d %H:%M:%S') if filename_has_time + else chosen.strftime('%Y-%m-%d') + ) + embed_metadata( + image_path, + {'date_created': date_str}, + ['exif', 'iptc', 'xmp'], + never_overwrite=True, + ) + # exiftool rewrites the file and resets its mtime — restore it. + os.utime(image_path, (ts, ts)) + print( f'\nDone. {applied} mtime(s) set, {skipped} skipped, {noted} with no date.' ) @@ -808,8 +922,8 @@ def main(): help='Directory containing the exported files and JSON sidecars.', ) dp.add_argument( - '--use', choices=['piwigo', 'embedded'], metavar='SOURCE', - help='Auto-select a date source (piwigo or embedded) instead of ' + '--use', choices=['piwigo', 'embedded', 'filename'], metavar='SOURCE', + help='Auto-select a date source (piwigo, embedded, or filename) instead of ' 'prompting for each image.', )