Also source dates from the filename

This commit is contained in:
2026-04-12 08:28:49 +02:00
parent a6db47237e
commit ae7109ac7c
+167 -53
View File
@@ -29,6 +29,7 @@ import json
import math import math
import os import os
import pathlib import pathlib
import re
import shutil import shutil
import subprocess import subprocess
import sys import sys
@@ -202,43 +203,91 @@ def _xmp_datetime(s) -> str:
def _parse_datetime(s) -> datetime.datetime | None: def _parse_datetime(s) -> datetime.datetime | None:
"""Parse a DB or EXIF date string into a datetime, or return None.""" """Parse a DB or EXIF date string into a datetime, or return None."""
s = str(s).strip() s = str(s).strip()
for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d', # Try the full string first (handles both datetime and date-only values).
'%Y:%m:%d %H:%M:%S', '%Y:%m:%d'): for fmt in ('%Y-%m-%d %H:%M:%S', '%Y:%m:%d %H:%M:%S', '%Y-%m-%d', '%Y:%m:%d'):
try: try:
return datetime.datetime.strptime(s[:len(fmt)], fmt) return datetime.datetime.strptime(s, fmt)
except ValueError:
continue
# If the string has trailing timezone info or extra fields, try the prefix.
for prefix_len, fmt in ((19, '%Y-%m-%d %H:%M:%S'), (19, '%Y:%m:%d %H:%M:%S'),
(10, '%Y-%m-%d'), (10, '%Y:%m:%d')):
try:
return datetime.datetime.strptime(s[:prefix_len], fmt)
except ValueError: except ValueError:
continue continue
return None return None
# EXIF/IPTC/XMP tags that carry a capture or creation date, in preference order.
_DATE_TAGS_IN_IMAGE = (
'EXIF:DateTimeOriginal',
'EXIF:CreateDate',
'XMP-xmp:CreateDate',
'IPTC:DateCreated',
'EXIF:ModifyDate',
)
def _earliest_image_date(image_path: pathlib.Path) -> datetime.datetime | None:
    """Return the earliest datetime found in the image's embedded metadata.

    Scans every tag returned by ``_read_existing_metadata`` whose name
    contains 'date' or 'time' (case-insensitive).  This catches
    EXIF:ModifyDate, PNG:ModifyDate, XMP-xmp:CreateDate, etc. without
    needing a format-specific allowlist.  The following are skipped because
    they do not describe when the picture was made:

    * filesystem/tool pseudo-groups (File:, ExifTool:, Composite:);
    * GPS tags, which are UTC and timezone-ambiguous -- both the GPS group
      and GPS-named tags stored in other groups (e.g. XMP-exif:GPSDateTime);
    * ICC colour-profile groups, whose ProfileDateTime is the creation date
      of the profile (frequently years older than the photo) and would
      otherwise always win the "earliest" comparison.

    Values that do not parse as a date, or whose year falls outside
    1901-2099, are ignored.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        The earliest plausible datetime, or None if exiftool is not
        available or no usable date tag is found.
    """
    if not shutil.which('exiftool'):
        return None

    # Defensive: treat a falsy result (e.g. a failed read) as "no metadata".
    existing = _read_existing_metadata(image_path) or {}

    skipped_groups = ('File', 'ExifTool', 'Composite', 'GPS')
    dates = []
    for key, val in existing.items():
        # Keys are expected to look like 'Group:TagName'; a key without a
        # colon is treated as a bare tag name with an empty group.
        group, sep, tag_name = key.partition(':')
        if not sep:
            group, tag_name = '', key

        # The ICC group name depends on the exiftool group family in use:
        # 'ICC_Profile' (family 0) or 'ICC-header' etc. (family 1).
        if group in skipped_groups or group.startswith('ICC'):
            continue

        lowered = tag_name.lower()
        if lowered.startswith('gps'):
            continue
        if 'date' not in lowered and 'time' not in lowered:
            continue

        dt = _parse_datetime(str(val))
        # The year window rejects zeroed or corrupt values such as 0000:00:00.
        if dt and 1900 < dt.year < 2100:
            dates.append(dt)

    return min(dates) if dates else None
# Matches 14 consecutive digits (YYYYMMDDHHMMSS) not adjacent to another digit.
_RE_DATETIME_14 = re.compile(r'(?<!\d)(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(?!\d)')
# Matches 8 consecutive digits (YYYYMMDD) not adjacent to another digit.
_RE_DATE_8 = re.compile(r'(?<!\d)(\d{4})(\d{2})(\d{2})(?!\d)')
def _date_from_filename(name: str) -> tuple[datetime.datetime | None, bool]:
"""Try to parse a date/datetime from a filename.
Recognises patterns such as:
20120415142550-b05adf19.png → 2012-04-15 14:25:50 (has_time=True)
IMG-20120415142550.jpg → 2012-04-15 14:25:50 (has_time=True)
IMG-20120415.jpg → 2012-04-15 00:00:00 (has_time=False)
Returns (datetime, has_time). has_time=False means only a date was found;
the time component is set to midnight but should not be treated as known.
Returns (None, False) if no recognisable pattern is found.
"""
stem = pathlib.Path(name).stem
# Try 14-digit datetime first.
m = _RE_DATETIME_14.search(stem)
if m:
y, mo, d, h, mi, s = (int(x) for x in m.groups())
try:
return datetime.datetime(y, mo, d, h, mi, s), True
except ValueError:
pass # invalid date/time components — fall through
# Fall back to 8-digit date.
m = _RE_DATE_8.search(stem)
if m:
y, mo, d = (int(x) for x in m.groups())
try:
return datetime.datetime(y, mo, d, 0, 0, 0), False
except ValueError:
pass
return None, False
def _build_metadata_tags(metadata: dict, fmt: str) -> dict: def _build_metadata_tags(metadata: dict, fmt: str) -> dict:
""" """
Build a dict of { 'GROUP:TagName': value } for everything we want to write. Build a dict of { 'GROUP:TagName': value } for everything we want to write.
@@ -510,8 +559,17 @@ def export_image(
tags = tags_by_image.get(image_id, []) tags = tags_by_image.get(image_id, [])
cat_ids = cats_by_image.get(image_id, []) cat_ids = cats_by_image.get(image_id, [])
# Collect both date sources before building the metadata dict. # Collect all three date sources before building the metadata dict.
img_filename = pathlib.Path(image_row['path']).name
date_embedded_dt = _earliest_image_date(src_file) date_embedded_dt = _earliest_image_date(src_file)
date_filename_dt, filename_has_time = _date_from_filename(img_filename)
if date_filename_dt is not None:
date_filename_str = (
date_filename_dt.strftime('%Y-%m-%d %H:%M:%S') if filename_has_time
else date_filename_dt.strftime('%Y-%m-%d')
)
else:
date_filename_str = None
metadata = { metadata = {
'title': image_row.get('name'), 'title': image_row.get('name'),
@@ -519,6 +577,7 @@ def export_image(
'date_created': str(image_row['date_creation']) if image_row.get('date_creation') else None, 'date_created': str(image_row['date_creation']) if image_row.get('date_creation') else None,
'date_added': str(image_row['date_available']) if image_row.get('date_available') else None, 'date_added': str(image_row['date_available']) if image_row.get('date_available') else None,
'date_embedded': str(date_embedded_dt) if date_embedded_dt else None, 'date_embedded': str(date_embedded_dt) if date_embedded_dt else None,
'date_filename': date_filename_str,
'description': image_row.get('comment'), 'description': image_row.get('comment'),
'tags': tags, 'tags': tags,
'albums': [category_display_path(cid, categories) for cid in cat_ids], 'albums': [category_display_path(cid, categories) for cid in cat_ids],
@@ -531,11 +590,22 @@ def export_image(
'original_path': image_row['path'], 'original_path': image_row['path'],
} }
# Print both date sources so the user can see any discrepancy. # Print all three date sources so the user can see any discrepancy.
piwigo_str = metadata['date_created'] or '' piwigo_str = metadata['date_created'] or ''
embedded_str = metadata['date_embedded'] or '' embedded_str = metadata['date_embedded'] or ''
filename_str = pathlib.Path(image_row['path']).name fn_date_str = metadata['date_filename'] or ''
print(f' {filename_str} piwigo: {piwigo_str} embedded: {embedded_str}') print(f' {img_filename} piwigo: {piwigo_str} embedded: {embedded_str} filename: {fn_date_str}')
# Best date for file mtime: piwigo > embedded > filename.
mtime_ts = None
for _ds in (metadata.get('date_created'), metadata.get('date_embedded'), metadata.get('date_filename')):
if _ds:
_dt = _parse_datetime(_ds)
if _dt:
mtime_ts = _dt.timestamp()
break
if mtime_ts is None:
print(f' NOTE: {img_filename}: no date found; file mtime will not be set', file=sys.stderr)
dest_dirs = ( dest_dirs = (
[output_dir / category_fs_path(cid, categories) for cid in cat_ids] [output_dir / category_fs_path(cid, categories) for cid in cat_ids]
@@ -543,7 +613,7 @@ def export_image(
else [output_dir / '_unsorted'] else [output_dir / '_unsorted']
) )
filename = pathlib.Path(image_row['path']).name filename = img_filename
stem = pathlib.Path(filename).stem stem = pathlib.Path(filename).stem
written = 0 written = 0
@@ -571,11 +641,20 @@ def export_image(
print(f' SKIP (both files exist, use --overwrite to replace): {dest_image}') print(f' SKIP (both files exist, use --overwrite to replace): {dest_image}')
continue continue
# Copy image file # Copy image file.
shutil.copy2(str(src_file), str(dest_image)) shutil.copy2(str(src_file), str(dest_image))
written += 1 written += 1
if metadata_format: if metadata_format:
embed_metadata(dest_image, metadata, metadata_format, never_overwrite_metadata) # If no Piwigo date, fall back to filename-derived date so that
# missing EXIF/IPTC/XMP date tags are filled in from the filename.
meta_for_embed = metadata
if not meta_for_embed.get('date_created') and meta_for_embed.get('date_filename'):
meta_for_embed = {**metadata, 'date_created': metadata['date_filename']}
embed_metadata(dest_image, meta_for_embed, metadata_format, never_overwrite_metadata)
# Set mtime after any exiftool call so exiftool doesn't reset it.
if mtime_ts is not None:
os.utime(str(dest_image), (mtime_ts, mtime_ts))
# Write/refresh the sidecar so it stays in sync with the DB. # Write/refresh the sidecar so it stays in sync with the DB.
dest_sidecar.write_text( dest_sidecar.write_text(
@@ -683,63 +762,98 @@ def cmd_set_dates(args):
print(f'WARNING: no image file found for {sidecar.name}', file=sys.stderr) print(f'WARNING: no image file found for {sidecar.name}', file=sys.stderr)
continue continue
# Parse the two candidate dates from the sidecar. # Parse all three candidate dates from the sidecar.
dt_piwigo = _parse_datetime(data['date_created']) if data.get('date_created') else None dt_piwigo = _parse_datetime(data['date_created']) if data.get('date_created') else None
dt_embedded = _parse_datetime(data['date_embedded']) if data.get('date_embedded') else None dt_embedded = _parse_datetime(data['date_embedded']) if data.get('date_embedded') else None
fn_date_str = data.get('date_filename')
dt_filename = _parse_datetime(fn_date_str) if fn_date_str else None
# has_time is False when the sidecar stored only a date (no space → no time part).
filename_has_time = bool(fn_date_str and ' ' in fn_date_str)
chosen = None
chosen_source = None
# Determine which date to apply.
if args.use == 'piwigo': if args.use == 'piwigo':
chosen = dt_piwigo if dt_piwigo is None:
if chosen is None:
print(f' NOTE: {image_path.name}: no piwigo date in sidecar; skipping.') print(f' NOTE: {image_path.name}: no piwigo date in sidecar; skipping.')
noted += 1 noted += 1
continue continue
chosen, chosen_source = dt_piwigo, 'piwigo'
elif args.use == 'embedded': elif args.use == 'embedded':
chosen = dt_embedded if dt_embedded is None:
if chosen is None:
print(f' NOTE: {image_path.name}: no embedded date in sidecar; skipping.') print(f' NOTE: {image_path.name}: no embedded date in sidecar; skipping.')
noted += 1 noted += 1
continue continue
chosen, chosen_source = dt_embedded, 'embedded'
elif args.use == 'filename':
if dt_filename is None:
print(f' NOTE: {image_path.name}: no filename date in sidecar; skipping.')
noted += 1
continue
chosen, chosen_source = dt_filename, 'filename'
else: else:
# Interactive mode. # Interactive mode: collect available (source, datetime) pairs.
if dt_piwigo is None and dt_embedded is None: options = []
if dt_piwigo: options.append(('piwigo', dt_piwigo))
if dt_embedded: options.append(('embedded', dt_embedded))
if dt_filename: options.append(('filename', dt_filename))
if not options:
print(f' NOTE: {image_path.name}: no dates available; skipping.') print(f' NOTE: {image_path.name}: no dates available; skipping.')
noted += 1 noted += 1
continue continue
if dt_piwigo == dt_embedded or (dt_piwigo and dt_embedded is None): # If all available dates are the same, or only one source, apply silently.
chosen = dt_piwigo unique_dts = list(dict.fromkeys(o[1] for o in options))
elif dt_embedded and dt_piwigo is None: if len(unique_dts) == 1:
chosen = dt_embedded chosen, chosen_source = options[0][1], options[0][0]
else: else:
# Both present and different — ask the user. # Multiple different dates — ask the user.
print(f'\n{image_path.name}') print(f'\n{image_path.name}')
print(f' [1] piwigo : {dt_piwigo}') for i, (src, dt) in enumerate(options, 1):
print(f' [2] embedded : {dt_embedded}') print(f' [{i}] {src:<8} : {dt}')
print(f' [s] skip') print(f' [s] skip')
while True: while True:
choice = input('Choice [1/2/s]: ').strip().lower() choice = input(f'Choice [1-{len(options)}/s]: ').strip().lower()
if choice in ('s', 'skip', ''): if choice in ('s', 'skip', ''):
chosen = None
break break
if choice == '1': try:
chosen = dt_piwigo idx = int(choice) - 1
break if 0 <= idx < len(options):
if choice == '2': chosen_source, chosen = options[idx]
chosen = dt_embedded break
break except ValueError:
print(' Please enter 1, 2, or s.') pass
print(f' Please enter a number between 1 and {len(options)}, or s.')
if chosen is None: if chosen is None:
skipped += 1 skipped += 1
continue continue
# Set file mtime.
ts = chosen.timestamp() ts = chosen.timestamp()
os.utime(image_path, (ts, ts)) os.utime(image_path, (ts, ts))
applied += 1 applied += 1
# For filename-derived dates, also embed the date into any missing
# EXIF/IPTC/XMP tags so the image carries its own date going forward.
if chosen_source == 'filename' and shutil.which('exiftool'):
date_str = (
chosen.strftime('%Y-%m-%d %H:%M:%S') if filename_has_time
else chosen.strftime('%Y-%m-%d')
)
embed_metadata(
image_path,
{'date_created': date_str},
['exif', 'iptc', 'xmp'],
never_overwrite=True,
)
# exiftool rewrites the file and resets its mtime — restore it.
os.utime(image_path, (ts, ts))
print( print(
f'\nDone. {applied} mtime(s) set, {skipped} skipped, {noted} with no date.' f'\nDone. {applied} mtime(s) set, {skipped} skipped, {noted} with no date.'
) )
@@ -808,8 +922,8 @@ def main():
help='Directory containing the exported files and JSON sidecars.', help='Directory containing the exported files and JSON sidecars.',
) )
dp.add_argument( dp.add_argument(
'--use', choices=['piwigo', 'embedded'], metavar='SOURCE', '--use', choices=['piwigo', 'embedded', 'filename'], metavar='SOURCE',
help='Auto-select a date source (piwigo or embedded) instead of ' help='Auto-select a date source (piwigo, embedded, or filename) instead of '
'prompting for each image.', 'prompting for each image.',
) )