Also source dates from the filename

This commit is contained in:
2026-04-12 08:28:49 +02:00
parent a6db47237e
commit ae7109ac7c
+167 -53
View File
@@ -29,6 +29,7 @@ import json
import math
import os
import pathlib
import re
import shutil
import subprocess
import sys
@@ -202,43 +203,91 @@ def _xmp_datetime(s) -> str:
def _parse_datetime(s) -> datetime.datetime | None:
"""Parse a DB or EXIF date string into a datetime, or return None."""
s = str(s).strip()
for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d',
'%Y:%m:%d %H:%M:%S', '%Y:%m:%d'):
# Try the full string first (handles both datetime and date-only values).
for fmt in ('%Y-%m-%d %H:%M:%S', '%Y:%m:%d %H:%M:%S', '%Y-%m-%d', '%Y:%m:%d'):
try:
return datetime.datetime.strptime(s[:len(fmt)], fmt)
return datetime.datetime.strptime(s, fmt)
except ValueError:
continue
# If the string has trailing timezone info or extra fields, try the prefix.
for prefix_len, fmt in ((19, '%Y-%m-%d %H:%M:%S'), (19, '%Y:%m:%d %H:%M:%S'),
(10, '%Y-%m-%d'), (10, '%Y:%m:%d')):
try:
return datetime.datetime.strptime(s[:prefix_len], fmt)
except ValueError:
continue
return None
# Group-qualified metadata tag names (EXIF, XMP and IPTC) that can hold a
# capture or creation date, listed from most to least preferred.
_DATE_TAGS_IN_IMAGE = (
    'EXIF:DateTimeOriginal', 'EXIF:CreateDate',
    'XMP-xmp:CreateDate', 'IPTC:DateCreated',
    'EXIF:ModifyDate',
)
def _earliest_image_date(image_path: pathlib.Path) -> datetime.datetime | None:
    """Return the earliest datetime found in the image's embedded metadata.

    Scans every tag returned by ``_read_existing_metadata`` whose name contains
    'date' or 'time' (case-insensitive), so EXIF:ModifyDate, PNG:ModifyDate,
    XMP-xmp:CreateDate, etc. are caught without a format-specific allowlist.

    Tags in the following groups are ignored because they do not describe
    when the picture was taken:

    * ``File``, ``System``, ``ExifTool``, ``Composite`` - filesystem/tool
      pseudo-groups (``System`` is where exiftool's family-1 grouping puts
      FileModifyDate and friends).
    * ``GPS`` - GPS tags are UTC and timezone-ambiguous.
    * any group starting with ``ICC`` - the colour profile's own creation
      date, which is usually far older than the image and would otherwise
      always win the ``min()``.

    NOTE(review): the group names assume exiftool's documented family-0 /
    family-1 naming; confirm against the -G option that
    ``_read_existing_metadata`` passes to exiftool.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        The minimum parseable date whose year lies strictly between 1900 and
        2100, or ``None`` if exiftool is not on PATH or no usable date tag
        is present.
    """
    if not shutil.which('exiftool'):
        return None

    skipped_groups = ('File', 'System', 'ExifTool', 'Composite', 'GPS')
    existing = _read_existing_metadata(image_path)
    dates = []
    for key, val in existing.items():
        # Keys look like 'GROUP:TagName'; a key without ':' has no group.
        group, sep, tag_name = key.partition(':')
        if not sep:
            group, tag_name = '', key
        if group in skipped_groups or group.startswith('ICC'):
            continue
        lowered = tag_name.lower()
        if 'date' not in lowered and 'time' not in lowered:
            continue
        dt = _parse_datetime(str(val))
        # The year window discards placeholder values such as 0000:00:00 and
        # non-date tags that merely contain 'time' (e.g. exposure time).
        if dt and 1900 < dt.year < 2100:
            dates.append(dt)
    return min(dates) if dates else None
# Matches 14 consecutive digits (YYYYMMDDHHMMSS) not adjacent to another digit.
_RE_DATETIME_14 = re.compile(r'(?<!\d)(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(?!\d)')
# Matches 8 consecutive digits (YYYYMMDD) not adjacent to another digit.
_RE_DATE_8 = re.compile(r'(?<!\d)(\d{4})(\d{2})(\d{2})(?!\d)')
def _date_from_filename(name: str) -> tuple[datetime.datetime | None, bool]:
"""Try to parse a date/datetime from a filename.
Recognises patterns such as:
20120415142550-b05adf19.png → 2012-04-15 14:25:50 (has_time=True)
IMG-20120415142550.jpg → 2012-04-15 14:25:50 (has_time=True)
IMG-20120415.jpg → 2012-04-15 00:00:00 (has_time=False)
Returns (datetime, has_time). has_time=False means only a date was found;
the time component is set to midnight but should not be treated as known.
Returns (None, False) if no recognisable pattern is found.
"""
stem = pathlib.Path(name).stem
# Try 14-digit datetime first.
m = _RE_DATETIME_14.search(stem)
if m:
y, mo, d, h, mi, s = (int(x) for x in m.groups())
try:
return datetime.datetime(y, mo, d, h, mi, s), True
except ValueError:
pass # invalid date/time components — fall through
# Fall back to 8-digit date.
m = _RE_DATE_8.search(stem)
if m:
y, mo, d = (int(x) for x in m.groups())
try:
return datetime.datetime(y, mo, d, 0, 0, 0), False
except ValueError:
pass
return None, False
def _build_metadata_tags(metadata: dict, fmt: str) -> dict:
"""
Build a dict of { 'GROUP:TagName': value } for everything we want to write.
@@ -510,8 +559,17 @@ def export_image(
tags = tags_by_image.get(image_id, [])
cat_ids = cats_by_image.get(image_id, [])
# Collect both date sources before building the metadata dict.
# Collect all three date sources before building the metadata dict.
img_filename = pathlib.Path(image_row['path']).name
date_embedded_dt = _earliest_image_date(src_file)
date_filename_dt, filename_has_time = _date_from_filename(img_filename)
if date_filename_dt is not None:
date_filename_str = (
date_filename_dt.strftime('%Y-%m-%d %H:%M:%S') if filename_has_time
else date_filename_dt.strftime('%Y-%m-%d')
)
else:
date_filename_str = None
metadata = {
'title': image_row.get('name'),
@@ -519,6 +577,7 @@ def export_image(
'date_created': str(image_row['date_creation']) if image_row.get('date_creation') else None,
'date_added': str(image_row['date_available']) if image_row.get('date_available') else None,
'date_embedded': str(date_embedded_dt) if date_embedded_dt else None,
'date_filename': date_filename_str,
'description': image_row.get('comment'),
'tags': tags,
'albums': [category_display_path(cid, categories) for cid in cat_ids],
@@ -531,11 +590,22 @@ def export_image(
'original_path': image_row['path'],
}
# Print both date sources so the user can see any discrepancy.
piwigo_str = metadata['date_created'] or ''
# Print all three date sources so the user can see any discrepancy.
piwigo_str = metadata['date_created'] or ''
embedded_str = metadata['date_embedded'] or ''
filename_str = pathlib.Path(image_row['path']).name
print(f' {filename_str} piwigo: {piwigo_str} embedded: {embedded_str}')
fn_date_str = metadata['date_filename'] or ''
print(f' {img_filename} piwigo: {piwigo_str} embedded: {embedded_str} filename: {fn_date_str}')
# Best date for file mtime: piwigo > embedded > filename.
mtime_ts = None
for _ds in (metadata.get('date_created'), metadata.get('date_embedded'), metadata.get('date_filename')):
if _ds:
_dt = _parse_datetime(_ds)
if _dt:
mtime_ts = _dt.timestamp()
break
if mtime_ts is None:
print(f' NOTE: {img_filename}: no date found; file mtime will not be set', file=sys.stderr)
dest_dirs = (
[output_dir / category_fs_path(cid, categories) for cid in cat_ids]
@@ -543,7 +613,7 @@ def export_image(
else [output_dir / '_unsorted']
)
filename = pathlib.Path(image_row['path']).name
filename = img_filename
stem = pathlib.Path(filename).stem
written = 0
@@ -571,11 +641,20 @@ def export_image(
print(f' SKIP (both files exist, use --overwrite to replace): {dest_image}')
continue
# Copy image file
# Copy image file.
shutil.copy2(str(src_file), str(dest_image))
written += 1
if metadata_format:
embed_metadata(dest_image, metadata, metadata_format, never_overwrite_metadata)
# If no Piwigo date, fall back to filename-derived date so that
# missing EXIF/IPTC/XMP date tags are filled in from the filename.
meta_for_embed = metadata
if not meta_for_embed.get('date_created') and meta_for_embed.get('date_filename'):
meta_for_embed = {**metadata, 'date_created': metadata['date_filename']}
embed_metadata(dest_image, meta_for_embed, metadata_format, never_overwrite_metadata)
# Set mtime after any exiftool call so exiftool doesn't reset it.
if mtime_ts is not None:
os.utime(str(dest_image), (mtime_ts, mtime_ts))
# Write/refresh the sidecar so it stays in sync with the DB.
dest_sidecar.write_text(
@@ -683,63 +762,98 @@ def cmd_set_dates(args):
print(f'WARNING: no image file found for {sidecar.name}', file=sys.stderr)
continue
# Parse the two candidate dates from the sidecar.
# Parse all three candidate dates from the sidecar.
dt_piwigo = _parse_datetime(data['date_created']) if data.get('date_created') else None
dt_embedded = _parse_datetime(data['date_embedded']) if data.get('date_embedded') else None
fn_date_str = data.get('date_filename')
dt_filename = _parse_datetime(fn_date_str) if fn_date_str else None
# has_time is False when the sidecar stored only a date (no space → no time part).
filename_has_time = bool(fn_date_str and ' ' in fn_date_str)
chosen = None
chosen_source = None
# Determine which date to apply.
if args.use == 'piwigo':
chosen = dt_piwigo
if chosen is None:
if dt_piwigo is None:
print(f' NOTE: {image_path.name}: no piwigo date in sidecar; skipping.')
noted += 1
continue
chosen, chosen_source = dt_piwigo, 'piwigo'
elif args.use == 'embedded':
chosen = dt_embedded
if chosen is None:
if dt_embedded is None:
print(f' NOTE: {image_path.name}: no embedded date in sidecar; skipping.')
noted += 1
continue
chosen, chosen_source = dt_embedded, 'embedded'
elif args.use == 'filename':
if dt_filename is None:
print(f' NOTE: {image_path.name}: no filename date in sidecar; skipping.')
noted += 1
continue
chosen, chosen_source = dt_filename, 'filename'
else:
# Interactive mode.
if dt_piwigo is None and dt_embedded is None:
# Interactive mode: collect available (source, datetime) pairs.
options = []
if dt_piwigo: options.append(('piwigo', dt_piwigo))
if dt_embedded: options.append(('embedded', dt_embedded))
if dt_filename: options.append(('filename', dt_filename))
if not options:
print(f' NOTE: {image_path.name}: no dates available; skipping.')
noted += 1
continue
if dt_piwigo == dt_embedded or (dt_piwigo and dt_embedded is None):
chosen = dt_piwigo
elif dt_embedded and dt_piwigo is None:
chosen = dt_embedded
# If all available dates are the same, or only one source, apply silently.
unique_dts = list(dict.fromkeys(o[1] for o in options))
if len(unique_dts) == 1:
chosen, chosen_source = options[0][1], options[0][0]
else:
# Both present and different — ask the user.
# Multiple different dates — ask the user.
print(f'\n{image_path.name}')
print(f' [1] piwigo : {dt_piwigo}')
print(f' [2] embedded : {dt_embedded}')
for i, (src, dt) in enumerate(options, 1):
print(f' [{i}] {src:<8} : {dt}')
print(f' [s] skip')
while True:
choice = input('Choice [1/2/s]: ').strip().lower()
choice = input(f'Choice [1-{len(options)}/s]: ').strip().lower()
if choice in ('s', 'skip', ''):
chosen = None
break
if choice == '1':
chosen = dt_piwigo
break
if choice == '2':
chosen = dt_embedded
break
print(' Please enter 1, 2, or s.')
try:
idx = int(choice) - 1
if 0 <= idx < len(options):
chosen_source, chosen = options[idx]
break
except ValueError:
pass
print(f' Please enter a number between 1 and {len(options)}, or s.')
if chosen is None:
skipped += 1
continue
# Set file mtime.
ts = chosen.timestamp()
os.utime(image_path, (ts, ts))
applied += 1
# For filename-derived dates, also embed the date into any missing
# EXIF/IPTC/XMP tags so the image carries its own date going forward.
if chosen_source == 'filename' and shutil.which('exiftool'):
date_str = (
chosen.strftime('%Y-%m-%d %H:%M:%S') if filename_has_time
else chosen.strftime('%Y-%m-%d')
)
embed_metadata(
image_path,
{'date_created': date_str},
['exif', 'iptc', 'xmp'],
never_overwrite=True,
)
# exiftool rewrites the file and resets its mtime — restore it.
os.utime(image_path, (ts, ts))
print(
f'\nDone. {applied} mtime(s) set, {skipped} skipped, {noted} with no date.'
)
@@ -808,8 +922,8 @@ def main():
help='Directory containing the exported files and JSON sidecars.',
)
dp.add_argument(
'--use', choices=['piwigo', 'embedded'], metavar='SOURCE',
help='Auto-select a date source (piwigo or embedded) instead of '
'--use', choices=['piwigo', 'embedded', 'filename'], metavar='SOURCE',
help='Auto-select a date source (piwigo, embedded, or filename) instead of '
'prompting for each image.',
)