Also source dates from the filename
This commit is contained in:
+167
-53
@@ -29,6 +29,7 @@ import json
|
||||
import math
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -202,43 +203,91 @@ def _xmp_datetime(s) -> str:
|
||||
def _parse_datetime(s) -> datetime.datetime | None:
|
||||
"""Parse a DB or EXIF date string into a datetime, or return None."""
|
||||
s = str(s).strip()
|
||||
for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d',
|
||||
'%Y:%m:%d %H:%M:%S', '%Y:%m:%d'):
|
||||
# Try the full string first (handles both datetime and date-only values).
|
||||
for fmt in ('%Y-%m-%d %H:%M:%S', '%Y:%m:%d %H:%M:%S', '%Y-%m-%d', '%Y:%m:%d'):
|
||||
try:
|
||||
return datetime.datetime.strptime(s[:len(fmt)], fmt)
|
||||
return datetime.datetime.strptime(s, fmt)
|
||||
except ValueError:
|
||||
continue
|
||||
# If the string has trailing timezone info or extra fields, try the prefix.
|
||||
for prefix_len, fmt in ((19, '%Y-%m-%d %H:%M:%S'), (19, '%Y:%m:%d %H:%M:%S'),
|
||||
(10, '%Y-%m-%d'), (10, '%Y:%m:%d')):
|
||||
try:
|
||||
return datetime.datetime.strptime(s[:prefix_len], fmt)
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
# EXIF/IPTC/XMP tags that carry a capture or creation date, in preference order.
|
||||
_DATE_TAGS_IN_IMAGE = (
|
||||
'EXIF:DateTimeOriginal',
|
||||
'EXIF:CreateDate',
|
||||
'XMP-xmp:CreateDate',
|
||||
'IPTC:DateCreated',
|
||||
'EXIF:ModifyDate',
|
||||
)
|
||||
|
||||
|
||||
def _earliest_image_date(image_path: pathlib.Path) -> datetime.datetime | None:
    """Return the earliest datetime found in the image's embedded metadata.

    Scans every tag returned by exiftool whose name contains 'date' or 'time'
    (case-insensitive), skipping filesystem/tool pseudo-groups (File:,
    ExifTool:, Composite:) and GPS tags (which are UTC and timezone-ambiguous).
    This catches EXIF:ModifyDate, PNG:ModifyDate, XMP-xmp:CreateDate, etc.
    without needing a format-specific allowlist.

    Values that do not parse as a date, or whose year falls outside
    1901-2099, are ignored.

    Args:
        image_path: Path of the image file whose metadata is read.

    Returns:
        The minimum of all accepted datetimes, or None if exiftool is not
        available or no usable date tags are found.
    """
    if not shutil.which('exiftool'):
        return None

    # NOTE(review): _read_existing_metadata is defined elsewhere; it is
    # assumed to return a mapping keyed 'Group:TagName' -- confirm.  An
    # empty/None result is treated as "no metadata".
    existing = _read_existing_metadata(image_path) or {}

    skipped_groups = ('File', 'ExifTool', 'Composite', 'GPS')
    dates = []
    for key, val in existing.items():
        group, sep, rest = key.partition(':')
        if sep:
            tag_name = rest
        else:
            # No group prefix: the whole key is the tag name.
            group, tag_name = '', key

        if group in skipped_groups:
            continue

        lowered = tag_name.lower()
        # GPS tags may be reported under a non-GPS group (e.g. as
        # 'EXIF:GPSDateStamp', depending on how exiftool groups are
        # requested), so also filter on the tag name itself.
        if lowered.startswith('gps'):
            continue
        if 'date' not in lowered and 'time' not in lowered:
            continue

        dt = _parse_datetime(str(val))
        # Sanity window: discard zeroed or corrupt timestamps.
        if dt and 1900 < dt.year < 2100:
            dates.append(dt)

    return min(dates) if dates else None
|
||||
|
||||
|
||||
# Matches 14 consecutive digits (YYYYMMDDHHMMSS) not adjacent to another digit.
|
||||
_RE_DATETIME_14 = re.compile(r'(?<!\d)(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(?!\d)')
|
||||
# Matches 8 consecutive digits (YYYYMMDD) not adjacent to another digit.
|
||||
_RE_DATE_8 = re.compile(r'(?<!\d)(\d{4})(\d{2})(\d{2})(?!\d)')
|
||||
|
||||
|
||||
def _date_from_filename(name: str) -> tuple[datetime.datetime | None, bool]:
|
||||
"""Try to parse a date/datetime from a filename.
|
||||
|
||||
Recognises patterns such as:
|
||||
20120415142550-b05adf19.png → 2012-04-15 14:25:50 (has_time=True)
|
||||
IMG-20120415142550.jpg → 2012-04-15 14:25:50 (has_time=True)
|
||||
IMG-20120415.jpg → 2012-04-15 00:00:00 (has_time=False)
|
||||
|
||||
Returns (datetime, has_time). has_time=False means only a date was found;
|
||||
the time component is set to midnight but should not be treated as known.
|
||||
Returns (None, False) if no recognisable pattern is found.
|
||||
"""
|
||||
stem = pathlib.Path(name).stem
|
||||
|
||||
# Try 14-digit datetime first.
|
||||
m = _RE_DATETIME_14.search(stem)
|
||||
if m:
|
||||
y, mo, d, h, mi, s = (int(x) for x in m.groups())
|
||||
try:
|
||||
return datetime.datetime(y, mo, d, h, mi, s), True
|
||||
except ValueError:
|
||||
pass # invalid date/time components — fall through
|
||||
|
||||
# Fall back to 8-digit date.
|
||||
m = _RE_DATE_8.search(stem)
|
||||
if m:
|
||||
y, mo, d = (int(x) for x in m.groups())
|
||||
try:
|
||||
return datetime.datetime(y, mo, d, 0, 0, 0), False
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return None, False
|
||||
|
||||
|
||||
def _build_metadata_tags(metadata: dict, fmt: str) -> dict:
|
||||
"""
|
||||
Build a dict of { 'GROUP:TagName': value } for everything we want to write.
|
||||
@@ -510,8 +559,17 @@ def export_image(
|
||||
tags = tags_by_image.get(image_id, [])
|
||||
cat_ids = cats_by_image.get(image_id, [])
|
||||
|
||||
# Collect both date sources before building the metadata dict.
|
||||
# Collect all three date sources before building the metadata dict.
|
||||
img_filename = pathlib.Path(image_row['path']).name
|
||||
date_embedded_dt = _earliest_image_date(src_file)
|
||||
date_filename_dt, filename_has_time = _date_from_filename(img_filename)
|
||||
if date_filename_dt is not None:
|
||||
date_filename_str = (
|
||||
date_filename_dt.strftime('%Y-%m-%d %H:%M:%S') if filename_has_time
|
||||
else date_filename_dt.strftime('%Y-%m-%d')
|
||||
)
|
||||
else:
|
||||
date_filename_str = None
|
||||
|
||||
metadata = {
|
||||
'title': image_row.get('name'),
|
||||
@@ -519,6 +577,7 @@ def export_image(
|
||||
'date_created': str(image_row['date_creation']) if image_row.get('date_creation') else None,
|
||||
'date_added': str(image_row['date_available']) if image_row.get('date_available') else None,
|
||||
'date_embedded': str(date_embedded_dt) if date_embedded_dt else None,
|
||||
'date_filename': date_filename_str,
|
||||
'description': image_row.get('comment'),
|
||||
'tags': tags,
|
||||
'albums': [category_display_path(cid, categories) for cid in cat_ids],
|
||||
@@ -531,11 +590,22 @@ def export_image(
|
||||
'original_path': image_row['path'],
|
||||
}
|
||||
|
||||
# Print both date sources so the user can see any discrepancy.
|
||||
piwigo_str = metadata['date_created'] or '—'
|
||||
# Print all three date sources so the user can see any discrepancy.
|
||||
piwigo_str = metadata['date_created'] or '—'
|
||||
embedded_str = metadata['date_embedded'] or '—'
|
||||
filename_str = pathlib.Path(image_row['path']).name
|
||||
print(f' {filename_str} piwigo: {piwigo_str} embedded: {embedded_str}')
|
||||
fn_date_str = metadata['date_filename'] or '—'
|
||||
print(f' {img_filename} piwigo: {piwigo_str} embedded: {embedded_str} filename: {fn_date_str}')
|
||||
|
||||
# Best date for file mtime: piwigo > embedded > filename.
|
||||
mtime_ts = None
|
||||
for _ds in (metadata.get('date_created'), metadata.get('date_embedded'), metadata.get('date_filename')):
|
||||
if _ds:
|
||||
_dt = _parse_datetime(_ds)
|
||||
if _dt:
|
||||
mtime_ts = _dt.timestamp()
|
||||
break
|
||||
if mtime_ts is None:
|
||||
print(f' NOTE: {img_filename}: no date found; file mtime will not be set', file=sys.stderr)
|
||||
|
||||
dest_dirs = (
|
||||
[output_dir / category_fs_path(cid, categories) for cid in cat_ids]
|
||||
@@ -543,7 +613,7 @@ def export_image(
|
||||
else [output_dir / '_unsorted']
|
||||
)
|
||||
|
||||
filename = pathlib.Path(image_row['path']).name
|
||||
filename = img_filename
|
||||
stem = pathlib.Path(filename).stem
|
||||
written = 0
|
||||
|
||||
@@ -571,11 +641,20 @@ def export_image(
|
||||
print(f' SKIP (both files exist, use --overwrite to replace): {dest_image}')
|
||||
continue
|
||||
|
||||
# Copy image file
|
||||
# Copy image file.
|
||||
shutil.copy2(str(src_file), str(dest_image))
|
||||
written += 1
|
||||
if metadata_format:
|
||||
embed_metadata(dest_image, metadata, metadata_format, never_overwrite_metadata)
|
||||
# If no Piwigo date, fall back to filename-derived date so that
|
||||
# missing EXIF/IPTC/XMP date tags are filled in from the filename.
|
||||
meta_for_embed = metadata
|
||||
if not meta_for_embed.get('date_created') and meta_for_embed.get('date_filename'):
|
||||
meta_for_embed = {**metadata, 'date_created': metadata['date_filename']}
|
||||
embed_metadata(dest_image, meta_for_embed, metadata_format, never_overwrite_metadata)
|
||||
|
||||
# Set mtime after any exiftool call so exiftool doesn't reset it.
|
||||
if mtime_ts is not None:
|
||||
os.utime(str(dest_image), (mtime_ts, mtime_ts))
|
||||
|
||||
# Write/refresh the sidecar so it stays in sync with the DB.
|
||||
dest_sidecar.write_text(
|
||||
@@ -683,63 +762,98 @@ def cmd_set_dates(args):
|
||||
print(f'WARNING: no image file found for {sidecar.name}', file=sys.stderr)
|
||||
continue
|
||||
|
||||
# Parse the two candidate dates from the sidecar.
|
||||
# Parse all three candidate dates from the sidecar.
|
||||
dt_piwigo = _parse_datetime(data['date_created']) if data.get('date_created') else None
|
||||
dt_embedded = _parse_datetime(data['date_embedded']) if data.get('date_embedded') else None
|
||||
fn_date_str = data.get('date_filename')
|
||||
dt_filename = _parse_datetime(fn_date_str) if fn_date_str else None
|
||||
# has_time is False when the sidecar stored only a date (no space → no time part).
|
||||
filename_has_time = bool(fn_date_str and ' ' in fn_date_str)
|
||||
|
||||
chosen = None
|
||||
chosen_source = None
|
||||
|
||||
# Determine which date to apply.
|
||||
if args.use == 'piwigo':
|
||||
chosen = dt_piwigo
|
||||
if chosen is None:
|
||||
if dt_piwigo is None:
|
||||
print(f' NOTE: {image_path.name}: no piwigo date in sidecar; skipping.')
|
||||
noted += 1
|
||||
continue
|
||||
chosen, chosen_source = dt_piwigo, 'piwigo'
|
||||
|
||||
elif args.use == 'embedded':
|
||||
chosen = dt_embedded
|
||||
if chosen is None:
|
||||
if dt_embedded is None:
|
||||
print(f' NOTE: {image_path.name}: no embedded date in sidecar; skipping.')
|
||||
noted += 1
|
||||
continue
|
||||
chosen, chosen_source = dt_embedded, 'embedded'
|
||||
|
||||
elif args.use == 'filename':
|
||||
if dt_filename is None:
|
||||
print(f' NOTE: {image_path.name}: no filename date in sidecar; skipping.')
|
||||
noted += 1
|
||||
continue
|
||||
chosen, chosen_source = dt_filename, 'filename'
|
||||
|
||||
else:
|
||||
# Interactive mode.
|
||||
if dt_piwigo is None and dt_embedded is None:
|
||||
# Interactive mode: collect available (source, datetime) pairs.
|
||||
options = []
|
||||
if dt_piwigo: options.append(('piwigo', dt_piwigo))
|
||||
if dt_embedded: options.append(('embedded', dt_embedded))
|
||||
if dt_filename: options.append(('filename', dt_filename))
|
||||
|
||||
if not options:
|
||||
print(f' NOTE: {image_path.name}: no dates available; skipping.')
|
||||
noted += 1
|
||||
continue
|
||||
|
||||
if dt_piwigo == dt_embedded or (dt_piwigo and dt_embedded is None):
|
||||
chosen = dt_piwigo
|
||||
elif dt_embedded and dt_piwigo is None:
|
||||
chosen = dt_embedded
|
||||
# If all available dates are the same, or only one source, apply silently.
|
||||
unique_dts = list(dict.fromkeys(o[1] for o in options))
|
||||
if len(unique_dts) == 1:
|
||||
chosen, chosen_source = options[0][1], options[0][0]
|
||||
else:
|
||||
# Both present and different — ask the user.
|
||||
# Multiple different dates — ask the user.
|
||||
print(f'\n{image_path.name}')
|
||||
print(f' [1] piwigo : {dt_piwigo}')
|
||||
print(f' [2] embedded : {dt_embedded}')
|
||||
for i, (src, dt) in enumerate(options, 1):
|
||||
print(f' [{i}] {src:<8} : {dt}')
|
||||
print(f' [s] skip')
|
||||
while True:
|
||||
choice = input('Choice [1/2/s]: ').strip().lower()
|
||||
choice = input(f'Choice [1-{len(options)}/s]: ').strip().lower()
|
||||
if choice in ('s', 'skip', ''):
|
||||
chosen = None
|
||||
break
|
||||
if choice == '1':
|
||||
chosen = dt_piwigo
|
||||
break
|
||||
if choice == '2':
|
||||
chosen = dt_embedded
|
||||
break
|
||||
print(' Please enter 1, 2, or s.')
|
||||
try:
|
||||
idx = int(choice) - 1
|
||||
if 0 <= idx < len(options):
|
||||
chosen_source, chosen = options[idx]
|
||||
break
|
||||
except ValueError:
|
||||
pass
|
||||
print(f' Please enter a number between 1 and {len(options)}, or s.')
|
||||
|
||||
if chosen is None:
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
# Set file mtime.
|
||||
ts = chosen.timestamp()
|
||||
os.utime(image_path, (ts, ts))
|
||||
applied += 1
|
||||
|
||||
# For filename-derived dates, also embed the date into any missing
|
||||
# EXIF/IPTC/XMP tags so the image carries its own date going forward.
|
||||
if chosen_source == 'filename' and shutil.which('exiftool'):
|
||||
date_str = (
|
||||
chosen.strftime('%Y-%m-%d %H:%M:%S') if filename_has_time
|
||||
else chosen.strftime('%Y-%m-%d')
|
||||
)
|
||||
embed_metadata(
|
||||
image_path,
|
||||
{'date_created': date_str},
|
||||
['exif', 'iptc', 'xmp'],
|
||||
never_overwrite=True,
|
||||
)
|
||||
# exiftool rewrites the file and resets its mtime — restore it.
|
||||
os.utime(image_path, (ts, ts))
|
||||
|
||||
print(
|
||||
f'\nDone. {applied} mtime(s) set, {skipped} skipped, {noted} with no date.'
|
||||
)
|
||||
@@ -808,8 +922,8 @@ def main():
|
||||
help='Directory containing the exported files and JSON sidecars.',
|
||||
)
|
||||
dp.add_argument(
|
||||
'--use', choices=['piwigo', 'embedded'], metavar='SOURCE',
|
||||
help='Auto-select a date source (piwigo or embedded) instead of '
|
||||
'--use', choices=['piwigo', 'embedded', 'filename'], metavar='SOURCE',
|
||||
help='Auto-select a date source (piwigo, embedded, or filename) instead of '
|
||||
'prompting for each image.',
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user