Also source dates from the filename
This commit is contained in:
+163
-49
@@ -29,6 +29,7 @@ import json
|
|||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
@@ -202,43 +203,91 @@ def _xmp_datetime(s) -> str:
|
|||||||
def _parse_datetime(s) -> datetime.datetime | None:
|
def _parse_datetime(s) -> datetime.datetime | None:
|
||||||
"""Parse a DB or EXIF date string into a datetime, or return None."""
|
"""Parse a DB or EXIF date string into a datetime, or return None."""
|
||||||
s = str(s).strip()
|
s = str(s).strip()
|
||||||
for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d',
|
# Try the full string first (handles both datetime and date-only values).
|
||||||
'%Y:%m:%d %H:%M:%S', '%Y:%m:%d'):
|
for fmt in ('%Y-%m-%d %H:%M:%S', '%Y:%m:%d %H:%M:%S', '%Y-%m-%d', '%Y:%m:%d'):
|
||||||
try:
|
try:
|
||||||
return datetime.datetime.strptime(s[:len(fmt)], fmt)
|
return datetime.datetime.strptime(s, fmt)
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
# If the string has trailing timezone info or extra fields, try the prefix.
|
||||||
|
for prefix_len, fmt in ((19, '%Y-%m-%d %H:%M:%S'), (19, '%Y:%m:%d %H:%M:%S'),
|
||||||
|
(10, '%Y-%m-%d'), (10, '%Y:%m:%d')):
|
||||||
|
try:
|
||||||
|
return datetime.datetime.strptime(s[:prefix_len], fmt)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
continue
|
continue
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
# EXIF/IPTC/XMP tags that carry a capture or creation date, in preference order.
|
|
||||||
_DATE_TAGS_IN_IMAGE = (
|
|
||||||
'EXIF:DateTimeOriginal',
|
|
||||||
'EXIF:CreateDate',
|
|
||||||
'XMP-xmp:CreateDate',
|
|
||||||
'IPTC:DateCreated',
|
|
||||||
'EXIF:ModifyDate',
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _earliest_image_date(image_path: pathlib.Path) -> datetime.datetime | None:
    """Return the earliest datetime found in the image's embedded metadata.

    Scans every tag returned by ``_read_existing_metadata`` whose name
    contains 'date' or 'time' (case-insensitive).  Tags are skipped when
    their group is a filesystem/tool pseudo-group (File:, ExifTool:,
    Composite:), GPS (UTC and timezone-ambiguous), or an ICC profile group:
    an ICC ``ProfileDateTime`` records when the colour profile was authored,
    not when the picture was taken, and would otherwise win ``min()`` for
    most images carrying a stock profile.

    NOTE(review): the group spellings matched here depend on how
    ``_read_existing_metadata`` invokes exiftool (-G0 vs -G1) -- confirm.

    Args:
        image_path: Image file whose metadata is inspected.

    Returns:
        The smallest parsed datetime whose year lies strictly between 1900
        and 2100, or ``None`` if exiftool is not on PATH or no usable date
        tag is found.
    """
    if not shutil.which('exiftool'):
        return None

    existing = _read_existing_metadata(image_path)
    dates = []
    for key, val in existing.items():
        # Keys look like 'GROUP:TagName'; a key without ':' has no group.
        group, sep, tag_name = key.partition(':')
        if not sep:
            group, tag_name = '', key
        if group in ('File', 'ExifTool', 'Composite', 'GPS'):
            continue
        # Covers both 'ICC_Profile' and the family-1 'ICC-header' style names.
        if group.startswith('ICC'):
            continue
        lowered = tag_name.lower()
        if 'date' not in lowered and 'time' not in lowered:
            continue
        dt = _parse_datetime(str(val))
        # The year window rejects placeholder/garbage values that still parse.
        if dt and 1900 < dt.year < 2100:
            dates.append(dt)
    return min(dates) if dates else None
|
||||||
|
|
||||||
|
|
||||||
|
# Matches 14 consecutive digits (YYYYMMDDHHMMSS) not adjacent to another digit.
|
||||||
|
_RE_DATETIME_14 = re.compile(r'(?<!\d)(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(?!\d)')
|
||||||
|
# Matches 8 consecutive digits (YYYYMMDD) not adjacent to another digit.
|
||||||
|
_RE_DATE_8 = re.compile(r'(?<!\d)(\d{4})(\d{2})(\d{2})(?!\d)')
|
||||||
|
|
||||||
|
|
||||||
|
def _date_from_filename(name: str) -> tuple[datetime.datetime | None, bool]:
|
||||||
|
"""Try to parse a date/datetime from a filename.
|
||||||
|
|
||||||
|
Recognises patterns such as:
|
||||||
|
20120415142550-b05adf19.png → 2012-04-15 14:25:50 (has_time=True)
|
||||||
|
IMG-20120415142550.jpg → 2012-04-15 14:25:50 (has_time=True)
|
||||||
|
IMG-20120415.jpg → 2012-04-15 00:00:00 (has_time=False)
|
||||||
|
|
||||||
|
Returns (datetime, has_time). has_time=False means only a date was found;
|
||||||
|
the time component is set to midnight but should not be treated as known.
|
||||||
|
Returns (None, False) if no recognisable pattern is found.
|
||||||
|
"""
|
||||||
|
stem = pathlib.Path(name).stem
|
||||||
|
|
||||||
|
# Try 14-digit datetime first.
|
||||||
|
m = _RE_DATETIME_14.search(stem)
|
||||||
|
if m:
|
||||||
|
y, mo, d, h, mi, s = (int(x) for x in m.groups())
|
||||||
|
try:
|
||||||
|
return datetime.datetime(y, mo, d, h, mi, s), True
|
||||||
|
except ValueError:
|
||||||
|
pass # invalid date/time components — fall through
|
||||||
|
|
||||||
|
# Fall back to 8-digit date.
|
||||||
|
m = _RE_DATE_8.search(stem)
|
||||||
|
if m:
|
||||||
|
y, mo, d = (int(x) for x in m.groups())
|
||||||
|
try:
|
||||||
|
return datetime.datetime(y, mo, d, 0, 0, 0), False
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return None, False
|
||||||
|
|
||||||
|
|
||||||
def _build_metadata_tags(metadata: dict, fmt: str) -> dict:
|
def _build_metadata_tags(metadata: dict, fmt: str) -> dict:
|
||||||
"""
|
"""
|
||||||
Build a dict of { 'GROUP:TagName': value } for everything we want to write.
|
Build a dict of { 'GROUP:TagName': value } for everything we want to write.
|
||||||
@@ -510,8 +559,17 @@ def export_image(
|
|||||||
tags = tags_by_image.get(image_id, [])
|
tags = tags_by_image.get(image_id, [])
|
||||||
cat_ids = cats_by_image.get(image_id, [])
|
cat_ids = cats_by_image.get(image_id, [])
|
||||||
|
|
||||||
# Collect both date sources before building the metadata dict.
|
# Collect all three date sources before building the metadata dict.
|
||||||
|
img_filename = pathlib.Path(image_row['path']).name
|
||||||
date_embedded_dt = _earliest_image_date(src_file)
|
date_embedded_dt = _earliest_image_date(src_file)
|
||||||
|
date_filename_dt, filename_has_time = _date_from_filename(img_filename)
|
||||||
|
if date_filename_dt is not None:
|
||||||
|
date_filename_str = (
|
||||||
|
date_filename_dt.strftime('%Y-%m-%d %H:%M:%S') if filename_has_time
|
||||||
|
else date_filename_dt.strftime('%Y-%m-%d')
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
date_filename_str = None
|
||||||
|
|
||||||
metadata = {
|
metadata = {
|
||||||
'title': image_row.get('name'),
|
'title': image_row.get('name'),
|
||||||
@@ -519,6 +577,7 @@ def export_image(
|
|||||||
'date_created': str(image_row['date_creation']) if image_row.get('date_creation') else None,
|
'date_created': str(image_row['date_creation']) if image_row.get('date_creation') else None,
|
||||||
'date_added': str(image_row['date_available']) if image_row.get('date_available') else None,
|
'date_added': str(image_row['date_available']) if image_row.get('date_available') else None,
|
||||||
'date_embedded': str(date_embedded_dt) if date_embedded_dt else None,
|
'date_embedded': str(date_embedded_dt) if date_embedded_dt else None,
|
||||||
|
'date_filename': date_filename_str,
|
||||||
'description': image_row.get('comment'),
|
'description': image_row.get('comment'),
|
||||||
'tags': tags,
|
'tags': tags,
|
||||||
'albums': [category_display_path(cid, categories) for cid in cat_ids],
|
'albums': [category_display_path(cid, categories) for cid in cat_ids],
|
||||||
@@ -531,11 +590,22 @@ def export_image(
|
|||||||
'original_path': image_row['path'],
|
'original_path': image_row['path'],
|
||||||
}
|
}
|
||||||
|
|
||||||
# Print both date sources so the user can see any discrepancy.
|
# Print all three date sources so the user can see any discrepancy.
|
||||||
piwigo_str = metadata['date_created'] or '—'
|
piwigo_str = metadata['date_created'] or '—'
|
||||||
embedded_str = metadata['date_embedded'] or '—'
|
embedded_str = metadata['date_embedded'] or '—'
|
||||||
filename_str = pathlib.Path(image_row['path']).name
|
fn_date_str = metadata['date_filename'] or '—'
|
||||||
print(f' {filename_str} piwigo: {piwigo_str} embedded: {embedded_str}')
|
print(f' {img_filename} piwigo: {piwigo_str} embedded: {embedded_str} filename: {fn_date_str}')
|
||||||
|
|
||||||
|
# Best date for file mtime: piwigo > embedded > filename.
|
||||||
|
mtime_ts = None
|
||||||
|
for _ds in (metadata.get('date_created'), metadata.get('date_embedded'), metadata.get('date_filename')):
|
||||||
|
if _ds:
|
||||||
|
_dt = _parse_datetime(_ds)
|
||||||
|
if _dt:
|
||||||
|
mtime_ts = _dt.timestamp()
|
||||||
|
break
|
||||||
|
if mtime_ts is None:
|
||||||
|
print(f' NOTE: {img_filename}: no date found; file mtime will not be set', file=sys.stderr)
|
||||||
|
|
||||||
dest_dirs = (
|
dest_dirs = (
|
||||||
[output_dir / category_fs_path(cid, categories) for cid in cat_ids]
|
[output_dir / category_fs_path(cid, categories) for cid in cat_ids]
|
||||||
@@ -543,7 +613,7 @@ def export_image(
|
|||||||
else [output_dir / '_unsorted']
|
else [output_dir / '_unsorted']
|
||||||
)
|
)
|
||||||
|
|
||||||
filename = pathlib.Path(image_row['path']).name
|
filename = img_filename
|
||||||
stem = pathlib.Path(filename).stem
|
stem = pathlib.Path(filename).stem
|
||||||
written = 0
|
written = 0
|
||||||
|
|
||||||
@@ -571,11 +641,20 @@ def export_image(
|
|||||||
print(f' SKIP (both files exist, use --overwrite to replace): {dest_image}')
|
print(f' SKIP (both files exist, use --overwrite to replace): {dest_image}')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Copy image file
|
# Copy image file.
|
||||||
shutil.copy2(str(src_file), str(dest_image))
|
shutil.copy2(str(src_file), str(dest_image))
|
||||||
written += 1
|
written += 1
|
||||||
if metadata_format:
|
if metadata_format:
|
||||||
embed_metadata(dest_image, metadata, metadata_format, never_overwrite_metadata)
|
# If no Piwigo date, fall back to filename-derived date so that
|
||||||
|
# missing EXIF/IPTC/XMP date tags are filled in from the filename.
|
||||||
|
meta_for_embed = metadata
|
||||||
|
if not meta_for_embed.get('date_created') and meta_for_embed.get('date_filename'):
|
||||||
|
meta_for_embed = {**metadata, 'date_created': metadata['date_filename']}
|
||||||
|
embed_metadata(dest_image, meta_for_embed, metadata_format, never_overwrite_metadata)
|
||||||
|
|
||||||
|
# Set mtime after any exiftool call so exiftool doesn't reset it.
|
||||||
|
if mtime_ts is not None:
|
||||||
|
os.utime(str(dest_image), (mtime_ts, mtime_ts))
|
||||||
|
|
||||||
# Write/refresh the sidecar so it stays in sync with the DB.
|
# Write/refresh the sidecar so it stays in sync with the DB.
|
||||||
dest_sidecar.write_text(
|
dest_sidecar.write_text(
|
||||||
@@ -683,63 +762,98 @@ def cmd_set_dates(args):
|
|||||||
print(f'WARNING: no image file found for {sidecar.name}', file=sys.stderr)
|
print(f'WARNING: no image file found for {sidecar.name}', file=sys.stderr)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Parse the two candidate dates from the sidecar.
|
# Parse all three candidate dates from the sidecar.
|
||||||
dt_piwigo = _parse_datetime(data['date_created']) if data.get('date_created') else None
|
dt_piwigo = _parse_datetime(data['date_created']) if data.get('date_created') else None
|
||||||
dt_embedded = _parse_datetime(data['date_embedded']) if data.get('date_embedded') else None
|
dt_embedded = _parse_datetime(data['date_embedded']) if data.get('date_embedded') else None
|
||||||
|
fn_date_str = data.get('date_filename')
|
||||||
|
dt_filename = _parse_datetime(fn_date_str) if fn_date_str else None
|
||||||
|
# has_time is False when the sidecar stored only a date (no space → no time part).
|
||||||
|
filename_has_time = bool(fn_date_str and ' ' in fn_date_str)
|
||||||
|
|
||||||
|
chosen = None
|
||||||
|
chosen_source = None
|
||||||
|
|
||||||
# Determine which date to apply.
|
|
||||||
if args.use == 'piwigo':
|
if args.use == 'piwigo':
|
||||||
chosen = dt_piwigo
|
if dt_piwigo is None:
|
||||||
if chosen is None:
|
|
||||||
print(f' NOTE: {image_path.name}: no piwigo date in sidecar; skipping.')
|
print(f' NOTE: {image_path.name}: no piwigo date in sidecar; skipping.')
|
||||||
noted += 1
|
noted += 1
|
||||||
continue
|
continue
|
||||||
|
chosen, chosen_source = dt_piwigo, 'piwigo'
|
||||||
|
|
||||||
elif args.use == 'embedded':
|
elif args.use == 'embedded':
|
||||||
chosen = dt_embedded
|
if dt_embedded is None:
|
||||||
if chosen is None:
|
|
||||||
print(f' NOTE: {image_path.name}: no embedded date in sidecar; skipping.')
|
print(f' NOTE: {image_path.name}: no embedded date in sidecar; skipping.')
|
||||||
noted += 1
|
noted += 1
|
||||||
continue
|
continue
|
||||||
|
chosen, chosen_source = dt_embedded, 'embedded'
|
||||||
|
|
||||||
|
elif args.use == 'filename':
|
||||||
|
if dt_filename is None:
|
||||||
|
print(f' NOTE: {image_path.name}: no filename date in sidecar; skipping.')
|
||||||
|
noted += 1
|
||||||
|
continue
|
||||||
|
chosen, chosen_source = dt_filename, 'filename'
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Interactive mode.
|
# Interactive mode: collect available (source, datetime) pairs.
|
||||||
if dt_piwigo is None and dt_embedded is None:
|
options = []
|
||||||
|
if dt_piwigo: options.append(('piwigo', dt_piwigo))
|
||||||
|
if dt_embedded: options.append(('embedded', dt_embedded))
|
||||||
|
if dt_filename: options.append(('filename', dt_filename))
|
||||||
|
|
||||||
|
if not options:
|
||||||
print(f' NOTE: {image_path.name}: no dates available; skipping.')
|
print(f' NOTE: {image_path.name}: no dates available; skipping.')
|
||||||
noted += 1
|
noted += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if dt_piwigo == dt_embedded or (dt_piwigo and dt_embedded is None):
|
# If all available dates are the same, or only one source, apply silently.
|
||||||
chosen = dt_piwigo
|
unique_dts = list(dict.fromkeys(o[1] for o in options))
|
||||||
elif dt_embedded and dt_piwigo is None:
|
if len(unique_dts) == 1:
|
||||||
chosen = dt_embedded
|
chosen, chosen_source = options[0][1], options[0][0]
|
||||||
else:
|
else:
|
||||||
# Both present and different — ask the user.
|
# Multiple different dates — ask the user.
|
||||||
print(f'\n{image_path.name}')
|
print(f'\n{image_path.name}')
|
||||||
print(f' [1] piwigo : {dt_piwigo}')
|
for i, (src, dt) in enumerate(options, 1):
|
||||||
print(f' [2] embedded : {dt_embedded}')
|
print(f' [{i}] {src:<8} : {dt}')
|
||||||
print(f' [s] skip')
|
print(f' [s] skip')
|
||||||
while True:
|
while True:
|
||||||
choice = input('Choice [1/2/s]: ').strip().lower()
|
choice = input(f'Choice [1-{len(options)}/s]: ').strip().lower()
|
||||||
if choice in ('s', 'skip', ''):
|
if choice in ('s', 'skip', ''):
|
||||||
chosen = None
|
|
||||||
break
|
break
|
||||||
if choice == '1':
|
try:
|
||||||
chosen = dt_piwigo
|
idx = int(choice) - 1
|
||||||
|
if 0 <= idx < len(options):
|
||||||
|
chosen_source, chosen = options[idx]
|
||||||
break
|
break
|
||||||
if choice == '2':
|
except ValueError:
|
||||||
chosen = dt_embedded
|
pass
|
||||||
break
|
print(f' Please enter a number between 1 and {len(options)}, or s.')
|
||||||
print(' Please enter 1, 2, or s.')
|
|
||||||
|
|
||||||
if chosen is None:
|
if chosen is None:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Set file mtime.
|
||||||
ts = chosen.timestamp()
|
ts = chosen.timestamp()
|
||||||
os.utime(image_path, (ts, ts))
|
os.utime(image_path, (ts, ts))
|
||||||
applied += 1
|
applied += 1
|
||||||
|
|
||||||
|
# For filename-derived dates, also embed the date into any missing
|
||||||
|
# EXIF/IPTC/XMP tags so the image carries its own date going forward.
|
||||||
|
if chosen_source == 'filename' and shutil.which('exiftool'):
|
||||||
|
date_str = (
|
||||||
|
chosen.strftime('%Y-%m-%d %H:%M:%S') if filename_has_time
|
||||||
|
else chosen.strftime('%Y-%m-%d')
|
||||||
|
)
|
||||||
|
embed_metadata(
|
||||||
|
image_path,
|
||||||
|
{'date_created': date_str},
|
||||||
|
['exif', 'iptc', 'xmp'],
|
||||||
|
never_overwrite=True,
|
||||||
|
)
|
||||||
|
# exiftool rewrites the file and resets its mtime — restore it.
|
||||||
|
os.utime(image_path, (ts, ts))
|
||||||
|
|
||||||
print(
|
print(
|
||||||
f'\nDone. {applied} mtime(s) set, {skipped} skipped, {noted} with no date.'
|
f'\nDone. {applied} mtime(s) set, {skipped} skipped, {noted} with no date.'
|
||||||
)
|
)
|
||||||
@@ -808,8 +922,8 @@ def main():
|
|||||||
help='Directory containing the exported files and JSON sidecars.',
|
help='Directory containing the exported files and JSON sidecars.',
|
||||||
)
|
)
|
||||||
dp.add_argument(
|
dp.add_argument(
|
||||||
'--use', choices=['piwigo', 'embedded'], metavar='SOURCE',
|
'--use', choices=['piwigo', 'embedded', 'filename'], metavar='SOURCE',
|
||||||
help='Auto-select a date source (piwigo or embedded) instead of '
|
help='Auto-select a date source (piwigo, embedded, or filename) instead of '
|
||||||
'prompting for each image.',
|
'prompting for each image.',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user