457 lines
17 KiB
Python
457 lines
17 KiB
Python
|
#!/usr/bin/env python3
|
||
|
# -*- coding: utf-8 -*-
|
||
|
"""Quick and Dirty Tab Groups Dumper for Firefox
|
||
|
--snip--
|
||
|
|
||
|
Copyright (C) 2014 Stephan Sokolow
|
||
|
|
||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||
|
a copy of this software and associated documentation files (the "Software"),
|
||
|
to deal in the Software without restriction, including without limitation
|
||
|
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||
|
and/or sell copies of the Software, and to permit persons to whom the
|
||
|
Software is furnished to do so, subject to the following conditions:
|
||
|
|
||
|
The above copyright notice and this permission notice shall be included
|
||
|
in all copies or substantial portions of the Software.
|
||
|
|
||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||
|
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||
|
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
||
|
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||
|
"""
|
||
|
|
||
|
__appname__ = "Quick And Dirty Tab Groups Dumper for Firefox"
|
||
|
__author__ = "Stephan Sokolow (deitarion/SSokolow)"
|
||
|
__version__ = "0.2"
|
||
|
__license__ = "MIT"
|
||
|
|
||
|
import logging
|
||
|
log = logging.getLogger(__name__)
|
||
|
|
||
|
import json, os, re
|
||
|
|
||
|
#{{{ Data Types for Schema
|
||
|
# Runs of characters matched here are replaced when a group name is turned
# into an HTML anchor/id.
bad_anchor_char_re = re.compile('[^A-Za-z0-9-_:.]+')

# URL forms that are accepted wherever the schema expects a URL.
url_re = re.compile("""
    ^data:image/png;base64,|
    ^(chrome|file|https?)://|
    ^about:(blank|home|newtab)$|
    ^javascript:
""", re.VERBOSE)


# NOTE: These predicates used to test against `unicode`, which does not exist
# on Python 3 (the interpreter named in the shebang) and raised NameError on
# first use. `str` is the Python 3 text type.

def is_nonempty_string(x):
    """Return a truthy value if the input is a non-empty text string"""
    return isinstance(x, str) and x


def is_url(x):
    """Return a truthy value if the input is a string in a known URL form"""
    return isinstance(x, str) and url_re.match(x)


def is_nullable_url(x):
    """Return a truthy value if the input is None or passes `is_url`"""
    return x is None or is_url(x)


def is_natural_int(x):
    """Return true if the input is an integer greater than or equal to zero"""
    return isinstance(x, int) and x >= 0


def is_positive_int(x):
    """Return true if the input is an integer greater than zero"""
    return is_natural_int(x) and x > 0


def is_bool_string(x):
    """Return true if the input is the string 'true' or the string 'false'"""
    return isinstance(x, str) and x in ('true', 'false')
|
||
|
def is_int_string(x):
    """Return true if the input is a non-empty string that `int()` can parse"""
    if is_nonempty_string(x):
        try:
            int(x)
        except ValueError:
            return False
        return True
    return False
|
||
|
def is_int_ish(x):
    """Return a truthy value for integer strings and for non-negative ints"""
    return is_int_string(x) or is_natural_int(x)
|
||
|
|
||
|
def is_coord_pair(x):
    """Return a truthy value if the input is two comma-separated integers"""
    nonempty = is_nonempty_string(x)
    if not nonempty:
        # Hand back the falsy value itself, exactly as an `and` chain would
        return nonempty

    parts = x.split(',')
    return len(parts) == 2 and all(is_int_string(part) for part in parts)
|
||
|
|
||
|
#}}}
|
||
|
#{{{ Schema
|
||
|
|
||
|
def apply_subschema(prefix, subschema):
    """Return a new schema dict with `prefix` prepended to every path

    The input `subschema` is left untouched.
    """
    prefixed = {}
    for path, validator in subschema.items():
        prefixed[prefix + path] = validator
    return prefixed
|
||
|
|
||
|
# Each schema maps a slash-terminated path to a predicate which must return
# a truthy value for the element found at that path. A `list/` path segment
# stands for "every member of the list at the preceding path".
#
# NOTE: Validators that used to test against `unicode` now test against
# `str`; `unicode` does not exist on Python 3 and raised NameError.

def _is_bool(x):
    """Return true if the input is a boolean"""
    return isinstance(x, bool)


def _is_dict(x):
    """Return true if the input is a (possibly empty) dict"""
    return isinstance(x, dict)


def _is_list(x):
    """Return true if the input is a (possibly empty) list"""
    return isinstance(x, list)


def _is_string(x):
    """Return true if the input is a (possibly empty) text string"""
    return isinstance(x, str)


def _is_nonempty_dict(x):
    """Return a truthy value if the input is a dict with at least one key"""
    return isinstance(x, dict) and x


def _is_nonempty_list(x):
    """Return a truthy value if the input is a list with at least one member"""
    return isinstance(x, list) and x


# Fields of one session-history entry (also reused for nested children)
TAB_ENTRY_SCHEMA = {
    'ID/': is_natural_int,
    'docIdentifier/': is_natural_int,
    'docshellID/': is_natural_int,
    'owner_b64/': is_nonempty_string,
    'referrer/': is_url,
    'scroll/': is_nonempty_string,
    'subframe/': _is_bool,
    'title/': is_nonempty_string,
    'url/': is_url,
}

TABS_SCHEMA = {
    'tabs/': _is_nonempty_list,
    'tabs/list/': _is_nonempty_dict,
    'tabs/list/attributes/': _is_dict,
    'tabs/list/attributes/image/': is_url,
    'tabs/list/entries/': _is_nonempty_list,
    'tabs/list/entries/list/': _is_nonempty_dict,
    'tabs/list/entries/list/ID/': is_natural_int,
    'tabs/list/entries/list/children/': _is_list,
    'tabs/list/entries/list/children/list/': _is_dict,
    'tabs/list/entries/list/url/': is_url,
    'tabs/list/extData/': _is_nonempty_dict,
    'tabs/list/extData/tabview-tab/': is_nonempty_string,
    'tabs/list/extData/weaveLastUsed/': is_nonempty_string,
    'tabs/list/hidden/': _is_bool,
    'tabs/list/image/': is_nullable_url,
    'tabs/list/index/': is_positive_int,
    'tabs/list/lastAccessed/': is_natural_int,
    'tabs/list/pinned/': _is_bool,
    'tabs/list/scroll/': _is_dict,
    'tabs/list/scroll/scroll/': is_nonempty_string,
}
# Entries, their children, and their grandchildren share the same fields
TABS_SCHEMA.update(apply_subschema(
    'tabs/list/entries/list/', TAB_ENTRY_SCHEMA))
TABS_SCHEMA.update(apply_subschema(
    'tabs/list/entries/list/children/list/', TAB_ENTRY_SCHEMA))
TABS_SCHEMA.update(apply_subschema(
    'tabs/list/entries/list/children/list/children/list/', TAB_ENTRY_SCHEMA))

CLOSED_TABS_SCHEMA = {
    '_closedTabs/': _is_list,
    '_closedTabs/list/': _is_dict,
    '_closedTabs/list/closedAt/': is_natural_int,
    '_closedTabs/list/image/': is_url,
    '_closedTabs/list/pos/': is_positive_int,
    '_closedTabs/list/state/': _is_dict,
    '_closedTabs/list/state/attributes/': _is_dict,
    '_closedTabs/list/state/entries/': _is_list,
    '_closedTabs/list/state/entries/list/': _is_dict,
    '_closedTabs/list/state/extData/': _is_dict,
    '_closedTabs/list/state/extData/tabview-tab/': _is_string,
    '_closedTabs/list/state/hidden/': _is_bool,
    '_closedTabs/list/state/image/': is_url,
    '_closedTabs/list/state/index/': is_positive_int,
    '_closedTabs/list/state/lastAccessed/': is_natural_int,
    '_closedTabs/list/state/scroll/': _is_dict,
    '_closedTabs/list/state/scroll/scroll/': is_coord_pair,
    '_closedTabs/list/title/': _is_string,
}
CLOSED_TABS_SCHEMA.update(apply_subschema(
    '_closedTabs/list/state/entries/list/', TAB_ENTRY_SCHEMA))

WINDOW_SCHEMA = {
    'extData/': _is_dict,
    'extData/tabview-group/': is_nonempty_string,
    'extData/tabview-groups/': is_nonempty_string,
    'extData/tabview-ui/': is_nonempty_string,
    'height/': is_int_ish,
    'screenX/': is_int_ish,
    'screenY/': is_int_ish,
    'selected/': is_positive_int,
    'sizemode/': _is_string,
    'title/': is_nonempty_string,
    'width/': is_int_ish,
}

#TODO: Finish deduplicating this schema definition
SCHEMA = {
    '': _is_nonempty_dict,
    '_closedWindows/': _is_list,
    '_closedWindows/list/': _is_dict,
    '_closedWindows/list/closedAt/': lambda x: isinstance(x, int),
    '_closedWindows/list/extData/__SessionManagerWindowId/': _is_string,
    'global/': _is_dict,
    'scratchpads/': _is_list,
    'selectedWindow/': is_natural_int,
    'session/': _is_dict,
    'session/lastUpdate/': is_natural_int,
    'session/recentCrashes/': is_natural_int,
    'session/startTime/': is_natural_int,
    'windows/': _is_nonempty_list,
    'windows/list/': _is_nonempty_dict,
    'windows/list/busy/': _is_bool,
    'windows/list/extData/': _is_nonempty_dict,
    'windows/list/extData/__SessionManagerWindowId/': is_nonempty_string,
    'windows/list/extData/tabview-last-session-group-name/':
        is_nonempty_string,
    'windows/list/extData/tabview-visibility/': is_bool_string,

    'windows/list/tabs/list/disallow/': is_nonempty_string,
    'windows/list/tabs/list/entries/list/cacheKey/': is_positive_int,
    'windows/list/tabs/list/entries/list/children/list/children/':
        _is_list,
    'windows/list/tabs/list/entries/list/children/list/children/list/':
        _is_dict,
    'windows/list/tabs/list/entries/list/structuredCloneState/':
        is_nonempty_string,
    'windows/list/tabs/list/entries/list/structuredCloneVersion/':
        is_positive_int,
    'windows/list/tabs/list/extData/': _is_dict,
    'windows/list/tabs/list/extData/tabview-tab/': is_nonempty_string,
    'windows/list/tabs/list/extData/weaveLastUsed/': is_nonempty_string,
    'windows/list/tabs/list/lastAccessed/': is_natural_int,
    'windows/list/tabs/list/pageStyle/':
        lambda x: isinstance(x, (str, dict)),
    'windows/list/tabs/list/pageStyle/pageStyle/': is_nonempty_string,
    'windows/list/tabs/list/userTypedClear/': is_natural_int,
    'windows/list/tabs/list/userTypedValue/': is_nonempty_string,
}
# Later updates replace earlier entries for the same path, so the order of
# these calls is significant.
SCHEMA.update(apply_subschema('_closedWindows/list/', CLOSED_TABS_SCHEMA))
SCHEMA.update(apply_subschema('_closedWindows/list/', TABS_SCHEMA))
SCHEMA.update(apply_subschema('_closedWindows/list/', WINDOW_SCHEMA))
SCHEMA.update(apply_subschema('windows/list/', CLOSED_TABS_SCHEMA))
SCHEMA.update(apply_subschema('windows/list/', TABS_SCHEMA))
SCHEMA.update(apply_subschema('windows/list/', WINDOW_SCHEMA))
|
||
|
|
||
|
#}}}
|
||
|
#{{{ Schema-related routines
|
||
|
|
||
|
def make_schema_line(path, data):
    """Build a draft schema entry for `path`, ready to be copy-pasted

    The entry is padded to 100 columns and followed by a comment holding the
    first 80 characters of `repr(data)` as a sample value.
    """
    type_name = type(data).__name__
    entry = "'{0}': lambda x: isinstance(x, {1}),".format(path, type_name)
    sample = repr(data)[:80]
    return entry.ljust(100) + ' # ' + sample
|
||
|
|
||
|
def check_schema(data, dom_path='', schema=None, make_schema=False):
    """Recursive exploration for JSON via schemas

    @param data: The decoded JSON element to examine.
    @param dom_path: Slash-terminated path of `data` within the document.
        List members are addressed by appending `list/` and dict members by
        appending `<key>/`.
    @param schema: Dict mapping paths to validator predicates.
    @param make_schema: If true, do not validate. Instead, collect a draft
        schema line for every path which is missing from `schema`.

    @return: Dict mapping paths to draft schema lines. It is only populated
        when `make_schema` is true.
    @raise ValueError: If an element is not in the schema (and `make_schema`
        is false), fails its validator, or is of an unexpected type.
    """
    schema = schema or {}
    result = {}

    def fail(msg):
        """Unified failure message"""
        dump = result[dom_path] = make_schema_line(dom_path, data)
        if isinstance(data, (list, dict)):
            # Containers can be huge, so only their type is reported
            raise ValueError("%s: %s @ %s\n %s" % (
                msg, type(data), dom_path, dump))
        else:
            raise ValueError("%s: %s(%s) @ %s\n %s" % (
                msg, type(data), data, dom_path, dump))

    if dom_path not in schema:
        if make_schema:
            result[dom_path] = make_schema_line(dom_path, data)
        else:
            fail("Unexpected element")

    if make_schema or schema[dom_path](data):
        if isinstance(data, list):
            for x in data:
                result.update(check_schema(x, dom_path + 'list/',
                    schema=schema, make_schema=make_schema))
        elif isinstance(data, dict):
            for x in data:
                result.update(check_schema(data[x], '%s%s/' % (dom_path, x),
                    schema=schema, make_schema=make_schema))
        # `str` rather than `unicode`: the latter does not exist on Python 3
        # and made every scalar leaf raise NameError.
        elif not (data is None or isinstance(data, (int, float, str))):
            fail("Unexpected data type")
    else:
        fail("Element failed schema")

    return result
|
||
|
|
||
|
#}}}
|
||
|
|
||
|
def _collect_tab_metadata(tab):
|
||
|
"""Restructure the tab metadata into a single dict"""
|
||
|
grp_id = None
|
||
|
grp_data = tab.get('extData', {}).get('tabview-tab', {})
|
||
|
if grp_data:
|
||
|
grp_data = json.loads(grp_data)
|
||
|
if grp_data:
|
||
|
grp_id = grp_data.get('groupID', None)
|
||
|
del grp_data
|
||
|
|
||
|
empty = {
|
||
|
'title' : 'New tab',
|
||
|
'url' : 'about:newtab',
|
||
|
}
|
||
|
if len(tab['entries']) == 0:
|
||
|
content = empty
|
||
|
else:
|
||
|
content = tab['entries'][-1] # -1 is most recent
|
||
|
|
||
|
return {
|
||
|
'index': tab.get('index'),
|
||
|
'group': grp_id,
|
||
|
'title': content.get('title', content['url']),
|
||
|
'url': content['url'],
|
||
|
'favicon': tab.get('image', None),
|
||
|
'pinned': tab.get('pinned', False)
|
||
|
}
|
||
|
|
||
|
def dump_tab_groups(data):
    """Extract the window/group/tab structure from decoded session data

    @param data: The already-decoded session JSON (a dict with a `windows`
        list). No file access happens here.
    @return: A list with one member per window. Each member is a list of
        `(group_name, tabs)` pairs where `group_name` is None for tabs whose
        group is missing or untitled and `tabs` is a list of the dicts
        produced by `_collect_tab_metadata`.
    """
    windows = []
    for window in data['windows']:
        raw_names = json.loads(window.get('extData', {}).get(
            'tabview-group', '{}'))
        # Group IDs are JSON object keys (strings) here but are looked up
        # using the `groupID` values taken from the tabs.
        grp_names = {int(grp_id): grp.get('title')
                     for grp_id, grp in raw_names.items()}

        # Group the tabs by group ID and then replace the IDs with the
        # group names without risking naming collisions
        by_group = {}
        for tab in (_collect_tab_metadata(x) for x in window['tabs']):
            by_group.setdefault(tab['group'], []).append(tab)
        groups = [(grp_names.get(grp_id), tabs)
                  for grp_id, tabs in by_group.items()]
        #groups.sort()  # TODO: Sort case-insensitively

        windows.append(groups)
    return windows
|
||
|
|
||
|
def dump_to_html(dump, for_tiddlywiki=False):
    """Convert `dump_tab_groups` output to HTML

    Specifically, an outline represented in HTML via the XOXO microformat
    so that it's both human- and machine-readable:
      http://www.microformats.org/wiki/xoxo

    @param dump: A list of windows as returned by `dump_tab_groups`.
    @param for_tiddlywiki: If true, emit each group's tabs as a preformatted
        block of `* [[title|url]]` lines and prefix group headings with a
        `!!!` marker.
    @return: The document as serialized by `lxml.etree.tostring`.

    @todo: Clean up this code and reuse anything possible to produce an XBEL
           output option:
             http://en.wikipedia.org/wiki/XBEL
    """
    from lxml import etree
    from lxml.builder import E

    def attr_class(*args):
        """workaround for `class` being a reserved word"""
        return {"class": ' '.join(args)}

    def h3_prefix():
        """Build a fresh heading prefix (an element can only have one parent)"""
        return (E.span('!!!', attr_class('copy-only'))
                if for_tiddlywiki else '')

    # NOTE(review): PLACEHOLDER_FAVICON_URI, HTML_EXPORT_TITLE and
    # HTML_EXPORT_STYLE are not defined anywhere in the reviewed copy of this
    # file. They must exist at module level for this function to work.
    noicon = PLACEHOLDER_FAVICON_URI

    tab_count, windows = 0, []
    # The table of contents and the set of anchors in use span the whole
    # document. Resetting them per window would leave only the last window
    # in the TOC and allow duplicate element IDs.
    toc, used_anchors = [], set()
    for pos, window in enumerate(dump):
        e_groups = []

        for group, tabs in window:
            if not group and all(x.get('pinned') for x in tabs):
                grp_name = "<Pinned Tabs>"
            else:
                grp_name = group or '<Unnamed Group>'

            # Differently-named groups can still sanitize to the same anchor,
            # so uniqueness is enforced on the anchor and not on the name.
            base_anchor = bad_anchor_char_re.sub('_', grp_name).lower()
            anchor, idx = base_anchor, 0
            while anchor in used_anchors:
                idx += 1
                anchor = '%s%s' % (base_anchor, idx)
            used_anchors.add(anchor)
            toc.append((grp_name, anchor))

            tab_count += len(tabs)
            if for_tiddlywiki:
                lines = []
                for x in tabs:
                    lines.extend(['* [[', E.b(x['title']), '|',
                                  E.a(x['url'], href=x['url']),
                                  ']]\n'])
                e_tabs = E.pre(*lines)
            else:
                e_tabs = E.ul(*[
                    E.li(E.a(E.img(src=x['favicon'] or noicon,
                                   alt='', width='16', height='16'),
                             x['title'], href=x['url']))
                    for x in tabs])

            e_groups.append(E.li(
                E.h3(h3_prefix(), grp_name, id=anchor),
                e_tabs
            ))

        windows.append(E.li(E.h2("Window %d" % pos), *e_groups))

    e_toc = E.ul(*[E.li(E.a(name, href='#%s' % anchor))
                   for name, anchor in toc])

    title_str = '%s (%s tabs total)' % (HTML_EXPORT_TITLE, tab_count)
    return etree.tostring(E.html(
        E.head(
            E.title(title_str),
            E.style(HTML_EXPORT_STYLE)),
        E.body(
            E.h1(title_str), e_toc,
            E.ul(attr_class('xoxo'), *windows))))
|
||
|
|
||
|
|
||
|
def dump_to_text(dump):
    """Convert `dump_tab_groups` output to text

    Each window gets a `Window N` heading followed by its groups. Each group
    is its name followed by a tab-indented title line and a URL line per tab.
    The text ends with a total tab count.
    """
    pieces = []
    total = 0
    for pos, window in enumerate(dump):
        pieces.append("Window {0}\n".format(pos))

        for group, tabs in window:
            if group or not all(x.get('pinned') for x in tabs):
                heading = group or '<Unnamed Group>'
            else:
                heading = "<Pinned Tabs>"
            pieces.append("{0}\n".format(heading))

            total += len(tabs)
            pieces.extend("\t{1}\n{0}\n".format(x['url'], x['title'])
                          for x in tabs)
            # NOTE(review): indentation was lost in the reviewed copy of this
            # file; the blank line is assumed to follow every group.
            pieces.append("\n")

    pieces.append('({0} tabs total)'.format(total))
    return ''.join(pieces)
|
||
|
|
||
|
# From https://gist.github.com/Tblue/62ff47bef7f894e92ed5
|
||
|
def decompress(file_obj):
    """Decompress the contents of a `mozLz40` file

    @param file_obj: A binary file-like object positioned at the start of the
        data. It is read to the end.
    @return: The decompressed bytes.
    @raise ValueError: If the data does not start with the 8-byte
        `mozLz40\\0` magic number.
    """
    # Validate before importing so that bad input is reported as such even
    # when the `lz4` package is not installed.
    if file_obj.read(8) != b"mozLz40\0":
        # Raising a bare string is itself a TypeError on Python 3
        raise ValueError("Invalid magic number")

    import lz4.block
    return lz4.block.decompress(file_obj.read())
|
||
|
|
||
|
|
||
|
# Command-line entry point: parse the options, load each session file and
# either check it against the schema, draft new schema lines for it, or dump
# its tab groups as text.
if __name__ == '__main__':
    from optparse import OptionParser
    # Only the part of the module docstring before the `--snip--` marker is
    # shown as the --help description.
    parser = OptionParser(version="%%prog v%s" % __version__,
            usage="%prog [opts] [path]",
            description=__doc__.replace('\r\n', '\n').split('\n--snip--\n')[0])
    parser.add_option('-v', '--verbose', action="count", dest="verbose",
        default=2, help="Increase the verbosity. Use twice for extra effect")
    parser.add_option('-q', '--quiet', action="count", dest="quiet",
        default=0, help="Decrease the verbosity. Use twice for extra effect")
    parser.add_option('--check-schema', action="store_true",
        dest="check_schema", default=False, help="Check JSON against the "
        "embedded sessionstore schema")
    parser.add_option('--make-schema', action="store_true", dest="make_schema",
        default=False, help="Auto-generate a first draft schema from the JSON")

    # Allow pre-formatted descriptions
    parser.formatter.format_description = lambda description: description

    opts, args = parser.parse_args()

    # Set up clean logging to stderr
    log_levels = [logging.CRITICAL, logging.ERROR, logging.WARNING,
                  logging.INFO, logging.DEBUG]
    # Net verbosity is clamped to a valid index into `log_levels`; the
    # default of 2 selects WARNING.
    opts.verbose = min(opts.verbose - opts.quiet, len(log_levels) - 1)
    opts.verbose = max(opts.verbose, 0)
    logging.basicConfig(level=log_levels[opts.verbose],
                        format='%(levelname)s: %(message)s')

    # With no paths given, fall back to the recovery file of every profile
    # found under ~/.mozilla/firefox
    if not args:
        import glob
        args = glob.glob(os.path.expanduser(
            '~/.mozilla/firefox/*/sessionstore-backups/recovery.jsonlz4'))

    results = []
    for arg in args:
        with open(arg, 'rb') as fobj:
            data = decompress(fobj)
            data = json.loads(data)

        if opts.check_schema:
            # Nothing is printed on success; a violation raises ValueError
            result = check_schema(data, schema=SCHEMA)
        elif opts.make_schema:
            result = check_schema(data, schema=SCHEMA, make_schema=True)
            print("---------------")
            print('\n'.join(sorted(result.values())))
        else:
            # Windows from all input files are accumulated into one list
            results += dump_tab_groups(data)
    # NOTE(review): indentation was lost in the reviewed copy of this file;
    # these two statements are assumed to run once, after the loop. Confirm
    # against the original.
    text = dump_to_text(results)
    print(text)
|