From 1ed0df0fac581daa8f62f2c6287b30106ba1bf3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kucha=C5=99?= <honza.kuchar@grifart.cz> Date: Tue, 7 Mar 2017 04:36:47 -0800 Subject: [PATCH] stripped down to parser; only available output format is now JSON --- btrfs-snapshots-diff.py | 310 ++++++++++++++++++---------------------- tests.sh | 1 + 2 files changed, 141 insertions(+), 170 deletions(-) diff --git a/btrfs-snapshots-diff.py b/btrfs-snapshots-diff.py index ef2560a..14f7093 100755 --- a/btrfs-snapshots-diff.py +++ b/btrfs-snapshots-diff.py @@ -30,15 +30,12 @@ # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - - +import json from struct import unpack -from sys import argv, exit, stdin, stderr, exc_info +from sys import exit, stderr, exc_info printerr = stderr.write import argparse import subprocess -import time -from collections import OrderedDict from os import unlink @@ -129,10 +126,9 @@ class BtrfsStream(object): # Decode commands + attributes idx = 17 count = 0 + # List of commands commands = [] - # modified[path] = [(command, cmd_ref), ...] 
- modified = OrderedDict() while True: @@ -143,36 +139,43 @@ class BtrfsStream(object): except: raise ValueError('Unkown command %d' % cmd) + commandShort = command[13:].lower() + if command == 'BTRFS_SEND_C_RENAME': idx2, path = self._tlv_get_string( 'BTRFS_SEND_A_PATH', idx + self.l_head) idx2, path_to = self._tlv_get_string( 'BTRFS_SEND_A_PATH_TO', idx2) - # Add bogus renamed_from command on destination to keep track - # of what happened - modified.setdefault(path_to, []).append( - ('renamed_from', count)) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), path, path_to)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'path_to': path_to + }) elif command == 'BTRFS_SEND_C_SYMLINK': idx2, path = self._tlv_get_string( 'BTRFS_SEND_A_PATH', idx + self.l_head) # XXX BTRFS_SEND_A_PATH_LINK in send-self.stream.c ??? idx2, ino = self._tlv_get_string('BTRFS_SEND_A_INO', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), ino)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'inode': ino + }) elif command == 'BTRFS_SEND_C_LINK': idx2, path = self._tlv_get_string( 'BTRFS_SEND_A_PATH', idx + self.l_head) idx2, path_link = self._tlv_get_string( 'BTRFS_SEND_A_PATH_LINK', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), path_link)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'path_link': path_link + }) elif command == 'BTRFS_SEND_C_UTIMES': idx2, path = self._tlv_get_string( @@ -180,24 +183,34 @@ class BtrfsStream(object): idx2, atime = self._tlv_get_timespec('BTRFS_SEND_A_ATIME', idx2) idx2, mtime = self._tlv_get_timespec('BTRFS_SEND_A_MTIME', idx2) idx2, ctime = self._tlv_get_timespec('BTRFS_SEND_A_CTIME', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - 
commands.append((command[13:].lower(), atime, mtime, ctime)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'time_access': atime, + 'time_modified': mtime, + 'time_created': ctime + }) elif command in 'BTRFS_SEND_C_MKFILE BTRFS_SEND_C_MKDIR BTRFS_SEND_C_MKFIFO BTRFS_SEND_C_MKSOCK BTRFS_SEND_C_UNLINK BTRFS_SEND_C_RMDIR '.split(): idx2, path = self._tlv_get_string( 'BTRFS_SEND_A_PATH', idx + self.l_head) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower())) + + commands.append({ + 'command': commandShort, + 'path': path + }) elif command == 'BTRFS_SEND_C_TRUNCATE': idx2, path = self._tlv_get_string( 'BTRFS_SEND_A_PATH', idx + self.l_head) idx2, size = self._tlv_get_u64('BTRFS_SEND_A_SIZE', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), size)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'to_size': size + }) elif command == 'BTRFS_SEND_C_SNAPSHOT': idx2, path = self._tlv_get_string( @@ -209,10 +222,15 @@ class BtrfsStream(object): 'BTRFS_SEND_A_CLONE_UUID', idx2) idx2, clone_ctransid = self._tlv_get_u64( 'BTRFS_SEND_A_CLONE_CTRANSID', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), uuid, ctransid, - clone_uuid, clone_ctransid)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'uuid': uuid, + 'ctransid': ctransid, + 'clone_uuid': clone_uuid, + 'clone_ctransid': clone_ctransid + }) elif command == 'BTRFS_SEND_C_SUBVOL': idx2, path = self._tlv_get_string( @@ -220,18 +238,26 @@ class BtrfsStream(object): idx2, uuid = self._tlv_get_uuid('BTRFS_SEND_A_UUID', idx2) idx2, ctransid = self._tlv_get_u64( 'BTRFS_SEND_A_CTRANSID', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), uuid, ctransid)) + + commands.append({ + 'command': commandShort, + 
'path': path, + 'uuid': uuid, + 'ctransid': ctransid + }) elif command == 'BTRFS_SEND_C_MKNOD': idx2, path = self._tlv_get_string( 'BTRFS_SEND_A_PATH', idx + self.l_head) idx2, mode = self._tlv_get_u64('BTRFS_SEND_A_MODE', idx2) idx2, rdev = self._tlv_get_u64('BTRFS_SEND_A_RDEV', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), mode, rdev)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'mode': mode, + 'rdev': rdev + }) elif command == 'BTRFS_SEND_C_SET_XATTR': idx2, path = self._tlv_get_string( @@ -240,18 +266,25 @@ class BtrfsStream(object): 'BTRFS_SEND_A_XATTR_NAME', idx2) idx2, xattr_data = self.tlv_get( 'BTRFS_SEND_A_XATTR_DATA', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), xattr_name, xattr_data)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'xattr_name': xattr_name, + 'xattr_data': xattr_data + }) elif command == 'BTRFS_SEND_C_REMOVE_XATTR': idx2, path = self._tlv_get_string( 'BTRFS_SEND_A_PATH', idx + self.l_head) idx2, xattr_name = self._tlv_get_string( 'BTRFS_SEND_A_XATTR_NAME', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), xattr_name)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'xattr_name': xattr_name + }) elif command == 'BTRFS_SEND_C_WRITE': idx2, path = self._tlv_get_string( @@ -260,10 +293,13 @@ class BtrfsStream(object): 'BTRFS_SEND_A_FILE_OFFSET', idx2) idx2, data = self.tlv_get( 'BTRFS_SEND_A_DATA', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append( - (command[13:].lower(), file_offset, data)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'file_offset': file_offset, + 'data': data + }) elif command == 'BTRFS_SEND_C_CLONE': idx2, path = self._tlv_get_string( @@ -280,27 +316,41 @@ class 
BtrfsStream(object): 'BTRFS_SEND_A_CLONE_PATH', idx + self.l_head) # BTRFS_SEND_A_CLONE8PATH idx2, clone_offset = self._tlv_get_u64( 'BTRFS_SEND_A_CLONE_OFFSET', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), file_offset, clone_len, - clone_uuid, clone_transid, clone_path)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'file_offset': file_offset, + 'clone_len': clone_len, + 'clone_uuid': clone_uuid, + 'clone_transid': clone_transid, + 'clone_path': clone_path, + 'clone_offset': clone_offset + }) elif command == 'BTRFS_SEND_C_CHMOD': idx2, path = self._tlv_get_string( 'BTRFS_SEND_A_PATH', idx + self.l_head) idx2, mode = self._tlv_get_u64('BTRFS_SEND_A_MODE', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), mode)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'mode': mode + }) elif command == 'BTRFS_SEND_C_CHOWN': idx2, path = self._tlv_get_string( 'BTRFS_SEND_A_PATH', idx + self.l_head) idx2, uid = self._tlv_get_u64('BTRFS_SEND_A_UID', idx2) idx2, gid = self._tlv_get_u64('BTRFS_SEND_A_GID', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), uid, gid)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'user_id': uid, + 'group_id': gid + }) elif command == 'BTRFS_SEND_C_UPDATE_EXTENT': idx2, path = self._tlv_get_string( @@ -308,16 +358,27 @@ class BtrfsStream(object): idx2, file_offset = self._tlv_get_u64( 'BTRFS_SEND_A_FILE_OFFSET', idx2) idx2, size = self._tlv_get_u64('BTRFS_SEND_A_SIZE', idx2) - modified.setdefault(path, []).append( - (command[13:].lower(), count)) - commands.append((command[13:].lower(), file_offset, size)) + + commands.append({ + 'command': commandShort, + 'path': path, + 'file_offset': file_offset, + 'size': size + }) elif command == 'BTRFS_SEND_C_END': - 
commands.append((command[13:].lower(), idx + self.l_head, len(self.stream))) + commands.append({ + 'command': commandShort, + 'headers_length': idx + self.l_head, + 'stream_length': len(self.stream) + }) break elif command == 'BTRFS_SEND_C_UNSPEC': - pass + commands.append({ + 'command': commandShort + }) + break else: # Shoud not happen @@ -326,7 +387,7 @@ class BtrfsStream(object): idx += self.l_head + l_cmd count += 1 - return modified, commands + return commands if __name__ == "__main__": @@ -338,10 +399,9 @@ if __name__ == "__main__": parser.add_argument('-c', '--child', help='child snapshot (will be created if it does not exist)') parser.add_argument('-f', '--file', help="diff file") - parser.add_argument('-t', '--filter', action='store_true', - help='does not display temporary files, nor all time modifications (just latest)') - parser.add_argument('-s', '--csv', action='store_true', - help='CSV output') + parser.add_argument('-j', '--json', action='store_true', + help='JSON output') + # parser.add_argument('-v', '--verbose', action="count", default=0, # help="increase verbosity") args = parser.parse_args() @@ -351,7 +411,8 @@ if __name__ == "__main__": cmd = ['btrfs', 'send', '-p', args.parent, args.child, '--no-data', '-f', '/tmp/snaps-diff'] try: - subprocess.check_call(cmd) + # Discard stdout via /dev/null; an unread PIPE can deadlock check_call + subprocess.check_call(cmd, stdout=open('/dev/null', 'w')) except: printerr('Error: %s\nexecuting "%s"\n' % @@ -370,107 +431,16 @@ if __name__ == "__main__": else: stream_file = args.file + # todo: create stream here and pass it into stream processor + stream = BtrfsStream(stream_file) if stream.version is None: exit(1) - print 'Found a valid Btrfs stream header, version %d' % stream.version - modified, commands = stream.decode() - - # Temporary files / dirs / links... created by btrfs send: they are later - # renamed to definitive files / dirs / links... 
- if args.filter: - import re - re_tmp = re.compile(r'o\d+-\d+-0$') - for path, actions in modified.iteritems(): - - if args.filter and re_tmp.match(path): - # Don't display files created temporarily and later renamed - if not (actions[0][0] in ('mkfile', 'mkdir', 'symlink') and \ - actions[1][0] == 'rename') and \ - not (actions[0][0] == ('renamed_from') and \ - actions[1][0] == 'rmdir'): - print path, '\n\t', actions, '=' * 20 - continue - - if path == '': - path = '__sub_root__' - - prev_action = None - extents = [] - print_actions = [] - - for a in actions: - - cmd = commands[a[1]] - - if prev_action == 'update_extent' and a[0] != 'update_extent': - print_actions.append('update extents %d -> %d' % ( - extents[0][0], - extents[-1][0] + extents[-1][1])) - - if a[0] == 'renamed_from': - if args.filter and re_tmp.match(cmd[1]): - if prev_action=='unlink': - del(print_actions[-1]) - print_actions.append('rewritten') - else: - print_actions.append('created') - else: - print_actions.append('renamed from "%s"' % cmd[1]) - - elif a[0] == 'set_xattr': - print_actions.append('xattr %s %d' % cmd[1:]) - - elif a[0] == 'update_extent': - extents .append(cmd[1:]) - - elif a[0] == 'truncate': - print_actions.append('truncate %d' % cmd[1]) - - elif a[0] == 'chown': - print_actions.append('owner %d:%d' % cmd[1:]) - - elif a[0] == 'chmod': - print_actions.append('mode %o' % cmd[1]) - - elif a[0] == 'link': - print_actions.append('link to "%s"' % cmd[1]) - - elif a[0] == 'symlink': - print_actions.append('symlink to "%s"' % cmd[1]) - - elif a[0] in ('unlink', 'mkfile', 'mkdir', 'mkfifo'): - print_actions.append('%s' % a[0]) - - elif a[0] == 'rename': - print_actions.append('rename to "%s"' % cmd[2]) - - elif a[0] == 'utimes': - if args.filter and prev_action=='utimes': - # Print only last utimes - del(print_actions[-1]) - print_actions.append('times a=%s m=%s c=%s' % ( - time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(cmd[1])), - time.strftime('%Y/%m/%d %H:%M:%S', 
time.localtime(cmd[2])), - time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(cmd[3])) - )) - - elif a[0] == 'snapshot': - print_actions.append( - 'snapshot: uuid=%s, ctrasid=%d, clone_uuid=%s, clone_ctransid=%d' % cmd[1:]) - - elif a[0] == 'write': - # XXX cmd[2] is data, but what does it represent? - print_actions.append('write: from %d' % cmd[1]) - - else: - print_actions.append('%s, %s %s' % (a, cmd, '-' * 20)) - prev_action = a[0] - - if args.csv: - print '%s;%s' % (path, ';'.join(print_actions)) - else: - print '\n%s' % path - for p in print_actions: - print '\t%s' % p + commands = stream.decode() + if args.json: # store_true flag is a bool, never None + print json.dumps({'commands': commands}) + exit(0) + else: + parser.print_help() + exit(1) diff --git a/tests.sh b/tests.sh index a4b6209..d396dee 100755 --- a/tests.sh +++ b/tests.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash #set -x function clean_up { -- GitLab