From 1ed0df0fac581daa8f62f2c6287b30106ba1bf3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Kucha=C5=99?= <honza.kuchar@grifart.cz>
Date: Tue, 7 Mar 2017 04:36:47 -0800
Subject: [PATCH] stripped down to parser; only available output format is now
 JSON

---
 btrfs-snapshots-diff.py | 310 ++++++++++++++++++----------------------
 tests.sh                |   1 +
 2 files changed, 141 insertions(+), 170 deletions(-)

diff --git a/btrfs-snapshots-diff.py b/btrfs-snapshots-diff.py
index ef2560a..14f7093 100755
--- a/btrfs-snapshots-diff.py
+++ b/btrfs-snapshots-diff.py
@@ -30,15 +30,12 @@
 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-
-
+import json
 from struct import unpack
-from sys import argv, exit, stdin, stderr, exc_info
+from sys import exit, stderr, exc_info
 printerr = stderr.write
 import argparse
 import subprocess
-import time
-from collections import OrderedDict
 from os import unlink
 
 
@@ -129,10 +126,9 @@ class BtrfsStream(object):
         # Decode commands + attributes
         idx = 17
         count = 0
+
         # List of commands
         commands = []
-        # modified[path] = [(command, cmd_ref), ...]
-        modified = OrderedDict()
 
         while True:
 
@@ -143,36 +139,43 @@ class BtrfsStream(object):
             except:
                 raise ValueError('Unkown command %d' % cmd)
 
+            commandShort = command[13:].lower()
+
             if command == 'BTRFS_SEND_C_RENAME':
                 idx2, path = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH', idx + self.l_head)
                 idx2, path_to = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH_TO', idx2)
-                # Add bogus renamed_from command on destination to keep track
-                # of what happened
-                modified.setdefault(path_to, []).append(
-                    ('renamed_from', count))
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), path, path_to))
+
+                commands.append({
+                    'command': commandShort,
+                    'path':    path,
+                    'path_to': path_to
+                })
 
             elif command == 'BTRFS_SEND_C_SYMLINK':
                 idx2, path = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH', idx + self.l_head)
                 # XXX BTRFS_SEND_A_PATH_LINK in send-self.stream.c ???
                 idx2, ino = self._tlv_get_string('BTRFS_SEND_A_INO', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), ino))
+
+                commands.append({
+                    'command': commandShort,
+                    'path':    path,
+                    'inode':   ino
+                })
 
             elif command == 'BTRFS_SEND_C_LINK':
                 idx2, path = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH', idx + self.l_head)
                 idx2, path_link = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH_LINK', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), path_link))
+
+                commands.append({
+                    'command':   commandShort,
+                    'path':      path,
+                    'path_link': path_link
+                })
 
             elif command == 'BTRFS_SEND_C_UTIMES':
                 idx2, path = self._tlv_get_string(
@@ -180,24 +183,34 @@ class BtrfsStream(object):
                 idx2, atime = self._tlv_get_timespec('BTRFS_SEND_A_ATIME', idx2)
                 idx2, mtime = self._tlv_get_timespec('BTRFS_SEND_A_MTIME', idx2)
                 idx2, ctime = self._tlv_get_timespec('BTRFS_SEND_A_CTIME', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), atime, mtime, ctime))
+
+                commands.append({
+                    'command':       commandShort,
+                    'path':          path,
+                    'time_access':   atime,
+                    'time_modified': mtime,
+                    'time_created':  ctime
+                })
 
             elif command in 'BTRFS_SEND_C_MKFILE BTRFS_SEND_C_MKDIR BTRFS_SEND_C_MKFIFO BTRFS_SEND_C_MKSOCK BTRFS_SEND_C_UNLINK BTRFS_SEND_C_RMDIR '.split():
                 idx2, path = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH', idx + self.l_head)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower()))
+
+                commands.append({
+                    'command': commandShort,
+                    'path':    path
+                })
 
             elif command == 'BTRFS_SEND_C_TRUNCATE':
                 idx2, path = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH', idx + self.l_head)
                 idx2, size = self._tlv_get_u64('BTRFS_SEND_A_SIZE', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), size))
+
+                commands.append({
+                    'command': commandShort,
+                    'path':    path,
+                    'to_size': size
+                })
 
             elif command == 'BTRFS_SEND_C_SNAPSHOT':
                 idx2, path = self._tlv_get_string(
@@ -209,10 +222,15 @@ class BtrfsStream(object):
                     'BTRFS_SEND_A_CLONE_UUID', idx2)
                 idx2, clone_ctransid = self._tlv_get_u64(
                     'BTRFS_SEND_A_CLONE_CTRANSID', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), uuid, ctransid,
-                                 clone_uuid, clone_ctransid))
+
+                commands.append({
+                    'command': commandShort,
+                    'path': path,
+                    'uuid': uuid,
+                    'ctransid': ctransid,
+                    'clone_uuid': clone_uuid,
+                    'clone_ctransid': clone_ctransid
+                })
 
             elif command == 'BTRFS_SEND_C_SUBVOL':
                 idx2, path = self._tlv_get_string(
@@ -220,18 +238,26 @@ class BtrfsStream(object):
                 idx2, uuid = self._tlv_get_uuid('BTRFS_SEND_A_UUID', idx2)
                 idx2, ctransid = self._tlv_get_u64(
                     'BTRFS_SEND_A_CTRANSID', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), uuid, ctransid))
+
+                commands.append({
+                    'command': commandShort,
+                    'path':    path,
+                    'uuid':    uuid,
+                    'ctrans_id': ctransid
+                })
 
             elif command == 'BTRFS_SEND_C_MKNOD':
                 idx2, path = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH', idx + self.l_head)
                 idx2, mode = self._tlv_get_u64('BTRFS_SEND_A_MODE', idx2)
                 idx2, rdev = self._tlv_get_u64('BTRFS_SEND_A_RDEV', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), mode, rdev))
+
+                commands.append({
+                    'command': commandShort,
+                    'path':    path,
+                    'mode':    mode,
+                    'rdev':    rdev
+                })
 
             elif command == 'BTRFS_SEND_C_SET_XATTR':
                 idx2, path = self._tlv_get_string(
@@ -240,18 +266,25 @@ class BtrfsStream(object):
                     'BTRFS_SEND_A_XATTR_NAME', idx2)
                 idx2, xattr_data = self.tlv_get(
                     'BTRFS_SEND_A_XATTR_DATA', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), xattr_name, xattr_data))
+
+                commands.append({
+                    'command': commandShort,
+                    'path': path,
+                    'xattr_name': xattr_name,
+                    'xattr_data': xattr_data
+                })
 
             elif command == 'BTRFS_SEND_C_REMOVE_XATTR':
                 idx2, path = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH', idx + self.l_head)
                 idx2, xattr_name = self._tlv_get_string(
                     'BTRFS_SEND_A_XATTR_NAME', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), xattr_name))
+
+                commands.append({
+                    'command': commandShort,
+                    'path': path,
+                    'xattr_name': xattr_name
+                })
 
             elif command == 'BTRFS_SEND_C_WRITE':
                 idx2, path = self._tlv_get_string(
@@ -260,10 +293,13 @@ class BtrfsStream(object):
                     'BTRFS_SEND_A_FILE_OFFSET', idx2)
                 idx2, data = self.tlv_get(
                     'BTRFS_SEND_A_DATA', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append(
-                    (command[13:].lower(), file_offset, data))
+
+                commands.append({
+                    'command': commandShort,
+                    'path': path,
+                    'file_offset': file_offset,
+                    'data': data
+                })
 
             elif command == 'BTRFS_SEND_C_CLONE':
                 idx2, path = self._tlv_get_string(
@@ -280,27 +316,41 @@ class BtrfsStream(object):
                     'BTRFS_SEND_A_CLONE_PATH', idx + self.l_head)  # BTRFS_SEND_A_CLONE8PATH
                 idx2, clone_offset = self._tlv_get_u64(
                     'BTRFS_SEND_A_CLONE_OFFSET', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), file_offset, clone_len,
-                                 clone_uuid, clone_transid, clone_path))
+
+                commands.append({
+                    'command': commandShort,
+                    'path': path,
+                    'file_offset': file_offset,
+                    'clone_len': clone_len,
+                    'clone_uuid': clone_uuid,
+                    'clone_transid': clone_transid,
+                    'clone_path':    clone_path,
+                    'clone_offset':  clone_offset
+                })
 
             elif command == 'BTRFS_SEND_C_CHMOD':
                 idx2, path = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH', idx + self.l_head)
                 idx2, mode = self._tlv_get_u64('BTRFS_SEND_A_MODE', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), mode))
+
+                commands.append({
+                    'command': commandShort,
+                    'path': path,
+                    'mode': mode
+                })
 
             elif command == 'BTRFS_SEND_C_CHOWN':
                 idx2, path = self._tlv_get_string(
                     'BTRFS_SEND_A_PATH', idx + self.l_head)
                 idx2, uid = self._tlv_get_u64('BTRFS_SEND_A_UID', idx2)
                 idx2, gid = self._tlv_get_u64('BTRFS_SEND_A_GID', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), uid, gid))
+
+                commands.append({
+                    'command': commandShort,
+                    'path': path,
+                    'user_id': uid,
+                    'group_id': gid
+                })
 
             elif command == 'BTRFS_SEND_C_UPDATE_EXTENT':
                 idx2, path = self._tlv_get_string(
@@ -308,16 +358,27 @@ class BtrfsStream(object):
                 idx2, file_offset = self._tlv_get_u64(
                     'BTRFS_SEND_A_FILE_OFFSET', idx2)
                 idx2, size = self._tlv_get_u64('BTRFS_SEND_A_SIZE', idx2)
-                modified.setdefault(path, []).append(
-                    (command[13:].lower(), count))
-                commands.append((command[13:].lower(), file_offset, size))
+
+                commands.append({
+                    'command': commandShort,
+                    'path': path,
+                    'file_offset': file_offset,
+                    'size': size
+                })
 
             elif command == 'BTRFS_SEND_C_END':
-                commands.append((command[13:].lower(), idx + self.l_head, len(self.stream)))
+                commands.append({
+                    'command': commandShort,
+                    'headers_length': idx + self.l_head,
+                    'stream_length': len(self.stream)
+                })
                 break
 
             elif command == 'BTRFS_SEND_C_UNSPEC':
-                pass
+                commands.append({
+                    'command': commandShort
+                })
+                break
 
             else:
                 # Shoud not happen
@@ -326,7 +387,7 @@ class BtrfsStream(object):
             idx += self.l_head + l_cmd
             count += 1
 
-        return modified, commands
+        return commands
 
 
 if __name__ == "__main__":
@@ -338,10 +399,9 @@ if __name__ == "__main__":
     parser.add_argument('-c', '--child',
                         help='child snapshot (will be created if it does not exist)')
     parser.add_argument('-f', '--file', help="diff file")
-    parser.add_argument('-t', '--filter', action='store_true',
-                        help='does not display temporary files, nor all time modifications (just latest)')
-    parser.add_argument('-s', '--csv', action='store_true',
-                        help='CSV output')
+    parser.add_argument('-j', '--json', action='store_true',
+                        help='JSON output')
+
 #    parser.add_argument('-v', '--verbose', action="count", default=0,
 #                        help="increase verbosity")
     args = parser.parse_args()
@@ -351,7 +411,8 @@ if __name__ == "__main__":
             cmd = ['btrfs', 'send', '-p', args.parent, args.child, '--no-data',
                    '-f', '/tmp/snaps-diff']
             try:
-                subprocess.check_call(cmd)
+                # PIPE is used only to mute output (it is never read); TODO: better solution?
+                subprocess.check_call(cmd, stdout=subprocess.PIPE)
 
             except:
                 printerr('Error: %s\nexecuting "%s"\n' %
@@ -370,107 +431,16 @@ if __name__ == "__main__":
     else:
         stream_file = args.file
 
+    # todo: create stream here and pass it into stream processor
+
     stream = BtrfsStream(stream_file)
     if stream.version is None:
        exit(1)
-    print 'Found a valid Btrfs stream header, version %d' % stream.version
-    modified, commands = stream.decode()
-
-    # Temporary files / dirs / links... created by btrfs send: they are later
-    # renamed to definitive files / dirs / links...
-    if args.filter:
-        import re
-        re_tmp = re.compile(r'o\d+-\d+-0$')
 
-    for path, actions in modified.iteritems():
-
-        if args.filter and re_tmp.match(path):
-            # Don't display files created temporarily and later renamed
-            if not (actions[0][0] in ('mkfile', 'mkdir', 'symlink') and \
-                    actions[1][0] == 'rename') and \
-                    not (actions[0][0] == ('renamed_from') and \
-                    actions[1][0] == 'rmdir'):
-                print path, '\n\t', actions, '=' * 20
-            continue
-
-        if path == '':
-            path = '__sub_root__'
-
-        prev_action = None
-        extents = []
-        print_actions = []
-
-        for a in actions:
-
-            cmd = commands[a[1]]
-
-            if prev_action == 'update_extent' and a[0] != 'update_extent':
-                print_actions.append('update extents %d -> %d' % (
-                    extents[0][0],
-                    extents[-1][0] + extents[-1][1]))
-
-            if a[0] == 'renamed_from':
-                if args.filter and re_tmp.match(cmd[1]):
-                    if prev_action=='unlink':
-                        del(print_actions[-1])
-                        print_actions.append('rewritten')
-                    else:
-                        print_actions.append('created')
-                else:
-                    print_actions.append('renamed from "%s"' % cmd[1])
-
-            elif a[0] == 'set_xattr':
-                print_actions.append('xattr %s %d' % cmd[1:])
-
-            elif a[0] == 'update_extent':
-                extents .append(cmd[1:])
-
-            elif a[0] == 'truncate':
-                print_actions.append('truncate %d' % cmd[1])
-
-            elif a[0] == 'chown':
-                print_actions.append('owner %d:%d' % cmd[1:])
-
-            elif a[0] == 'chmod':
-                print_actions.append('mode %o' % cmd[1])
-
-            elif a[0] == 'link':
-                print_actions.append('link to "%s"' % cmd[1])
-
-            elif a[0] == 'symlink':
-                print_actions.append('symlink to "%s"' % cmd[1])
-
-            elif a[0] in ('unlink', 'mkfile', 'mkdir', 'mkfifo'):
-                print_actions.append('%s' % a[0])
-
-            elif a[0] == 'rename':
-                print_actions.append('rename to "%s"' % cmd[2])
-
-            elif a[0] == 'utimes':
-                if args.filter and prev_action=='utimes':
-                   # Print only last utimes
-                   del(print_actions[-1])
-                print_actions.append('times a=%s m=%s c=%s' % (
-                    time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(cmd[1])),
-                    time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(cmd[2])),
-                    time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(cmd[3]))
-                ))
-
-            elif a[0] == 'snapshot':
-                print_actions.append(
-                    'snapshot: uuid=%s, ctrasid=%d, clone_uuid=%s, clone_ctransid=%d' % cmd[1:])
-
-            elif a[0] == 'write':
-                # XXX cmd[2] is data, but what does it represent?
-                print_actions.append('write: from %d' % cmd[1])
-
-            else:
-                print_actions.append('%s, %s %s' % (a, cmd, '-' * 20))
-            prev_action = a[0]
-
-        if args.csv:
-            print '%s;%s' % (path, ';'.join(print_actions))
-        else:
-            print '\n%s' % path
-            for p in print_actions:
-               print '\t%s' % p
+    commands = stream.decode()
+    if args.json:  # store_true yields True/False, never None
+        print json.dumps({'commands': commands})
+        exit(0)
+    else:
+        parser.print_help()
+        exit(1)
diff --git a/tests.sh b/tests.sh
index a4b6209..d396dee 100755
--- a/tests.sh
+++ b/tests.sh
@@ -1,3 +1,4 @@
+#!/usr/bin/env bash
 
 #set -x
 function clean_up {
-- 
GitLab