diff options
Diffstat (limited to 'git.py')
-rw-r--r-- | git.py | 258 |
1 files changed, 146 insertions, 112 deletions
@@ -19,37 +19,45 @@ from html import escape # Path to the git binary. GIT_BIN = "git" -def run_git(repo_path, params, stdin = None, silent_stderr = False, raw = False): + +def run_git(repo_path, params, stdin=None, silent_stderr=False, raw=False): """Invokes git with the given parameters. This function invokes git with the given parameters, and returns a file-like object with the output (from a pipe). """ - params = [GIT_BIN, '--git-dir=%s' % repo_path] + list(params) + params = [GIT_BIN, "--git-dir=%s" % repo_path] + list(params) stderr = None if silent_stderr: stderr = subprocess.PIPE if not stdin: - p = subprocess.Popen(params, - stdin = None, stdout = subprocess.PIPE, stderr = stderr) + p = subprocess.Popen( + params, stdin=None, stdout=subprocess.PIPE, stderr=stderr + ) else: - p = subprocess.Popen(params, - stdin = subprocess.PIPE, stdout = subprocess.PIPE, - stderr = stderr) + p = subprocess.Popen( + params, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=stderr, + ) + p.stdin.write(stdin) p.stdin.close() if raw: return p.stdout - return io.TextIOWrapper(p.stdout, encoding = 'utf8', - errors = 'backslashreplace') + return io.TextIOWrapper( + p.stdout, encoding="utf8", errors="backslashreplace" + ) -class GitCommand (object): +class GitCommand(object): """Convenient way of invoking git.""" + def __init__(self, path, cmd, *args, **kwargs): self._override = True self._path = path @@ -63,10 +71,10 @@ class GitCommand (object): self.__setattr__(k, v) def __setattr__(self, k, v): - if k == '_override' or self._override: + if k == "_override" or self._override: self.__dict__[k] = v return - k = k.replace('_', '-') + k = k.replace("_", "-") self._kwargs[k] = v def arg(self, a): @@ -92,19 +100,20 @@ class GitCommand (object): params = [self._cmd] for k, v in list(self._kwargs.items()): - dash = '--' if len(k) > 1 else '-' + dash = "--" if len(k) > 1 else "-" if v is None: - params.append('%s%s' % (dash, k)) + params.append("%s%s" % (dash, k)) else: - params.append('%s%s=%s' % (dash, k, str(v))) + params.append("%s%s=%s" % (dash, k, str(v))) params.extend(self._args) - return run_git(self._path, params, self._stdin_buf, raw = self._raw) + return run_git(self._path, params, self._stdin_buf, raw=self._raw) -class SimpleNamespace (object): +class SimpleNamespace(object): """An entirely flexible object, which provides a convenient namespace.""" + def __init__(self, **kwargs): self.__dict__.update(kwargs) @@ -120,14 +129,15 @@ class smstr: readable. .html -> an HTML-embeddable representation. """ + def __init__(self, raw): if not isinstance(raw, (str, bytes)): raise TypeError( - "The raw string must be instance of 'str', not %s" % - type(raw)) + "The raw string must be instance of 'str', not %s" % type(raw) + ) self.raw = raw if isinstance(raw, bytes): - self.unicode = raw.decode('utf8', errors = 'backslashreplace') + self.unicode = raw.decode("utf8", errors="backslashreplace") else: self.unicode = raw self.url = urllib.request.pathname2url(raw) @@ -147,7 +157,7 @@ class smstr: def split(self, sep): """Like str.split().""" - return [ smstr(s) for s in self.raw.split(sep) ] + return [smstr(s) for s in self.raw.split(sep)] def __add__(self, other): if isinstance(other, smstr): @@ -156,9 +166,9 @@ class smstr: def _to_html(self): """Returns an html representation of the unicode string.""" - html = '' + html = "" for c in escape(self.unicode): - if c in '\t\r\n\r\f\a\b\v\0': + if c in "\t\r\n\r\f\a\b\v\0": esc_c = c.encode("unicode-escape").decode("utf8") html += '<span class="ctrlchr">%s</span>' % esc_c else: @@ -186,7 +196,7 @@ def unquote(s): s = s.encode("latin1").decode("unicode-escape") # Convert to utf8. - s = s.encode("latin1").decode("utf8", errors='backslashreplace') + s = s.encode("latin1").decode("utf8", errors="backslashreplace") return s @@ -194,7 +204,7 @@ def unquote(s): class Repo: """A git repository.""" - def __init__(self, path, name = None, info = None): + def __init__(self, path, name=None, info=None): self.path = path self.name = name self.info = info or SimpleNamespace() @@ -203,9 +213,9 @@ class Repo: """Returns a GitCommand() on our path.""" return GitCommand(self.path, cmd) - def for_each_ref(self, pattern = None, sort = None, count = None): + def for_each_ref(self, pattern=None, sort=None, count=None): """Returns a list of references.""" - cmd = self.cmd('for-each-ref') + cmd = self.cmd("for-each-ref") if sort: cmd.sort = sort if count: @@ -217,61 +227,61 @@ class Repo: obj_id, obj_type, ref = l.split() yield obj_id, obj_type, ref - def branches(self, sort = '-authordate'): + def branches(self, sort="-authordate"): """Get the (name, obj_id) of the branches.""" - refs = self.for_each_ref(pattern = 'refs/heads/', sort = sort) + refs = self.for_each_ref(pattern="refs/heads/", sort=sort) for obj_id, _, ref in refs: - yield ref[len('refs/heads/'):], obj_id + yield ref[len("refs/heads/") :], obj_id def branch_names(self): """Get the names of the branches.""" - return ( name for name, _ in self.branches() ) + return (name for name, _ in self.branches()) - def tags(self, sort = '-taggerdate'): + def tags(self, sort="-taggerdate"): """Get the (name, obj_id) of the tags.""" - refs = self.for_each_ref(pattern = 'refs/tags/', sort = sort) + refs = self.for_each_ref(pattern="refs/tags/", sort=sort) for obj_id, _, ref in refs: - yield ref[len('refs/tags/'):], obj_id + yield ref[len("refs/tags/") :], obj_id def tag_names(self): """Get the names of the tags.""" - return ( name for name, _ in self.tags() ) + return (name for name, _ in self.tags()) - def commit_ids(self, ref, limit = None): + def commit_ids(self, ref, limit=None): """Generate commit ids.""" - cmd = self.cmd('rev-list') + cmd = self.cmd("rev-list") if limit: cmd.max_count = limit cmd.arg(ref) - cmd.arg('--') + cmd.arg("--") for l in cmd.run(): - yield l.rstrip('\n') + yield l.rstrip("\n") def commit(self, commit_id): """Return a single commit.""" - cs = list(self.commits(commit_id, limit = 1)) + cs = list(self.commits(commit_id, limit=1)) if len(cs) != 1: return None return cs[0] - def commits(self, ref, limit = None, offset = 0): + def commits(self, ref, limit=None, offset=0): """Generate commit objects for the ref.""" - cmd = self.cmd('rev-list') + cmd = self.cmd("rev-list") if limit: cmd.max_count = limit + offset cmd.header = None cmd.arg(ref) - cmd.arg('--') + cmd.arg("--") - info_buffer = '' + info_buffer = "" count = 0 for l in cmd.run(): - if '\0' in l: - pre, post = l.split('\0', 1) + if "\0" in l: + pre, post = l.split("\0", 1) info_buffer += pre count += 1 @@ -290,11 +300,11 @@ class Repo: def diff(self, ref): """Return a Diff object for the ref.""" - cmd = self.cmd('diff-tree') + cmd = self.cmd("diff-tree") cmd.patch = None cmd.numstat = None cmd.find_renames = None - if (self.info.root_diff): + if self.info.root_diff: cmd.root = None # Note we intentionally do not use -z, as the filename is just for # reference, and it is safer to let git do the escaping. @@ -305,13 +315,13 @@ class Repo: def refs(self): """Return a dict of obj_id -> ref.""" - cmd = self.cmd('show-ref') + cmd = self.cmd("show-ref") cmd.dereference = None r = defaultdict(list) for l in cmd.run(): l = l.strip() - obj_id, ref = l.split(' ', 1) + obj_id, ref = l.split(" ", 1) r[obj_id].append(ref) return r @@ -322,9 +332,9 @@ class Repo: def blob(self, path, ref): """Returns a Blob instance for the given path.""" - cmd = self.cmd('cat-file') + cmd = self.cmd("cat-file") cmd.raw(True) - cmd.batch = '%(objectsize)' + cmd.batch = "%(objectsize)" # Format: <ref>:<path> # Construct it in binary since the path might not be utf8. @@ -332,29 +342,39 @@ class Repo: out = cmd.run() head = out.readline() - if not head or head.strip().endswith(b'missing'): + if not head or head.strip().endswith(b"missing"): return None - return Blob(out.read()[:int(head)]) + return Blob(out.read()[: int(head)]) def last_commit_timestamp(self): """Return the timestamp of the last commit.""" - refs = self.for_each_ref(pattern = 'refs/heads/', - sort = '-committerdate', count = 1) + refs = self.for_each_ref( + pattern="refs/heads/", sort="-committerdate", count=1 + ) for obj_id, _, _ in refs: commit = self.commit(obj_id) return commit.committer_epoch return -1 -class Commit (object): +class Commit(object): """A git commit.""" - def __init__(self, repo, - commit_id, parents, tree, - author, author_epoch, author_tz, - committer, committer_epoch, committer_tz, - message): + def __init__( + self, + repo, + commit_id, + parents, + tree, + author, + author_epoch, + author_tz, + committer, + committer_epoch, + committer_tz, + message, + ): self._repo = repo self.id = commit_id self.parents = parents @@ -367,28 +387,30 @@ class Commit (object): self.committer_tz = committer_tz self.message = message - self.author_name, self.author_email = \ - email.utils.parseaddr(self.author) + self.author_name, self.author_email = email.utils.parseaddr( + self.author + ) - self.committer_name, self.committer_email = \ - email.utils.parseaddr(self.committer) + self.committer_name, self.committer_email = email.utils.parseaddr( + self.committer + ) - self.subject, self.body = self.message.split('\n', 1) + self.subject, self.body = self.message.split("\n", 1) self.author_date = Date(self.author_epoch, self.author_tz) self.committer_date = Date(self.committer_epoch, self.committer_tz) - # Only get this lazily when we need it; most of the time it's not # required by the caller. self._diff = None def __repr__(self): - return '<C %s p:%s a:%s s:%r>' % ( - self.id[:7], - ','.join(p[:7] for p in self.parents), - self.author_email, - self.subject[:20]) + return "<C %s p:%s a:%s s:%r>" % ( + self.id[:7], + ",".join(p[:7] for p in self.parents), + self.author_email, + self.subject[:20], + ) @property def diff(self): @@ -400,57 +422,68 @@ class Commit (object): @staticmethod def from_str(repo, buf): """Parses git rev-list output, returns a commit object.""" - if '\n\n' in buf: + if "\n\n" in buf: # Header, commit message - header, raw_message = buf.split('\n\n', 1) + header, raw_message = buf.split("\n\n", 1) else: # Header only, no commit message - header, raw_message = buf.rstrip(), ' ' + header, raw_message = buf.rstrip(), " " - header_lines = header.split('\n') + header_lines = header.split("\n") commit_id = header_lines.pop(0) header_dict = defaultdict(list) for line in header_lines: - k, v = line.split(' ', 1) + k, v = line.split(" ", 1) header_dict[k].append(v) - tree = header_dict['tree'][0] - parents = set(header_dict['parent']) - author, author_epoch, author_tz = \ - header_dict['author'][0].rsplit(' ', 2) - committer, committer_epoch, committer_tz = \ - header_dict['committer'][0].rsplit(' ', 2) + tree = header_dict["tree"][0] + parents = set(header_dict["parent"]) + + authorhdr = header_dict["author"][0] + author, author_epoch, author_tz = authorhdr.rsplit(" ", 2) + + committerhdr = header_dict["committer"][0] + committer, committer_epoch, committer_tz = committerhdr.rsplit(" ", 2) # Remove the first four spaces from the message's lines. - message = '' - for line in raw_message.split('\n'): - message += line[4:] + '\n' - - return Commit(repo, - commit_id = commit_id, tree = tree, parents = parents, - author = author, - author_epoch = author_epoch, author_tz = author_tz, - committer = committer, - committer_epoch = committer_epoch, committer_tz = committer_tz, - message = message) + message = "" + for line in raw_message.split("\n"): + message += line[4:] + "\n" + + return Commit( + repo, + commit_id=commit_id, + tree=tree, + parents=parents, + author=author, + author_epoch=author_epoch, + author_tz=author_tz, + committer=committer, + committer_epoch=committer_epoch, + committer_tz=committer_tz, + message=message, + ) + class Date: """Handy representation for a datetime from git.""" + def __init__(self, epoch, tz): self.epoch = int(epoch) self.tz = tz self.utc = datetime.datetime.utcfromtimestamp(self.epoch) self.tz_sec_offset_min = int(tz[1:3]) * 60 + int(tz[4:]) - if tz[0] == '-': + if tz[0] == "-": self.tz_sec_offset_min = -self.tz_sec_offset_min self.local = self.utc + datetime.timedelta( - minutes = self.tz_sec_offset_min) + minutes=self.tz_sec_offset_min + ) - self.str = self.utc.strftime('%a, %d %b %Y %H:%M:%S +0000 ') - self.str += '(%s %s)' % (self.local.strftime('%H:%M'), self.tz) + self.str = self.utc.strftime("%a, %d %b %Y %H:%M:%S +0000 ") + self.str += "(%s %s)" % (self.local.strftime("%H:%M"), self.tz) def __str__(self): return self.str @@ -458,6 +491,7 @@ class Date: class Diff: """A diff between two trees.""" + def __init__(self, ref, changes, body): """Constructor. @@ -477,23 +511,23 @@ class Diff: ref_id = next(lines) except StopIteration: # No diff; this can happen in merges without conflicts. - return Diff(None, [], '') + return Diff(None, [], "") # First, --numstat information. changes = [] l = next(lines) - while l != '\n': - l = l.rstrip('\n') - added, deleted, fname = l.split('\t', 2) - added = added.replace('-', '0') - deleted = deleted.replace('-', '0') + while l != "\n": + l = l.rstrip("\n") + added, deleted, fname = l.split("\t", 2) + added = added.replace("-", "0") + deleted = deleted.replace("-", "0") fname = smstr(unquote(fname)) changes.append((int(added), int(deleted), fname)) l = next(lines) # And now the diff body. We just store as-is, we don't really care for # the contents. - body = ''.join(lines) + body = "".join(lines) return Diff(ref_id, changes, body) @@ -505,9 +539,9 @@ class Tree: self.repo = repo self.ref = ref - def ls(self, path, recursive = False): + def ls(self, path, recursive=False): """Generates (type, name, size) for each file in path.""" - cmd = self.repo.cmd('ls-tree') + cmd = self.repo.cmd("ls-tree") cmd.long = None if recursive: cmd.r = None @@ -521,17 +555,17 @@ class Tree: for l in cmd.run(): _mode, otype, _oid, size, name = l.split(None, 4) - if size == '-': + if size == "-": size = None else: size = int(size) # Remove the quoting (if any); will always give us a str. - name = unquote(name.strip('\n')) + name = unquote(name.strip("\n")) # Strip the leading path, the caller knows it and it's often # easier to work with this way. - name = name[len(path):] + name = name[len(path) :] # We use a smart string for the name, as it's often tricky to # manipulate otherwise. @@ -548,5 +582,5 @@ class Blob: @property def utf8_content(self): if not self._utf8_content: - self._utf8_content = self.raw_content.decode('utf8', 'replace') + self._utf8_content = self.raw_content.decode("utf8", "replace") return self._utf8_content |