# Source code for imsi.utils.git_tools

"""
git_tools
=========

This is a module to collect common version control operations.

It might be worth checking out e.g. https://gitpython.readthedocs.io/en/stable/

However, for now to reduce dependencies, and given the basic operations needed,
I'm just implementing a few functions here.

NCS, 10/2021
"""
from getpass import getuser
import os
from re import fullmatch
import shlex
import subprocess
from collections import OrderedDict
from typing import Optional

# NOTE: many git commands issued via subprocess are kept as full strings
# so that they are more easily searchable, but then split using shlex so
# that we don't have to use shell=True.

class GitException(Exception):
    """Generic error raised when running a git command fails.

    The first positional argument is kept on ``self.message`` and is what
    ``str()`` of the exception shows; any extra positional arguments are
    forwarded to ``Exception`` unchanged.
    """

    def __init__(self, message, *args):
        self.message = message
        super().__init__(message, *args)

    def __str__(self):
        text = str(self.message)
        return text
def is_git_repo(path=None):
    """Return True if ``path`` contains a ``.git`` entry.

    This is a simple filesystem check; no git subprocess is run.
    TODO replace with a real git query.

    Args:
        path: Directory to inspect. If None, the current working directory
            is used.

    Returns:
        bool: True when ``<path>/.git`` exists, False otherwise.
    """
    if path is None:
        # os.getcwd() is the correct call; os.path has no getcwd().
        path = os.getcwd()
    return os.path.exists(os.path.join(path, '.git'))
def is_sha1(s):
    """Tell whether ``s`` looks like a SHA-1: 7 to 40 hex characters only."""
    matched = fullmatch(r"[0-9a-fA-F]{7,40}", s)
    return matched is not None
def ensure_git_config(id_as_fallback=True):
    """Make sure git has a user name and email configured.

    Both values are necessary for a commit. Each one is queried with
    ``git config <key>``; when the query prints nothing, a fallback value is
    written with ``git config <key> <value>`` (no ``--global`` flag). All
    commands run in the current working directory.

    Args:
        id_as_fallback: If True, the login name from ``getpass.getuser()`` is
            used as the fallback user name; otherwise ``"IMSI USER"`` is used.
    """
    USER_FALLBACK = "IMSI USER"
    CONTACT_EMAIL_FALLBACK = '<>'  # TODO not great, better idea?

    git_user = subprocess.run(shlex.split('git config user.name'), capture_output=True)
    git_email = subprocess.run(shlex.split('git config user.email'), capture_output=True)

    if not git_user.stdout.decode().strip():
        fallback_name = getuser() if id_as_fallback else USER_FALLBACK
        # The value is appended as a single argv entry. Interpolating it into
        # the command string and splitting would break "IMSI USER" into two
        # arguments, and git would take the second one as a value-pattern.
        subprocess.run(shlex.split('git config user.name') + [fallback_name])

    if not git_email.stdout.decode().strip():
        subprocess.run(shlex.split('git config user.email') + [CONTACT_EMAIL_FALLBACK])
def git_add_all(path=None):
    """Run ``git add -A`` in ``path`` (the current working directory if None)."""
    workdir = os.getcwd() if path is None else path
    add_cmd = 'git add -A'
    subprocess.run(shlex.split(add_cmd), cwd=workdir)
def git_add_commit(path=None, msg='commit'):
    """Stage and commit everything in ``path`` if the repo has changes.

    Nothing is staged or committed when ``is_repo_clean`` reports the
    repository as clean.

    Args:
        path: Repository directory. If None, the current working directory
            is used.
        msg: Commit message, passed to ``git commit -m`` verbatim.
    """
    if path is None:
        path = os.getcwd()

    # NOTE(review): ensure_git_config() takes no path, so it acts on the
    # current working directory rather than on ``path`` -- confirm intended.
    ensure_git_config()

    clean, _ = is_repo_clean(path)
    if not clean:
        git_add_all(path=path)
        # The message is its own argv entry so quotes, backslashes or
        # newlines inside it cannot be mangled by shlex splitting.
        subprocess.run(shlex.split('git commit -q -m') + [msg], cwd=path)
def get_head_hash(path=None):
    """Return the stripped output of ``git rev-parse HEAD`` run in ``path``.

    ``path`` defaults to the current working directory. The return code is
    not checked; whatever the command wrote to stdout is decoded, stripped
    and returned.
    """
    workdir = os.getcwd() if path is None else path
    rev_parse_cmd = 'git rev-parse HEAD'
    result = subprocess.run(shlex.split(rev_parse_cmd), cwd=workdir, capture_output=True)
    return result.stdout.decode().strip()
def init_repo(path=None, branch=None):
    """Run ``git init -q`` in ``path`` and optionally create a branch.

    Args:
        path: Directory to initialise. If None, the current working
            directory is used.
        branch: If not None, ``git rev-parse --abbrev-ref HEAD`` is run and,
            only when that command returns a non-zero code,
            ``git checkout -b <branch> -q`` is run to create the branch.
    """
    # It is safe to run git init in a repo that is already initialized
    # so there is no check for this
    # see: https://git-scm.com/docs/git-init
    if path is None:
        path = os.getcwd()

    # could make initial branch match runid with
    #    git init -q --initial-branch branch-name
    # if we checked git version (not available in older versions)
    subprocess.run(shlex.split('git init -q'), cwd=path)

    if branch is not None:
        proc_branch = subprocess.run(shlex.split('git rev-parse --abbrev-ref HEAD'), cwd=path, capture_output=True)
        # note: keep on current branch if set, forgo checking returned == branch
        # otherwise:
        if (proc_branch.returncode != 0):
            # error = branch doesn't exist yet
            subprocess.run(shlex.split(f'git checkout -b {branch} -q'), cwd=path)
def clear_repo(path):
    """Run ``git rm -rfq .`` followed by ``git clean -dfxq`` in ``path``.

    The two commands run in that order and their return codes are ignored.
    """
    for command in ('git rm -rfq .', 'git clean -dfxq'):
        subprocess.run(shlex.split(command), cwd=path)
def clone(repo_address, local_name, ver, path=None, depth: Optional[int] = None):
    """Clone a git repository (with any submodules) and check out ``ver``.

    The repository is cloned recursively, ``ver`` is checked out across
    submodules, and the submodules are then updated and initialised.

    Inputs:
        repo_address : str
            The address to clone from, typically a url.
        local_name : str
            The name to use for the on disk code.
        ver : str
            The reference to check out. Can be a branch name or tag (passed
            to ``git clone --branch`` for shallow clones). When it looks like
            a SHA-1, a full clone is done and then checked out to that SHA-1
            (``depth`` is ignored).
        path : str, None
            Directory in which to clone; the repo ends up in path/local_name.
            If None, the current working directory is used.
        depth : int, None
            Depth to which the repository and its submodules are cloned.
            If None, the full history is cloned. Not used when ``ver`` is a
            SHA-1.

    Raises:
        ValueError: ``depth`` is given, is below 1, and ``ver`` is not a SHA-1.
        subprocess.CalledProcessError: the ``git clone`` command fails.
    """
    parent_dir = os.getcwd() if path is None else path
    repo_dir = os.path.join(parent_dir, local_name)

    if depth is None or is_sha1(ver):
        # full clone
        clone_options = ["--recursive"]
    elif depth >= 1:
        # shallow clone - only if ver is branch or tag
        clone_options = [
            "--branch", ver,
            "--recurse-submodules",
            "--depth", f"{depth}",
            "--shallow-submodules",
        ]
    else:
        raise ValueError(f"Can't clone repo with ver '{ver}' and depth '{depth}'")

    # for clone: local_name is passed instead of '.' with cwd set to the
    # parent directory, so repo_dir is created by git, not by this function
    clone_cmd = ["git", "clone", *clone_options, repo_address, local_name]
    subprocess.run(clone_cmd, cwd=parent_dir, check=True)

    # The follow-up commands run inside the fresh clone; their return codes
    # are not checked.
    follow_up_cmds = (
        ["git", "checkout", "--recurse-submodules", ver],
        ["git", "submodule", "update", "--recursive", "--init", "--checkout"],
    )
    for cmd in follow_up_cmds:
        subprocess.run(cmd, cwd=repo_dir)
def is_repo_clean(path=None):
    """Report whether the repository at ``path`` has no pending changes.

    Runs ``git status --porcelain=v1`` in ``path``.

    Args:
        path: Repository directory. If None, the current working directory
            is used.

    Returns:
        tuple: ``(clean, status)`` where ``clean`` is True when the command
        printed nothing, and ``status`` is its decoded, stripped output.

    Raises:
        GitException: if the git command exits with a non-zero code (for
            example when ``path`` is not inside a git repository).
    """
    if path is None:
        path = os.getcwd()

    cmd_git_has_changes = "git status --porcelain=v1"  # strict version
    try:
        # check=True is what makes a failing git command raise; without it
        # the handler below is unreachable and a failure looks like "clean".
        proc = subprocess.run(shlex.split(cmd_git_has_changes),
                              capture_output=True, cwd=path, check=True)
    except subprocess.CalledProcessError as e:
        raise GitException(e.stderr.decode()) from e

    return (not proc.stdout, proc.stdout.decode().strip())
def repo_has_commits(path=None):
    """Return 1 if the repository at ``path`` has a commit, else 0.

    Args:
        path: Repository directory. If None, the current working directory
            is used.

    Returns:
        int: 1 when ``git log`` succeeds, 0 when it exits with a non-zero
        code (which is what happens when there are no commits yet).
    """
    if path is None:
        path = os.getcwd()

    # check if it is a git repo by first running:
    is_repo_clean(path)

    try:
        # Only the return code matters, so ask for a single entry instead of
        # capturing the whole history.
        subprocess.run(shlex.split('git log -n 1'), capture_output=True,
                       cwd=path, check=True)
    except subprocess.CalledProcessError:
        # does not have any commits yet
        return 0
    return 1
if __name__ == '__main__':
    # The exception must be raised; merely constructing it does nothing.
    raise SystemExit('Version control module not directly callable')