git实例-敏感信息过滤 [1]

  • git提供了一组后缀为.sample的钩子,将其pre-commit.sample重命名为pre-commit至于仓库.git/hooks下即可保证pre-commit在提交前被执行。
  • git config –global配置全局和不同仓库的敏感词列表。
  • 拷贝(如无则创建)git-template/hooks文件夹,并设置为init.templatedir,这样每次用户执行git init(任意一次)时,都会将其git-templates目录下的内容全部拷贝到当前仓库的.git目录下。
  • 编辑pre-commit文件,提取提交内容并判断是否包含了已配置的敏感词列表,如果包含,错误提醒用户并终止commit。
#/bin/bash
template_hooks_dir=$HOME/.git-templates/hooks
if [[ ! -d "$template_hooks_dir" ]];then
	mkdir -p "$template_hooks_dir"
fi
cp -rf ./pre-commit "$template_hooks_dir/pre-commit"
#Any repositories called git init(first time or later) will copy files in init.templatedir to its own git directory.
#Use git init again for existing repository to apply this setting.
git config --global init.templatedir "$HOME/.git-templates"

#Filter for global.Change value0...valuen to your customized word to filter.
git config --global filter..value0 your-words-to-filter-which-apply-to-all-repository
# 过滤全部带xxxxxxx的
git config --global filter..value1 xiangxinziji

#Filter for certain git host,like github.com. Change value0...valuen to your custmized word.
git config --global filter.github.com.value0 your-words-to-filter-which-apply-to-repository-at-github.com
# 过滤github的带githubgithub的
git config --global filter.github.com.value0 your-words-to-filter-which-apply-to-repository-at-github.com
#!/bin/sh
#
# An example hook script to verify what is about to be committed.
# Called by "git commit" with no arguments.  The hook should
# exit with non-zero status after issuing an appropriate message if
# it wants to stop the commit.

errno=""
illegal_contents=()

## 此函数就是读取之前配置的sensitive列表
function read_all_illegal_contens {
	if [[ ${#illegal_contents} == 0 ]];then
		#read global filter values
		for ((i=0;i>=0;i++))
		do
			## 取出之前通过git config --global filter..value0 xxx配置的数据
			value=`git config --global filter..value$i`
			if [[ ${#value} > 0 ]];then
				if [[ ! " ${illegal_contents[@]} " =~ " ${value} " ]]; then
					illegal_contents+=($value)
				fi
			else
				break
			fi
		done
		#read current repository filter values
		# 如: git@github.com:zhaoweiguo/knowledge.git
		remoteurl=`git config --get remote.origin.url`
		#process ssh style.
		git_regex="^(git@[^:]+)"
		http_regex="(://[^/]+)"
		if [[ $remoteurl =~ $git_regex ]];then
			git_host=`echo ${BASH_REMATCH[0]} | cut -b 5-`
		elif [[ $remoteurl =~ $http_regex ]];then
			git_host=`echo ${BASH_REMATCH[0]} | cut -b 4-`
		fi
		for ((i=0;i>=0;i++))
		do
			value=`git config --global filter."$git_host".value$i`
			if [[ ${#value} > 0 ]];then
				if [[ ! " ${illegal_contents[@]} " =~ " ${value} " ]]; then
					illegal_contents+=($value)
				fi
			else
				break
			fi
		done
	fi
}

function check_if_commitline_contains_illegal {
	content=$3
	read_all_illegal_contens
	illegal_contents_len=${#illegal_contents[@]}
	for ((i=0;i<$illegal_contents_len;i++))
	do
		illegal_content=${illegal_contents[$i]}
		if [[ $content == *$illegal_content* ]];then
			errno="$illegal_content"
			return 1
		fi
	done
	return 0
}

if git rev-parse --verify HEAD >/dev/null 2>&1
then
	against=HEAD
else
	# Initial commit: diff against an empty tree object
	against=4b825dc642cb6eb9a060e54bf8d69288fbee4904
fi

# Redirect output to stderr.
exec 1>&2

## $diff_content的值与在命令行执行git diff head相同
## 注: 在命令行直接执行git diff --cached是空, 因为这时还没放入cached中
##    可以先执行git add再执行git diff --cached
diff_content=$(git diff --cached)
IFS=$'\n' read -rd '' -a diff_lines <<<"$diff_content"
diff_lines_cnt=${#diff_lines[@]}
cur_filename=""
cur_chunk=""
chunk_toline_from=0
chunk_toline_len=0
for ((j=0;j<$diff_lines_cnt;j++));do
	line=${diff_lines[j]}
	file_prefix="diff --git a/"
	chunk_regex="^(@@ [-+][0-9]+,[0-9]+ [-+][0-9]+,[0-9]+ @@)"
	## 第1行类似如下格式:
	## diff --git a/source/info.rst b/source/info.rst
	## 得到文件名为: cur_filename=source/info.rst
	if [[ $line == $file_prefix* ]];then
		file_prefix_len=${#file_prefix}
		cur_filename=`echo "$line" | cut -b $((file_prefix_len))-`
		cur_filename=`echo ${cur_filename/ b\/*/}`
	fi
	## 处理如下格式:
	## @@ -10,7 +10,7 @@ 内容内容sensitive内容内容
	## 得到: cur_chunk=@@ -31,4 +31,7 @@
	if [[ $line =~  $chunk_regex ]];then
		cur_chunk=${BASH_REMATCH[0]}
		chunk_fromline_from=0
		chunk_fromline_len=0
		read chunk_fromline_from chunk_fromline_len chunk_toline_from chunk_toline_len<<<${cur_chunk//[^0-9]/ }
	fi
	if [[ ${#cur_filename} != 0 ]] && [[ ${#cur_chunk} != 0 ]] && [[ $line == "+"* ]];then
		line=`echo "$line" | cut -b 2-`
		## cur_filename: 文件路径, 如:/source/info.rst
		## cur_chunk: 行范围, 如: @@ -10,7 +10,7 @@
		## line: 这一行内容, 如: 内容内容sensitive内容内容
		check_if_commitline_contains_illegal "$cur_filename" "$cur_chunk" "$line"
		exists=$?
		if [[ $exists == 1 ]];then
			echo ""
cat <<EOF
Error: Attempt to commit line
	$line
where illegal content:
	$errno
	is included.
To be allowed to commit,it is advisable to modify the file
	$cur_filename
at chunk
	line:$chunk_toline_from~line:$(($chunk_toline_from+$chunk_toline_len)).
EOF
		exit 1
		fi
	fi
	if [ $j == $((diff_lines_cnt-1)) ] || [[ ${diff_lines[j+1]} =~  $chunkRegex ]];then
		cur_chunk=""
	fi
	if [ $j == $((diff_lines_cnt-1)) ] || [[ ${diff_lines[j+1]} == $file_prefix* ]];then
		cur_filename=""
	fi
done
# If there are whitespace errors, print the offending file names and fail.
# 这一句是不让文件最后有多余的换行
exec git diff-index --check --cached $against --
[1]https://kangwang1988.github.io/tech/2016/07/08/filter-sensitive-words-pre-git-commit.html