Skip to content

Commit 5f05957

Browse files
[FEATURE] Improved performance and security of CHANGELOG generation script (- WIP PR #433 -)
Changes in file .github/workflows/CI-CHGLOG.yml: * fixed some minor overlooked mistakes found in review. Changes in file generate_changelog.sh: * major performance improvements (now about 25 times faster) * refactored for security hardening.
1 parent 1c5fa78 commit 5f05957

File tree

2 files changed

+118
-61
lines changed

2 files changed

+118
-61
lines changed

.github/workflows/CI-CHGLOG.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ jobs:
149149
defaults:
150150
run:
151151
shell: bash
152-
timeout-minutes: 15
152+
timeout-minutes: 8
153153
outputs:
154154
chglog_status: ${{ steps.gen_changelog.outcome || 'cancelled' }}
155155
artifact-id: ${{ steps.upload.outputs.artifact-id }}
@@ -167,13 +167,13 @@ jobs:
167167
persist-credentials: false
168168
ref: ${{ needs.check_build.outputs.sha }}
169169
sparse-checkout: '.github/actions/checkout-and-rebuild'
170-
- name: Checkout repository for chglog with ${{ matrix.python-version }}
170+
- name: Checkout repository for chglog
171171
id: fetch-build
172172
uses: ./.github/actions/checkout-and-rebuild
173173
with:
174174
sha: ${{ needs.check_build.outputs.sha }}
175175
build-run-id: ${{ needs.check_build.outputs.trigger_id }}
176-
python-version: ${{ matrix.python-version }}
176+
python-version: "${{ vars.PYTHON_DEFAULT }}"
177177
path: ${{ github.workspace }}
178178
- name: "Generate CHANGELOG for ${{ needs.check_build.outputs.sha }}...${{ needs.check_build.outputs.parent_sha }}"
179179
id: gen_changelog
@@ -219,7 +219,7 @@ jobs:
219219
id: load_chglog_info
220220
if: ${{ always() }}
221221
run: |
222-
mv -vf "${{ runner.temp }}/multicast-chglog-${{ needs.check_build.outputs.sha }}"/CHANGELOG.md" "${{ github.workspace }}/CHANGELOG.md" ;
222+
mv -vf "${{ runner.temp }}/multicast-chglog-${{ needs.check_build.outputs.sha }}/CHANGELOG.md" "${{ github.workspace }}/CHANGELOG.md" ;
223223
wait ;
224224
rmdir -v "${{ runner.temp }}/multicast-chglog-${{ needs.check_build.outputs.sha }}" || : ; # remove if able
225225
- name: "Report chglog status"
@@ -269,7 +269,7 @@ jobs:
269269
persist-credentials: false
270270
ref: ${{ needs.check_build.outputs.sha }}
271271
sparse-checkout: '.github/actions/check-control'
272-
- name: "Compleate chglog GitHub Check"
272+
- name: "Complete chglog GitHub Check"
273273
id: compleate_chglog
274274
if: ${{ (github.repository == 'reactive-firewall/multicast') && always() }}
275275
uses: ./.github/actions/check-control

generate_changelog.sh

Lines changed: 113 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,15 @@ check_command uniq ;
102102
check_command xargs ;
103103

104104
# rest of the script vars
105-
LOG_FILE="chglog_generation_${PPID}.log"
106-
ERR_FILE="chglog_generation_errors_${PPID}.log"
107-
LOCK_FILE="${TMPDIR:-/tmp}/org.pak.multicast.chglog-generation-shell"
105+
declare cache
106+
CHGLOG_GIT_LOG_CACHE_FILE="${TMPDIR:-/tmp}/.changelog_git_hist_buffer.txt"
107+
# shellcheck disable=SC2086
108+
# LOG_FILE="chglog_generation_${PPID}.log"
109+
# shellcheck disable=SC2086
110+
# ERR_FILE="chglog_generation_errors_${PPID}.log"
111+
# shellcheck disable=SC2086
112+
# LOCK_FILE="${TMPDIR:-/tmp}/org.pak.multicast.chglog-generation-shell"
113+
# shellcheck disable=SC2086
108114
EXIT_CODE=0
109115

110116
# USAGE:
@@ -114,16 +120,27 @@ EXIT_CODE=0
114120
# Results:
115121
# returns the most recent tag associated with the current commit.
116122
function id_current_tag() {
	# Prints the most recent tag reachable from the given commit-ish by
	# delegating to `git describe --tags --abbrev=0`.
	#
	# Arguments:
	#   $1 (Optional) -- commit-ish hint (tag, branch, SHA, HEAD). When it is
	#                    missing or fails validation, the hint is dropped and
	#                    git describes its default target instead.
	# Results:
	#   stdout: whatever `git describe` prints; git's stderr is discarded.
	#   status: the exit status of `git describe`.
	local INPUT_HINT_FOR_CURRENT="${1:-}" ;
	# Must start with an alphanumeric (so it can never be parsed as an option)
	# followed only by characters commonly found in ref names. No backslash
	# escapes inside the bracket expression: in an ERE bracket expression a
	# backslash is a literal character, so escaping would silently allow it.
	local GIT_REF_PATTERN='^[a-zA-Z0-9][-a-zA-Z0-9_+./]*$' ;
	# Match the WHOLE string in-shell. A line-oriented grep would accept a
	# multi-line value as soon as any single line looked valid.
	if [[ "${INPUT_HINT_FOR_CURRENT}" =~ ${GIT_REF_PATTERN} ]] ; then
		# input is safe enough to use as a git ref
		git describe --tags --abbrev=0 "${INPUT_HINT_FOR_CURRENT}" 2>/dev/null ;
	else
		# input is unsafe (or absent): fall back to none, same as no input
		git describe --tags --abbrev=0 2>/dev/null ;
	fi ;
} # end id_current_tag()
120136

121137
# USAGE:
122138
# ~$ run_enumerate_tag_history
123139
# Results:
124140
# returns a list of tags that are ancestors of the current HEAD, excluding the current tag.
125141
function run_enumerate_tag_history() {
	# Prints the tags that point at commits listed by
	# `git rev-list --tags --ancestry-path <base>^..HEAD`, one per line, where
	# <base> is the version-sort-lowest tag that does not contain the tag
	# reported for HEAD.
	#
	# Results:
	#   stdout: tag names (possibly none). Always returns 0.
	local HEAD_TAG
	HEAD_TAG="$(id_current_tag HEAD)"
	# Without a current tag there is nothing to anchor on; asking git for
	# `--no-contains ""` would only produce an error.
	test -n "${HEAD_TAG}" || return 0 ;
	local BASE_TAG
	BASE_TAG="$(git tag --no-contains "${HEAD_TAG}" | sort -V | head -n1)"
	# No base tag would make the range the invalid revision "^..HEAD".
	test -n "${BASE_TAG}" || return 0 ;
	# Resolve every listed commit back to the tag name(s) pointing at it;
	# commits without tags simply print nothing.
	{ git rev-list --tags --ancestry-path "${BASE_TAG}^..HEAD" ; wait ;} | xargs -I{} git tag --points-at "{}" 2>/dev/null ;
	wait ;
} # end run_enumerate_tag_history()
@@ -134,8 +151,12 @@ function run_enumerate_tag_history() {
134151
# returns a cached list of tags that are ancestors of the current HEAD, excluding development tags.
135152
# If the cache is empty, it populates the cache by calling run_enumerate_tag_history.
136153
function enumerate_tag_history() {
154+
if [[ -z "${cache}" ]] && [[ -r "${CHGLOG_GIT_LOG_CACHE_FILE}" ]]; then
155+
cache=$(cat <"${CHGLOG_GIT_LOG_CACHE_FILE}" ; wait;)
156+
fi ;
137157
if [[ -z "${cache}" ]] ; then
138-
cache=$(run_enumerate_tag_history | grep -vE "v?\d+.\d+.\d+-dev" ; wait;)
158+
{ run_enumerate_tag_history | grep -vE "v?\d+.\d+.\d+-dev" ; wait ;} >> "${CHGLOG_GIT_LOG_CACHE_FILE}" ; wait ;
159+
cache=$(cat <"${CHGLOG_GIT_LOG_CACHE_FILE}" ; wait;)
139160
fi ;
140161
printf "%s\n" "${cache}" ;
141162
wait ;
@@ -148,11 +169,74 @@ function enumerate_tag_history() {
148169
# Results:
149170
# returns the parent tag of the specified input tag or commit.
150171
function id_parent_tag() {
	# Prints the entry that follows the input's tag in the list produced by
	# enumerate_tag_history (i.e. its parent tag).
	#
	# Arguments:
	#   $1 (Required) -- tag or commit-ish to look up.
	# Results:
	#   stdout: the line after the matching tag; the tag itself when it is the
	#           last entry; nothing when the input is rejected or not listed.
	#   Always returns 0.
	local INPUT="${1:-}" ;
	# Must start with an alphanumeric (never an option) followed only by
	# ref-name characters. No backslash escapes inside the bracket expression:
	# there a backslash would be a literal, allowed character.
	local GIT_REF_PATTERN='^[a-zA-Z0-9][-a-zA-Z0-9_+./]*$' ;
	# Whole-string match, so multi-line input cannot pass on one good line.
	[[ "${INPUT}" =~ ${GIT_REF_PATTERN} ]] || return 0 ;
	# Capture only the first line of the output from id_current_tag
	local first_tag ;
	first_tag="$(id_current_tag "${INPUT}")" ;
	first_tag="${first_tag%%$'\n'*}" ;
	# Drop every character outside the permitted set, then re-validate what
	# is left (this also rejects an empty result).
	local sanitized_tag ;
	sanitized_tag="${first_tag//[^-,.+_[:alnum:]]/}" ;
	[[ "${sanitized_tag}" =~ ${GIT_REF_PATTERN} ]] || return 0 ;
	# -F -x: compare literally against the whole line. A substring match would
	# let "v1.1" also hit "v1.1-rc1", and tail would then report the wrong line.
	enumerate_tag_history | grep -A 1 -F -x -e "${sanitized_tag}" | tail -n1 ;
	wait ;
} # end id_parent_tag()
155191

192+
# USAGE:
193+
# ~$ format_changes_by_flag FLAG_NAME CHANGELOG_BUFFER_FILE
194+
# Arguments:
195+
# FLAG_NAME (Required) -- The flag type to filter and group (e.g., FEATURE, FIX)
196+
# CHANGELOG_BUFFER_FILE (Required) -- Path to the changelog buffer file
197+
# Results:
198+
# outputs formatted changelog entries grouped by the specified flag type
199+
function format_changes_by_flag() {
	# Prints the change-log entries of ONE flag, read from a buffer whose
	# summary lines look like "<abbreviated-hash> [FLAG] subject".
	#
	# Arguments:
	#   $1 FLAG_NAME   (Required) -- flag to collect (e.g., FEATURE, FIX);
	#                                compared for exact equality.
	#   $2 BUFFER_FILE (Required) -- path to the buffer file to read.
	# Results:
	#   stdout: a "FLAG:" heading and one " * <hash> --<subject>" line per
	#           entry, passed through the sort/uniq pipeline below.
	#   returns 64 when an argument is missing, 65 when the buffer is not a
	#   regular file, 77 when it is not readable.
	local FLAG_NAME="${1:-}"
	local BUFFER_FILE="${2:-}"

	# Validate inputs
	test -z "${FLAG_NAME}" && { printf "Error: FLAG_NAME is required\n" >&2 ; return 64 ; }
	test -z "${BUFFER_FILE}" && { printf "Error: BUFFER_FILE is required\n" >&2 ; return 64 ; }
	test -f "${BUFFER_FILE}" || { printf "Error: Buffer file not found\n" >&2 ; return 65 ; }
	test -r "${BUFFER_FILE}" || { printf "Error: Buffer file not readable\n" >&2 ; return 77 ; }

	# Execute the AWK processing
	awk -v RS='\n' -v ORS='\n\n\n' -v flagname="${FLAG_NAME}" '
		# Only lines that START with a hex hash directly followed by a [FLAG].
		/^[a-f0-9]+[ \t]*\[[A-Z]+\]/ {
			open_at = index($0, "[") ;
			close_at = index($0, "]") ;
			hash = substr($0, 1, open_at - 1) ;
			sub(/[ \t]+$/, "", hash) ;
			# Abbreviated hashes are not always 7 characters; accept any
			# plausible length instead of silently dropping longer ones.
			if (length(hash) < 7 || length(hash) > 64) { next }
			# Exact comparison: a regex match would file HOTFIX under FIX.
			if (substr($0, open_at + 1, close_at - open_at - 1) != flagname) { next }
			entry = " * " hash " --" substr($0, close_at + 1) ;
			entries = (entries == "" ? "" : entries "\n") entry ;
		}
		END {
			# Print the heading only when at least one entry was collected
			if (entries != "") { print flagname ":\n" entries }
		}
	' <"${BUFFER_FILE}" | sort -id | uniq | sort -id -k 4
} # end format_changes_by_flag()
236+
237+
# Export function for sub-shells
238+
export -f format_changes_by_flag
239+
156240
# step 1: is designed to determine the current and previous Git tags and
157241
# then construct a Git range based on these tags. If no argument is provided, it defaults to
158242
# using the tags.
@@ -166,15 +250,16 @@ if [[ -z "${1}" ]] ; then
166250
"${0}" "${FALLBACK_GIT_RANGE}" || : ; wait ;
167251
fi ;
168252
done;
253+
rm -f "${CHGLOG_GIT_LOG_CACHE_FILE}" || : ;
169254
unset FALLBACK_GIT_RANGE 2>/dev/null || : ;
170255
unset GIT_PREVIOUS_TAG 2>/dev/null || : ;
171256
exit 0 ;
172257
fi ;
173258
GIT_RANGE="${1}"
174259

175260
# cache the git full log
176-
CHANGELOG_BUFFER="${TMPDIR:-/tmp}/.chagelog_buffer.txt"
177-
cat <(git log "${GIT_RANGE}" --reverse --pretty=format:"COMMIT_START%n%h%n%B%nCOMMIT_END") >"${CHANGELOG_BUFFER}" ; wait ;
261+
CHANGELOG_BUFFER="${TMPDIR:-/tmp}/.changelog_buffer.txt"
262+
git log "${GIT_RANGE}" --reverse --pretty=format:"COMMIT_START%n%h%n%B%nCOMMIT_END" >"${CHANGELOG_BUFFER}" ; wait ;
178263

179264
RAW_FLAGS_LIST=$(cat <"${CHANGELOG_BUFFER}" | grep -oE "([\[][A-Z]+[]]){1}" | sort -id | uniq -c | sort -rid | grep -oE "([A-Z]+){1}" | sort -id | uniq | sort -rd ; wait ;)
180265

@@ -202,54 +287,26 @@ if [[ ( -n "${RAW_DEL_FILES}" ) ]] ; then
202287
fi ;
203288

204289
# Verbose mode adds a "Changes by Kind" section (one group per commit flag)
# before the "Changes by file" sub-header; otherwise only a blank gap is
# printed.
if [[ ( ${VERBOSE_CHANGE_LOG:-0} -gt 0 ) ]] ; then
	# flags sub-header
	printf "\n### Changes by Kind\n" ;

	# cache the git log summaries with hashes
	# The buffer is created with mktemp: a fixed file name inside a shared,
	# world-writable temp directory can be pre-created or symlinked by another
	# user and would be clobbered by a concurrent run.
	if CHANGELOG_BUFFER_SHORT="$(mktemp "${TMPDIR:-/tmp}/.short_changelog_buffer.XXXXXX")" ; then
		git log "${GIT_RANGE}" --reverse --grep="([\[][A-Z]+[]]){1}" -E --pretty=format:"COMMIT_START%n%h %s%nCOMMIT_END" >"${CHANGELOG_BUFFER_SHORT}" ; wait ;

		# auto-collect by flags
		# (the list is split on whitespace on purpose: one flag per word)
		for FLAG_ID in $(printf "%s\n" "${RAW_FLAGS_LIST}") ; do
			if [[ ( -n "$FLAG_ID" ) ]] ; then
				format_changes_by_flag "${FLAG_ID}" "${CHANGELOG_BUFFER_SHORT}"
			fi ;
		done ;

		rm -f "${CHANGELOG_BUFFER_SHORT}" 2>/dev/null || : ;
	else
		# best effort: keep generating the rest of the change-log
		printf "Warning: unable to create a temporary buffer; skipping changes by kind\n" >&2 ;
	fi ;
	# files sub-header
	printf "\n### Changes by file\n" ;
else
	# NO sub-headers
	printf "\n\n" ;
fi ;
254311

255312
# auto-collect gitlog

0 commit comments

Comments
 (0)