summaryrefslogtreecommitdiffstats
path: root/site
diff options
context:
space:
mode:
authorThomas Letan <lthms@soap.coffee>2020-12-14 12:05:00 +0100
committerThomas Letan <lthms@soap.coffee>2020-12-14 15:42:52 +0100
commit1eaddf3b92360f9650b8c82de354949c48790e3c (patch)
treebbfc95c334b47ac7568fdaca41a54ada96432bc7 /site
parentAdvertise the use of cleopatra (diff)
Massive performance improvement for `history.sh'
Before: soupault 14.83s user 1.36s system 105% cpu 15.374 total After: soupault 3.37s user 0.77s system 106% cpu 3.871 total
Diffstat (limited to 'site')
-rw-r--r--site/cleopatra/soupault.org270
1 files changed, 131 insertions, 139 deletions
diff --git a/site/cleopatra/soupault.org b/site/cleopatra/soupault.org
index e7de9bf..98eb732 100644
--- a/site/cleopatra/soupault.org
+++ b/site/cleopatra/soupault.org
@@ -416,11 +416,6 @@ command = 'scripts/history.sh templates/history.html'
action = "replace_content"
#+END_SRC
-#+BEGIN_TODO
-This plugin should be reimplemented using ~libgit2~ or other ~git~ libraries, in
-a language more suitable than bash.
-#+END_TODO
-
This plugin proceeds as follows:
1. Using an ad-hoc script, it generates a JSON containing for each revision
@@ -445,165 +440,162 @@ function main () {
}
#+END_SRC
-The difficult part of this script is the definition of the =generate_json=
-function. From a high-level perspective, this function is divided into three
-steps.
-
-1. We get an initial (but partial) set of data about the ~git~ commit of
- ~${file}~, from the most recent to the oldest
-2. For each commit, we check whether or not ~${file}~ was renamed
-3. Finally, we output a result (because we are writing a bash script)
-
-#+BEGIN_SRC bash :tangle scripts/history.sh :noweb no-export
-function generate_json () {
- local file="${1}"
- local logs=`<<git-log>>`
-
- if [ ! $? -eq 0 ]; then
- exit 1
- fi
-
- <<remane-tracking>>
+Generating the expected JSON is therefore as simple as:
- <<result-echoing>>
-}
-#+END_SRC
+- Fetching the logs
+- Reading 8 lines from the logs, parsing the filename from the 6th
+ line
+- Outputting the JSON
-We will use ~git~ to get the information we need. By default, ~git~ subcommands
-use a pager when its output is likely to be long. This typically includes
-~git-log~. To disable this behavior, ~git~ exposes the ~--no-pager~ command.
-We introduce =_git=, a wrapper around ~git~ with the proper option.
+We will use ~git~ to get the information we need. By default, ~git~
+subcommands use a pager when their output is likely to be long. This
+typically includes ~git-log~. To disable this behavior, ~git~ exposes
+the ~--no-pager~ option. Besides, we also need ~--follow~ and
+~--stat~ to deal with file renaming. Without ~--follow~, ~git-log~
+stops when the file first appears in the repository, even if this
+“creation” is actually a renaming; ~--stat~ makes each entry report
+the path of the file, including renames. Therefore, the ~git~ command
+line we use to collect our history is
-#+BEGIN_SRC bash :tangle scripts/history.sh
-function _git () {
- git --no-pager "$@"
+#+NAME: gitlog
+#+BEGIN_SRC bash :tangle scripts/history.sh :noweb yes
+function gitlog () {
+ local file="${1}"
+ git --no-pager log \
+ --follow \
+ --stat=10000 \
+ --pretty=format:'%s%n%h%n%H%n%cs%n' \
+ "${file}"
}
#+END_SRC
-Afterwards, we use =_git= in place of ~git~.
-
-Using the ~git-log~ ~--pretty~ command-line argument, we can generate
-one JSON object per commit which contains most of the information we need, using
-the following format string.
-
-#+NAME: pretty-format
-#+BEGIN_SRC json
-{ "subject" : "%s", "abbr_hash" : "%h", "hash" : "%H", "date" : "%cs" }
-#+END_SRC
-
-Besides, we also need ~--follow~ to deal with file renaming. Without this
-option, ~git-log~ stops when the file first appears in the repository, even if
-this “creation” is actually a renaming. Therefore, the ~git~ command line we
-use to collect our initial history is
-
-#+NAME: git-log
-#+BEGIN_SRC bash :noweb no-export
-_git log --follow --pretty=format:'<<pretty-format>>' "${file}"
-#+END_SRC
-
-To manipulate JSON, we rely on three operators (yet to be defined):
-
-- =jget OBJECT FIELD= ::
- In an =OBJECT=, get the value of a given =FIELD=
-- =jset OBJECT FIELD VALUE= ::
- In an =OBJECT=, set the =VALUE= of a given =FIELD=
-- =jappend ARRAY VALUE= ::
- Append a =VALUE= at the end of an =ARRAY=
+This function will generate a sequence of 8 lines containing all the
+relevant information we are looking for, for each commit, namely:
-#+NAME: remane-tracking
-#+BEGIN_SRC bash :noweb no-export
-local name="${file}"
-local revisions='[]'
-local first=0
+- Subject
+- Abbreviated hash
+- Full hash
+- Date
+- Empty line
+- Change summary
+- Shortlog
+- Empty line
-while read -r rev; do
- rev=$(jset "${rev}" "filename" "\"${name}\"")
+For instance, the =gitlog= function will output the following lines
+for the last commit of this very file:
- if [ ${first} -eq 0 ]; then
- rev=$(jset "${rev}" "modified" "true")
- first=1
- fi
-
- revisions=$(jappend "${revisions}" "${rev}")
-
- local hash=$(jget "${rev}" "hash")
- local rename=$(previous_name "${name}" "${hash}")
-
- if [[ ! -z "${rename}" ]]; then
- name=${rename}
- fi
-done < <(echo "${logs}")
-
-revisions=$(_jq "${revisions}" "length as \$l | .[\$l - 1].created |= true")
+#+BEGIN_SRC bash :results verbatim :exports results :noweb yes
+<<gitlog>>
+gitlog "soupault.org" | head -n8
#+END_SRC
-#+BEGIN_SRC bash :tangle scripts/history.sh
-function previous_name () {
- local name=${1}
- local hash=${2}
+Among other things, the 6th line contains the filename. We need to
+extract it, and we do that with ~sed~. In case of file renaming, we
+need to parse something of the form ~both/to/{old => new}~.
- local unfold='s/ *\(.*\){\(.*\) => \(.*\)}/\1\2 => \1\3/'
+#+BEGIN_SRC bash :tangle scripts/history.sh :noweb yes
+function parse_filename () {
+ local line="${1}"
+ local shrink='s/ *\(.*\) \+|.*/\1/'
+ local unfold='s/\(.*\){\(.*\) => \(.*\)}/\1\3/'
- _git show --stat=10000 ${hash} \
- | sed -e "${unfold}" \
- | grep "=> ${name}" \
- | xargs \
- | cut -d' ' -f1
+ echo ${line} | sed -e "${shrink}" | sed -e "${unfold}"
}
#+END_SRC
-#+NAME: result-echoing
-#+BEGIN_SRC bash :noweb no-export
-jset "$(jset "{}" "file" "\"${file}\"")" \
- "history" \
- "${revisions}"
-#+END_SRC
-
-The last missing pieces are the definitions of the three JSON operators. We use
-[[https://stedolan.github.io/jq/][~jq~]] to manipulate JSON data. Since ~jq~
-processes JSON from its standard input, we first define a helper (similar to
-=_git=) to deal with JSON from variables seamlessly.
+The next step is to process the logs to generate the expected JSON. We
+have to deal with the fact that JSON does not allow a trailing ","
+after the last item of an array. Besides, we also want to indicate
+which commit is responsible for the creation of the file. To do that,
+we use two variables: =idx= and =last_entry=. When =idx= is equal to
+0, we know we are looking at the latest commit. When =idx= is equal to
+=last_entry=, we know we are looking at the oldest commit for that
+file.
-#+BEGIN_SRC bash :tangle scripts/history.sh
-function _jq () {
+#+BEGIN_SRC bash :tangle scripts/history.sh :noweb yes
+function generate_json () {
local input="${1}"
- local filter="${2}"
-
- echo "${input}" | jq -jcM "${filter}"
-}
-#+END_SRC
-
-- *-j* tells ~jq~ not to print a new line at the end of its outputs
-- *-c* tells ~jq~ to print JSON in a compact format (rather than prettified)
-- *-M* tells ~jq~ to output monochrome outputs
-
-Internally, =jget=, =jset=, and =jappend= are implemented with ~jq~
-[[https://stedolan.github.io/jq/manual/#Basicfilters][basic filters]].
-
-#+BEGIN_SRC bash :tangle scripts/history.sh
-function jget () {
- local obj="${1}"
- local field="${2}"
+ local logs="$(gitlog ${input})"
- _jq "${obj}" ".${field}"
-}
-
-function jset () {
- local obj="${1}"
- local field="${2}"
- local val="${3}"
+ if [ ! $? -eq 0 ]; then
+ exit 1
+ fi
- _jq "${obj}" "setpath([\"${field}\"]; ${val})"
+ let "idx=0"
+ let "last_entry=$(echo "${logs}" | wc -l) / 8"
+
+ local subject=""
+ local abbr_hash=""
+ local hash=""
+ local date=""
+ local file=""
+ local created="true"
+ local modified="false"
+
+ echo -n "{"
+ echo -n "\"file\": \"${input}\""
+ echo -n ",\"history\": ["
+
+ while read -r subject; do
+ read -r abbr_hash
+ read -r hash
+ read -r date
+ read -r # empty line
+ read -r file
+ read -r # short log
+ read -r # empty line
+
+ if [ ${idx} -ne 0 ]; then
+ echo -n ","
+ fi
+
+ if [ ${idx} -eq ${last_entry} ]; then
+ created="true"
+ modified="false"
+ else
+ created="false"
+ modified="true"
+ fi
+
+ output_json_entry "${subject}" \
+ "${abbr_hash}" \
+ "${hash}" \
+ "${date}" \
+ "$(parse_filename "${file}")" \
+ "${created}" \
+ "${modified}"
+
+ let idx++
+ done < <(echo "${logs}")
+
+ echo -n "]}"
}
-function jappend () {
- local arr="${1}"
- local val="${2}"
+#+END_SRC
- _jq "${arr}" ". + [ ${val} ]"
+Generating the JSON object for a given commit is as simple as:
+
+#+BEGIN_SRC bash :tangle scripts/history.sh :noweb yes
+function output_json_entry () {
+ local subject="${1}"
+ local abbr_hash="${2}"
+ local hash="${3}"
+ local date="${4}"
+ local file="${5}"
+ local created="${6}"
+ local last_entry="${7}"
+
+ echo -n "{\"subject\": \"${subject}\""
+ echo -n ",\"created\":${created}"
+ echo -n ",\"modified\":${modified}"
+ echo -n ",\"abbr_hash\":\"${abbr_hash}\""
+ echo -n ",\"hash\":\"${hash}\""
+ echo -n ",\"date\":\"${date}\""
+ echo -n ",\"filename\":\"${file}\""
+ echo -n "}"
}
#+END_SRC
-Everything is defined. We can call =main= now.
+And we are done! We can safely call the =main= function to generate
+our revisions table.
#+BEGIN_SRC bash :tangle scripts/history.sh
main "$(cat)" "${1}"