news-maker.sh

#!/bin/bash
# news-maker.bash - Automatically creates a NEWS file about a project, as long as the Project Admin has published them on GNU Savannah
# Copyright © 2015-2016 Michael Pagan
#
# Author: Michael Pagan
# E-Mail: michael.pagan@member.fsf.org
# Jabber: pegzmasta@member.fsf.org
#
# This file is NOT part of Genshiken.
#
# news-maker.bash is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# news-maker.bash is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with news-maker.bash. If not, see http://www.gnu.org/licenses/.

# Abort as soon as any unguarded command fails
set -e

# These variables are provided in the Makefile
# Usage: news-maker.bash NAME AUTHOR COPYRIGHT-COMMAND [--del-comments|--add-comments]
# NOTE: NAME is either the system name OR proper name of the project defined on GNU Savannah
if [[ -z $1 ]]; then
  echo -e "\n\e[31mError:\e[0m No project name given.  Usage: $0 NAME AUTHOR COPYRIGHT-COMMAND [--del-comments|--add-comments]" >&2
  exit 1
fi
NAME=$1
# NOTE(review): AUTHOR is assigned here but not referenced anywhere else in this script
AUTHOR=$2
# The third argument is a shell command whose standard output becomes the copyright
# footer of NEWS.  It is quoted so that the command reaches `eval` exactly as the caller
# wrote it (no word splitting or filename expansion beforehand).
# SECURITY: `eval` runs arbitrary code; only ever pass a trusted command here.
COPYRIGHT=$(eval "$3")

# URL for GNU Savannah (if project is nongnu, we'll re-direct)
PROJ_HOME=https://savannah.gnu.org/

# Let's search for the project location online
# The search term is URL-encoded by curl, so that proper names containing spaces, `&`,
# `+` or `=` are transmitted intact; the remaining form fields are constant.
# The result page is saved as query-url.html in the current directory, because the
# description is extracted from it again further down.
echo -e "Retrieving information on project \"$NAME\" ...\n"
curl --data-urlencode "words=$NAME" \
     -d 'type_of_search=soft&Search=Search&exact=1' \
     "${PROJ_HOME}search/" > query-url.html

# Pull the "projects/<system-name>" path out of the unique-result section of the page.
# NOTE(review): NAME is interpolated into the sed patterns as a regular expression, so a
# name containing `/` or `:` would break these expressions -- confirm that cannot happen.
pkg_path=$(lynx -source query-url.html |\
                  sed -e '1,/<h3>Unique project search/ d' -e '/<h3>Search results/,$ d' | sed -n "/.*href=.*projects.*>$NAME.*/ p" |\
                  sed -e "s:.*\(projects.*$NAME\).*:\1:" -e 's:.>.*::' 2> /dev/null)

# When the search gave no unique hit, assume NAME already is the system name
pkg_path=${pkg_path:-projects/$NAME}

# Does project NAME really exist?
# Dump the project page as text and keep the first line of the span running from
# "Error:" to the "Invalid Group" notice; with leading spaces and the colon stripped,
# that line reads exactly "Error" when such a span is present.
lookup_status=$(
  lynx -dump ${PROJ_HOME}${pkg_path} \
    | sed -n '/[ ]*Error:/,/[ ]*Invalid Group: That group does not exist./ p' \
    | sed -n '1 p' \
    | sed -e 's/[ ]*//' -e 's/://'
)
if [[ $lookup_status == "Error" ]]; then
  echo -e "\n\e[31mError:\e[0m Sorry.  Project \"$NAME\" does not exist!" >&2
  exit 1
fi

# Add a header to our NEWS file
# The header is: an Org mode line, a #+TITLE line built from NAME, lines 2-3 of the
# file TODO in the current directory, and a #+STARTUP line.  Any existing NEWS file is
# overwritten here.
echo -e "\nAdding header ..."
echo -e "# -*- Mode: Org -*-\n#+TITLE: $NAME - News\n$(sed -n '2,3 p' TODO)\n#+STARTUP:  content" > NEWS

# Add a short description to our NEWS file
# The saved search page (query-url.html) is rendered as text, and only the part between
# "Unique project search result for NAME:" and "Search results for NAME" is kept.
# Bracketed text and leading spaces are stripped, the first word of each of the first
# three lines is removed, a blank line is added after the "Description" line, line 2 is
# turned into a top-level Org heading ("* ...") and the following non-empty lines are
# indented by two spaces.
# query-url.html is removed only when the pipeline succeeded; the "* News" heading is
# appended in either case.
echo "Adding description ..."
lynx -dump query-url.html |\
    sed -e "1,/Unique project search result for $NAME:/ d" \
        -e "/Search results for $NAME/,$ d" \
        -e 's/\[.*\]//' \
        -e 's/^[ ]*//' |\
    sed -e '1,3 s/^\w* //' |\
    sed -e '/Description.*/ { s/.*/&\n/ }' -e '2 s/^\(.\)/\* \1/' -e '3,$ s/^\(.\)/  \1/' 2> /dev/null >> NEWS && \
    rm query-url.html; echo -e "* News" >> NEWS

# A link to an archive, containing all the news articles on GNU Savannah for said project
NEWS_ARCHIVE=${PROJ_HOME}$(lynx -source ${PROJ_HOME}${pkg_path} |\
                                  sed -n '/news in archive/ p' |\
                                  sed 's:.*\(/news/.group_id=[0-9]*\).>.*:\1:' 2> /dev/null)

# Reads a `lynx -dump` of the news archive on stdin.  Everything up to and including the
# "Latest News Approved" line is dropped; of the remainder only the lines matching the
# basic regular expression $1 are kept, minus the page furniture (read-more links, reply
# counters, submit/back-to-top links, footer lines).
archive_lines() {
  sed -e '1,/Latest News Approved/ d' -n -e "s/$1/&/p" |
    sed -e '/\[Read more\]/ d' -e '/[ ]-[ ][0-9]*[ ]repl[iesy]/ d' \
        -e '/\[Submit News\]/ d' \
        -e '/Back to the top/ d' \
        -e '/\[[0-9]*\]Source/ d' \
        -e '/\[[0-9]*\]Powered by/ d' \
        -e '/[0-9]*news in archive/ d'
}

# stdin: archive dump.  stdout: one footnote number per article, written as a sed
# pattern with an escaped dot (e.g. `12\.`).
footnotes_from_dump() {
  archive_lines '^[ ]*\[.*' |
    sed -e 's/\(\[[0-9]*\][^ ]*\).*/\1/' \
        -e 's/.*\[\(.*\)\].*/\1\\./'
}

# stdin: archive dump.  stdout: every line that starts with a number, which includes
# the numbered list of link targets lynx prints for the page.
links_from_dump() {
  archive_lines '^[ ]*[0-9].*'
}

# stdin: archive dump.  stdout: one second-level Org heading ("** Title") per article.
titles_from_dump() {
  archive_lines '^[ ]*\[.*' |
    sed -e 's/^[ ]*\[[0-9]*\]//' -e 's/^\(.\)/\*\* \1/' -e 's/\[.*\]//' -e 's/[ ]posted by.*//'
}

# Fetch the archive page once, so that footnotes, links and titles are all derived from
# the same snapshot (and the server is asked one time instead of three).  A failed fetch
# is tolerated here and simply yields empty lists.
archive_dump=$(lynx -dump $NEWS_ARCHIVE) || true

# Newline-separated list of footnote patterns, one per news article for said project
FOOTNOTES=$(footnotes_from_dump <<< "$archive_dump")

# Newline-separated list of all numbered links that belong to the $NEWS_ARCHIVE page
ALL_LINKS=$(links_from_dump <<< "$archive_dump")

# Newline-separated list containing the title of each news article for said project
TITLES=$(titles_from_dump <<< "$archive_dump")

# For each article we find, we will append it to the end of our NEWS file (which we will format later)

# If there are no news articles published for this project, then let the user know about it.
# This must be decided before the loop: with an empty list the loop body never runs, and
# `wc -l` on a here-string never reports 0 because a newline is always appended.
if [[ -z ${FOOTNOTES//[[:space:]]/} ]]; then
  rm -f NEWS
  echo -e "\n\e[31mError:\e[0m No new articles can be found on project \"$NAME\"" >&2
  exit 1
fi

# Let's be grammatically correct, shall we?
total=$(wc -l <<< "$FOOTNOTES")
if [[ $total -eq 1 ]]; then
  plural=article
else
  plural=articles
fi
echo -e "There is a total of #[$total] $plural about \"$NAME\"\nFetching $plural ..."

# Let's keep track of each iteration for reference
count=0
for footnote in $FOOTNOTES
do
  count=$((count + 1))
  article=$(sed -n "$count p" <<< "$TITLES")

  # $footnote is a sed pattern such as `12\.`.  It is anchored to the start of the line
  # so that it cannot also select `112.` or a URL that merely contains "12.".  The link
  # list comes last in the page dump, hence the last match is the one we want.
  link=$(sed -n "s/^[ ]*${footnote}[ ][ ]*//p" <<< "$ALL_LINKS" | tail -n 1)

  # Which article are we working on again?
  echo "Appending article #[$count], with title: $article"
  echo "$article" >> NEWS

  # Without a link, lynx would be started with no URL at all; keep the title only
  if [[ -z $link ]]; then
    echo -e "\e[33mWarning:\e[0m No link found for article #[$count]; its body was skipped" >&2
    continue
  fi

  # We shall parse the output, in order to append only the body of the article
  # NOTE: any other value of the fourth argument (or none) appends the titles only
  case "$4" in
      --del-comments)
          lynx "$link" -dump |\
              sed -e '1,/\[[0-9]* news in archive/ d' -e '/\[[0-9]*\]Comments:/,$ d' -e 's/\[.*\]//' |\
              sed -e '1,2 d' 2> /dev/null >> NEWS
          ;;
      --add-comments)
          lynx "$link" -dump |\
              sed -e '1,/\[[0-9]* news in archive/ d' -e 's/\[.*\]//' -e '/Change View/ d' -e '/Back to the top/,$ d' \
                  -e '/Comments:/,$ s/.*/   &/' \
                  -e 's/\(.*\)\(Comments:\)/\*\*\* \2/' \
                  -e '/(posted by/ N; s/\n[ ]*/ /' \
                  -e 's/\(^[ ]*\)\(.*(posted by.*)\)/\*\*\*\* \2/' \
                  -e 's/^[ ]\{6,\}/     /' \
                  -e '/\*\{4,\}.*/ { N; s/\n// }' \
                  -e 's/\(.*(posted by.*UTC)\)\([ ]*\)\([^ ]$*\)/\1\n     \3/' \
                  -e 's/posted by \[.*\]/posted by /' -e 's/\(.*No messages in \)\(.*\)/\1\[\[\2\][\2]\]/' |\
              sed -e '1,2 d' 2> /dev/null >> NEWS
          ;;
  esac
done
# Remove excess spaces
# (lines that consist of spaces only become truly empty lines)
# NOTE: this and the following steps edit NEWS in place with `sed -i` and use `\n` in
# replacements, as supported by GNU sed.
sed -i 's/^[ ]*$//' NEWS

# Add blank lines in specific places, to allow for proper paragraph structure
# A newline is appended to: lines ending in a full stop or a double quote, lines that
# contain an http:// link, and lines that contain ": " followed by a tar.gz name.
sed -i -e '/.*[\."]$/ { s/[ ]*.*/&\n/ }' \
       -e '/[ ]*http.*$/ { s_.*http://.*$_&\n_ }' \
       -e '/.*: .*tar.gz.*$/ { s_.*tar.gz.*$_&\n_ }' NEWS

# Ensure there are no extra blank lines
# (a run of consecutive empty lines is squeezed into a single one)
sed -i '/^$/ N; /^\n$/ D' NEWS

# PART 1 - I really don't like it when hyperlinks are cut-off!  Let's fix that ...
# NOTE: I'll be adding some extra blank lines, to separate links from text
#
# The whole file is first read into the variable NEWS, because the pipeline below
# writes its result back to the file NEWS, which is truncated as soon as the pipeline
# starts.  Roughly: a line beginning with "http:" is joined with the line(s) after it,
# the gap left inside the link is closed, text following the link is moved to a line of
# its own, and a blank line is added after every link line.
# NOTE(review): only "http:" is matched; "https:" links are not handled here.
NEWS=$(< NEWS)
sed -e '/^[ ]*http:.*/ N; s/\n//; /^[ ]*http:.*/ { N; s/\n// }' \
    -e '/^[ ]*http:.*/ s/\(^[ ]*http:[^ ]*\)[ ]*\([^ ]*\)$/\1\2/; s/\(^[ ]*http:[^ ]*\)\([ ]*.*\)/\1\n\2/' <<< "$NEWS" |\
    sed 's/^[ ]*http:.*/&\n/' > NEWS

# Ensure there are no extra blank lines, again!
sed -i '/^$/ N; /^\n$/ D' NEWS

# Ensure that all comments are lined up
# (the line after a "*** Comments:" heading is pulled up onto the heading line, and the
# first run of five or more spaces on a line is reduced to four)
sed -i -e '/\*\*\* Comments:/ N; s/\n//; /\*\*\* Comments:/ N; s/[ ]\{5,\}/    /' NEWS

# PART 2 - Still aligning hyperlinks properly ...
#
#  NOTE: Fixes several errors created by the above sed script, including: moving links
#+ to proper positions; adjusting moved `Comments:` headers; fixing any subheaders that had their
#+ asterisks misplaced; and finally, removing excess spaces
#
#  As in PART 1, the file is read into the variable NEWS first, since the pipeline
#+ writes its result back to the file NEWS.  The four sed stages run one after another
#+ on the whole text, so their order matters.
#  NOTE(review): as above, only "http:" links are matched, not "https:".
NEWS=$(< NEWS)
sed -e 's/\*\*\*Comments:/\n\n\*\*\* Comments:/' \
    -e 's/\(http:[^ ]*\)\(http:[^ ]*\)/\1\n   \2/' \
    -e '/.*\*\*$/,/\*\*\* Comments:$/ { s/^ [^\* ]/\*\*&/ }' \
    -e 's/\*\*$//' <<< "$NEWS" |\
    sed -e '/^[ ]*http:.*-$/ { N; s/\n// }' \
        -e '/^[ ]*http:.*-$/ { N; s/-\n/-/ }' \
        -e 's/\(http:[^ ]*-\)[ ]*\([^ ]*$\)/\1\2/' \
        -e '/^[ ][^ ]/ { s/.*/  &/; s/\(^[ ]*[^ ]*\)[ ][ ]\(.*\)/\1\2/; N; s/\n// }' \
        -e '/\*\*[ ].*/,/\*\*\* Comments:$/ { s/\*\*[ ](/   (/ }' \
        -e 's/^[ ][^ ]/  &/' \
        -e 's/\([^ ]\)   \([^ ]\)/\1\n   \2/g' |\
    sed -e '/\*\*\*\*/ !{ s/^[ ]\{4,\}/ &/ }' \
        -e 's/[ ]\{4,\}/\n     /g' \
        -e 's/\**$//' \
        -e 's/\[[0-9]*\]//' |\
    sed -e '/Comments:/ { N; s/\n// }' -e 's/\([ ]*\)\(No messages in .*\)/    \2/' > NEWS

# PART 3 - Fixes links that have newlines within them (only fixes them if they are within angle brackets `<http-link-in-brackets>`)
# A line that opens `<http:` without a closing `>` is joined with the next two lines,
# and the first gap that the join left inside the link is closed.
sed -i '/^[ ]*<http:[^>]*$/ { N; s/\n//; N; s/\n//; s/\(^[ ]*<http:[^ ]*\)[ ]*\([^ ]*\)/\1\2/ }' NEWS

# Prepare $1 for literal use inside the replacement half of an `s_..._..._` command:
# the backslash, `&` (which would otherwise stand for the matched text) and the `_`
# delimiter are each protected with a backslash.
escape_sed_replacement() {
  sed -e 's/[\\&_]/\\&/g' <<< "$1"
}

# Create a link for the user to find documentation on this project
# Within the description section, the line that names the project as "GNU software" or
# "non-GNU ..." is joined with its continuation line and wrapped into an Org link of the
# form [[project-url][text]]; the link is then moved onto a line of its own.
# The URL is escaped beforehand, so that a project path containing `_`, `&` or `\`
# cannot terminate or alter the substitution.
doc_url=$(escape_sed_replacement "${PROJ_HOME}${pkg_path}")
sed -i -e "/\* Desc.*/,/\*.*/ { /[^-]GNU s.*$/ { N; s/\(.GNU.*\)\n[ ]/\1/; s_\(GNU.*\)\n_\[\[${doc_url}\]\[\1\]\]\n_ }}" \
       -e "/\* Desc.*/,/\*.*/ { /non-GNU.*$/ { N; s/\(.*non-GNU.*\)\n[ ]/\1/; s_\(non-GNU.*\)\n_\[\[${doc_url}\]\[\1\]\]\n_ }}" \
       -e '/\* Desc.*/,/\*.*/ { /\[\[.*/ { N; s/[ ]\(\[\[.*\)/\n  \1/ }}' NEWS

# Add Org formats to all paragraphs in order to look well if exported to html
# - "#+BEGIN_QUOTE" is inserted at the start of the line that follows the description
#   heading ("* D...") and of the line that follows every article heading ("** ...")
# - the first empty line after the description heading becomes "#+END_QUOTE"
# - "#+END_QUOTE" is put on a line of its own in front of anything that starts with
#   three asterisks (this pattern also matches the four-asterisk reply headings)
sed -i -e '/^\* D.*/ { N; s/\n/&#+BEGIN_QUOTE/ }' \
       -e '/^\* D.*/,/^$/ { s/^$/&#+END_QUOTE/ }' \
       -e '/^\*\*[^\*].*/ { N; s/\n/&#+BEGIN_QUOTE/ }' \
       -e 's/\*\*\*.*/#+END_QUOTE\n&/' NEWS

# Align the first reply of any comment, properly
# (a reply heading glued to "*** Comments:" is moved onto the next line)
sed -i 's/\(\*\*\* Comments:\)\(\*\*\*\*.*\)/\1\n\2/' NEWS

# Remove blank lines after any comment subheading
# (the following line is pulled up onto the "Comments:" line; if that leaves text
# after "Comments:" beginning with a space, it is split off onto its own line again)
sed -i -e '/Comments:/ { N; s/\n// }' -e '/Comments:/ { s/\(Comments:\)\([ ].*\)/\1\n\2/ }' NEWS

# Ensure our header is in place
# (line 4 is joined onto line 3; presumably this undoes a line break that the earlier
# blank-line steps introduced into the header -- TODO confirm)
sed -i '3 { N; s/\n// }' NEWS

# Close the file with the copyright text captured at start-up
# (backslash escapes inside the text are expanded by `echo -e`)
echo -e "${COPYRIGHT}" >> NEWS

# Let the user know that the NEWS file has been generated
printf '%s\n' "NEWS file complete!"

# End:
# news-maker.bash ends here