#!/bin/sh
# simple script that calls wget to save webpage
# Usage: savepage.sh <url> <name>
#
# will create a subdir of that name and update index.html in $PWD
# deepak - n.deepak@gmail.com - http://puggy.symonds.net/~deep/

# Both arguments are mandatory: print a usage hint and stop if either one
# is missing or empty.
# Two separate `[ -z ... ]` tests are used because `==` and `-o` inside
# `[ ]` are not POSIX and fail under strict /bin/sh implementations (dash).
USAGE="USAGE: $0 <url> <name>"
if [ -z "${1:-}" ] || [ -z "${2:-}" ] ; then
	# usage errors are diagnostics, so they go to stderr
	printf '%s\n' "$USAGE" >&2
	exit 1
fi

#COOKIE_FILE="$HOME/public_html/webpages/cookies.txt"
#COOKIE_OPTS="--load-cookies $COOKIE_FILE --save-cookies $COOKIE_FILE --keep-session-cookies"

URL=$1
NAME=$2

# make dir
# Stop if the directory cannot be created or entered; otherwise wget would
# download into the current directory instead of "$NAME".
# `--` keeps a name that starts with '-' from being parsed as an option.
mkdir -p -- "$NAME" || exit 1
cd -- "$NAME" || exit 1

# save page
# $COOKIE_OPTS is intentionally unquoted: it is either unset/empty (expands
# to nothing) or a string of several wget options that must be word-split.
# wget's exit status is not checked, so the index is still updated afterwards
# exactly as before.
# shellcheck disable=SC2086
wget -E -H -k -K -p -t 5 --restrict-file-names=windows $COOKIE_OPTS "$URL"

# go back to where we started (prints that directory, as `cd -` always does)
cd - || exit 1

# update index.html
# you will need to edit it and remove links to other (non-main) html files
INDEX=index.html

# update_index <dir> <url> <index-file>
#
# Appends one entry to <index-file> for every path under <dir> whose name
# matches *.html or *.htm.  Each entry consists of three lines:
#   <dir> | bookmark link to <url> | link to the saved file |
#   the current date
#   <br><br>
# Paths are read line by line from find, so names containing spaces or glob
# characters are handled intact (names containing newlines are not supported).
# Nothing is written, and <index-file> is not created, when find reports no
# matches.
update_index() {
	find "$1" \( -name '*.html' -o -name '*.htm' \) -print |
	while IFS= read -r file ; do
		# escape chars that would be misread in a URL link: a literal '%'
		# would be taken as the start of an escape, and spaces are not valid
		linkname=$(printf '%s\n' "$file" | sed -e 's/%/%25/g' -e 's/ /%20/g')
		# remove dir components and keep at most the last 13 chars of the
		# file name (i.e. "8-chars.html")
		filename=$(printf '%s\n' "$file" | awk -F'/' '{ print substr($NF, length($NF)-12) }')
		{
			printf '%s\n' "$1 | <a href=\"$2\">bookmark</a> | <a href=\"$linkname\">$filename</a> | "
			# add save date, just fyi
			date
			printf '%s\n' "<br><br>"
		} >> "$3"
	done
}

update_index "$NAME" "$URL" "$INDEX"

# Drop read permission for group and others on every directory under
# "$NAME", so that their contents cannot be listed by them.
find "$NAME" -type d -exec chmod g-r,o-r '{}' ';'

# always report success
exit 0

