#!/bin/sh

# html2page
# scottm, 2009/07/30
# Extracts data from .html files to .page files for webgen.
# This allows OpenOffice.org presentations to be converted to a webgen site.
# Contents of .page files could also be used for a content management site.
# run from cygwin, or linux
# .page files are overwritten

# Read .html files that OpenOffice.org Impress generated for each slide
#for file in `ls img*.html text*.html|awk -F\. '{print $1}'`
# html_to_page SRC
#   Convert one exported slide, SRC (a path ending in .html), into a webgen
#   page with the same name but a .page suffix.  An existing .page file is
#   overwritten.
#
#   The page starts with a webgen meta block ("---", "title: ...", "---")
#   whose title is the text of the line(s) containing <title>, with the
#   <title> and </title> tags removed.  The rest of the page is SRC minus
#   every line that contains one of the wrapper markers listed below.
#   NOTE: the filter is line-based, so a content line that happens to
#   contain e.g. "html>" is dropped as well.
#
#   The body is a subshell "( ... )" so the helper variables stay local
#   (POSIX sh has no "local"); "exit" therefore only leaves the function.
html_to_page() (
	src=$1
	# Strip the suffix with parameter expansion: unlike sed 's/.html//' this
	# only ever removes a literal ".html" at the very end of the name.
	page=${src%.html}.page

	if [ ! -r "$src" ]; then
		printf 'html2page: cannot read %s\n' "$src" >&2
		exit 1
	fi

	# generate title section from slide title
	title=$(grep '<title>' <"$src" | sed -e 's#<title>##g' -e 's#</title>##g')

	{
		# printf instead of echo: a title may contain backslashes or start
		# with "-", which echo treats differently from shell to shell.
		printf '%s\n' '---' "title: ${title}" '---'

		# strip out the HTML wrapper lines from top and bottom
		# (-F: every marker is a literal string, not a regular expression)
		grep -v -F \
			-e 'DOCTYPE' \
			-e 'html>' \
			-e 'transitional.dtd' \
			-e '<head>' \
			-e '</head>' \
			-e '<meta' \
			-e '<link' \
			-e '<title>' \
			-e '<body' \
			<"$src"
	} >"$page"
)

# generate_pages
#   Run html_to_page on every regular *.html file in the current directory.
#   File names may contain spaces.  Does nothing when there is no .html file.
generate_pages() (
	for html_file in *.html; do
		# An unmatched glob is left as the literal string "*.html"; skip it
		# (and anything that is not a regular file) instead of writing "*.page".
		[ -f "$html_file" ] || continue
		html_to_page "$html_file"
	done
)

echo "Generating .page files..."
generate_pages
# strip_body_close
#   Remove the first "</body>" on each line of every *.page file in the
#   current directory, editing the files in place (needs a sed with -i,
#   e.g. GNU sed).  The tag is removed by substitution rather than by
#   dropping the line because it may not be on a line by itself.
#   Does nothing when there is no .page file.
strip_body_close() {
	# Positional parameters are per-function, so this does not disturb the
	# caller's arguments.
	set -- *.page
	# An unmatched glob stays literal; do not hand "*.page" to sed.
	[ -e "$1" ] || return 0
	# Options first and "--" before the operands, so a file name starting
	# with "-" is never parsed as an option.
	sed -i -e 's#</body>##' -- "$@"
}

echo "cleaning .page files..."
strip_body_close

# add_img_breaks
#   In every img*.page file of the current directory, replace the first
#   "<br>" on each line with four "<br/>" tags, to make the menu of the img
#   version look better.  Files are edited in place (needs a sed with -i).
#   Does nothing when there is no img*.page file.
add_img_breaks() {
	set -- img*.page
	# An unmatched glob stays literal; do not hand "img*.page" to sed.
	[ -e "$1" ] || return 0
	# Match the literal tag.  In GNU sed, "\<" and "\>" are word-boundary
	# anchors, not angle brackets, so a pattern of \<br\> would also hit the
	# "br" inside "<br/>" or a bare word "br" and leave malformed markup.
	sed -i -e 's#<br>#<br/><br/><br/><br/>#' -- "$@"
}

# resize_picasa_embeds
#   In every target*.page file of the current directory, rewrite each
#   width="640" height="479" attribute pair to width="540" height="404"
#   (picasa files).  Files are edited in place.
#   Does nothing when there is no target*.page file.
resize_picasa_embeds() {
	set -- target*.page
	[ -e "$1" ] || return 0
	sed -i -e 's/width="640" height="479"/width="540" height="404"/g' -- "$@"
}

echo "adding tags..."
add_img_breaks
resize_picasa_embeds

# convert_pages_to_dos
#   Optional step: run unix2dos on every *.page file in the current
#   directory.  Because the step is optional it is skipped (with a note on
#   stderr) when unix2dos is not available, and skipped silently when there
#   is no .page file to convert.
convert_pages_to_dos() {
	set -- *.page
	# An unmatched glob stays literal; do not hand "*.page" to unix2dos.
	[ -e "$1" ] || return 0
	if ! command -v unix2dos >/dev/null 2>&1; then
		echo "unix2dos not found; leaving .page files unconverted" >&2
		return 0
	fi
	unix2dos "$@"
}

convert_pages_to_dos

echo "done."

