" ${file}.html | sed -e 's#<title>##g'| sed -e 's#

#!/bin/sh # html2page # scottm, 2009/07/30 # Extracts data from .html files to .page files for webgen. # This allows OpenOffice.org presentations to be converted to a webgen site. # Contents of .page files could also be used for a content management site. # run from cygwin, or linux # .page files are overwritten # Read .html files that OpenOffice.org Impress generated for each slide #for file in `ls img*.html text*.html|awk -F\. '{print $1}'` echo "Generating .page files..." for file in `ls *.html|sed -e 's/.html//'` do #generate title section from slide title export title=`grep "" ${file}.html | sed -e 's#<title>##g'| sed -e 's###g'` echo "---" > ${file}.page echo "title: ${title}" >> ${file}.page echo "---" >> ${file}.page #strip out extra html lines from top an bottom cat ${file}.html |grep -v DOCTYPE |grep -v "html>" | grep -v "transitional.dtd"| grep -v "" |grep -v ""| grep -v ""|grep -v ">${file}.page done echo "cleaning .page files..." # get rid of the closing body tag (may not be on a line by itself) sed -e 's###' -i *.page echo "adding tags..." # add some line breaks to the img version to make the menu look better sed -e 's#\#br/>