Gather content from a website

Snarf the Gazette local news website

Gather content from the Colorado Springs Gazette news website, convert it to iSilo format and install it on a Palm Pilot.  It also creates a website on your hard drive, for off-line viewing. This exec32 script demonstrates how to use the web clipping function in the exec32 Desktop Scripting Language to gather dynamic information from a website, stripping out the "noise", and re-packaging the content for use on a handheld device.

' -----------------------------------------------------------------------------
' Script     : Snarf Gazette.e
' Description: Snarf the Gazette local news website, convert to iSilo format
'              and install on Palm Pilot. It also creates a website on your hard
'              drive, for off-line viewing. This exec32 script demonstrates how
'              to use the web clipping function in the exec32 Desktop Scripting
'              Language to gather dynamic information from a website, stripping
'              out the "noise", and re-packaging the content for use on a handheld
'              device.
'
'              You can download the exec32 Desktop Scripting Language from the
'              exec32 website at:  
'              http://home.earthlink.net/~rschoolfbn/exec32/index.htm
'
' Assumptions: C:\Temp exists.  You have the iSilo command-line converter software
'              installed in D:\Program Files\iSiloWeb. You have a batch file
'              "..\Bat\gazette.bat".  Here are the contents of the batch file:
' @echo off
' "D:\Program Files\iSiloWeb\iSiloC32.exe" -y -I -Idef "c:\temp\gazette.htm" "c:\temp\Gazette.pdb" -i"Gazette Local News"
' rem pause
'
' Date       : 02-20-2001
' Author     : Robert Schoolfield, rschoolf@hotmail.com
' Changes    :
' Copyright  : (C) Copyright 2000, by Robert Schoolfield.  All Rights Reserved.
'              This script is released to the Public Domain, and may not be re-
'              sold.
' -----------------------------------------------------------------------------

sub main()
' ----------------------------------------------------------------------------------
' --- Script entry point.  Runs the daily snarf at most once per day: fetch the
' stories, rebuild the off-line web site from the control file, then hand the
' result to the iSilo conversion step.

'trace

dim r, scriptsdir

' --- work from the Scripts directory under the exec32 run directory...
' NOTE(review): no path separator is inserted before "Scripts" - confirm that
' getdir("rundir") already ends with one.
scriptsdir = getdir("rundir") & "Scripts"
cd scriptsdir

' --- today's run already happened?  Tell the user and stop...
if done_run() then
 tmb "The snarf has been done run today."
 exit sub
end if

' --- step 1: save each news item to its own file and record it in the control file...
snarf_todays_items

' --- step 2: turn the control file into an index page linking the saved items...
create_website

' --- step 3: convert the site to an iSilo document (per the original notes this
' also installs it on the Palm via HotSync and copies it to a server)...
convert_to_isilo

end sub

function strip_html(thehtml)
' -----------------------------------------------------------------------------------
' --- Given a string, strip out the HTML markup and return the remaining text.
'
' thehtml : the text to clean.
' returns : thehtml with every "<...>" run removed and every line feed (chr(10))
'           and carriage return (chr(13)) dropped.  Returns "" when the input is
'           empty or consists only of markup.
'
' Line breaks are removed, not replaced by a space, so text on adjacent lines is
' joined directly.  HTML entities (such as &amp;) are passed through unchanged.

dim i, buf, intag, ch, eatlist

eatlist = chr(10) & chr(13)

' --- start from a known state rather than relying on uninitialized variables...
buf = ""
intag = false

for i = 1 to len(thehtml)
 ch = mid(thehtml, i, 1)
 if ch = "<" then
   ' --- entering a tag: suppress output until the matching ">"...
   intag = true
 elseif ch = ">" then
   intag = false
 elseif not intag then
   ' --- ordinary text: keep it unless it is one of the eaten characters...
   if instr(eatlist, ch) < 1 then
     buf = buf & ch
   end if
 end if
next

strip_html = buf

end function

sub snarf_todays_items
' -----------------------------------------------------------------------------------
' --- retrieve each news item to a random file name, prepend an entry for each
' in the control file, and finish up with a date line.  Web clipping works on the
' principle of predictable begin and end strings around the desired content...
'
' Sections fetched, in order: local news (up to 8 pages), life (up to 8 pages) and
' opinion (up to 3 pages).  Because entries are prepended, the date line written
' last ends up above the items fetched in this run.

dim thedate

' --- get the local daily news...
snarf_section "http://www.gazette.com/daily/loc", 8

' --- get the daily life...
snarf_section "http://www.gazette.com/daily/life", 8

' --- get the daily opinion...
snarf_section "http://www.gazette.com/daily/opin", 3

' --- add a date line to the control file...
thedate = date
prepend thedate, "", ""

end sub

sub snarf_section (urlprefix, maxitems)
' -----------------------------------------------------------------------------------
' --- private helper: clip the numbered pages of one section and save each one.
'
' urlprefix : page URL up to, but not including, the page number
' maxitems  : highest page number to try
'
' Pages urlprefix & "1.html" .. urlprefix & maxitems & ".html" are clipped between
' the "<!headline>" and "<!-- end of storie" markers.  The first page that yields
' an empty clip ends the section.

dim i, theclip

for i = 1 to maxitems
 theclip = clipping(urlprefix & trim(i) & ".html", "<!headline>", "<!-- end of storie")

 if len(trim(theclip)) then
   write_the_clip(theclip)
 else
   ' --- nothing clipped: assume there are no further pages in this section...
   exit for
 end if
next

end sub

function gen_rand_name()
' -----------------------------------------------------------------------------------
' --- generate a random webpage name...
'
' returns : a file name of the form c:\temp\NNNNNNNN.htm.  The digits are a random
'           integer in the range 1000..9999 followed by the whole-number value of
'           Timer, truncated to at most 8 characters in total.
'
' The directory is c:\temp, matching the "C:\Temp exists" assumption in the script
' header (the backslashes had been lost from the path literal).
' NOTE(review): nothing here seeds the random generator or checks whether the file
' already exists - confirm that name collisions are acceptable.

dim randint

randint = Int(Rnd * (9999 - 1000 + 1)) + 1000
gen_rand_name = "c:\temp\" & Left(Trim(randint) & Trim(Int(Timer)), 8) & ".htm"

end function

sub prepend (thedate, itemfile, headline)
' -----------------------------------------------------------------------------------
' --- add a line to the top of the control file...
'
' thedate  : date text for a date line ("" for an item line)
' itemfile : path of the saved item page ("" for a date line)
' headline : headline text of the item ("" for a date line)
'
' The new record is written as thedate~itemfile~headline~ (note the trailing "~").
' It goes first into a temporary file, the existing control file is appended after
' it line by line, and the temporary file is then copied over the control file.
'
' Both files live in c:\temp, matching the "C:\Temp exists" assumption in the
' script header (the backslashes had been lost from the path literals).
' NOTE(review): the result of fopen is not checked - confirm what happens on the
' very first run, when the control file does not exist yet.

dim r, buf, fp1, fp2, tmpfile, controlfile

controlfile = "c:\temp\gazette.ctl"
tmpfile = "c:\temp\gazette.tmp"

' --- the new record goes first...
fp1 = fopen(tmpfile, "output")

buf = thedate & "~" & itemfile & "~" & headline & "~"
r = fprint(fp1, buf)

' --- ...followed by everything already in the control file...
fp2 = fopen(controlfile, "input")

do
 r = finput(fp2, buf)
 ' --- a negative result from finput ends the copy (presumably end of file)...
 if r < 0 then exit do
 r = fprint(fp1, buf)
 ' --- a negative result from fprint also ends the copy...
loop until r < 0

fclose fp1
fclose fp2

' --- replace the control file with the rebuilt copy...
copy tmpfile & " " & controlfile

end sub

sub create_website
' -----------------------------------------------------------------------------
' --- read through the control file, and create a web site...
'
' Reads up to 40 records from the control file and writes the index page
' c:\temp\Gazette.htm.  Each record has the form thedate~itemfile~headline~ :
'   - a record with a date starts a new "News from <date>" group;
'   - a record with no date but an item file becomes a link to that file;
'   - a record with neither is ignored.
'
' Both files live in c:\temp, matching the "C:\Temp exists" assumption in the
' script header (the backslashes had been lost from the path literals).

dim controlfile, homefile, fp1, fp2
dim rec, thedate, itemfile, headline
dim r, q, i

q = chr(34)
controlfile = "c:\temp\Gazette.ctl"
homefile = "c:\temp\Gazette.htm"

fp1 = fopen(controlfile, "input")

fp2 = fopen(homefile, "output")

' --- page header...
r = fprint(fp2, "<html>")
r = fprint(fp2, "<title>Gazette Local News</title>")
r = fprint(fp2, "<body>")
r = fprint(fp2, "<h2>Gazette Local News</h2>")
r = fprint(fp2, "<hr>")
r = fprint(fp2, "<ul>")

' --- only the newest 40 control records are shown...
for i = 1 to 40

 r = finput(fp1, rec)

 ' --- any non-zero result from finput ends the listing...
 if r <> 0 then exit for

 thedate = subtoken(rec, 1, "~")
 itemfile = subtoken(rec, 2, "~")
 headline = subtoken(rec, 3, "~")

 logmsg "thedate: " & thedate
 logmsg "itemfile: " & itemfile
 logmsg "headline: " & headline

 if len(thedate) then
   ' --- date line: close the current list, print the date, open a new list...
   r = fprint(fp2, "</ul><p>")
   r = fprint(fp2, "News from " & thedate & "<p>")
   r = fprint(fp2, "<ul>")

 else
   if len(itemfile) > 0 then
     ' --- make a link to it...
     r = fprint(fp2, "<li> <a href=" & q & itemfile & q & ">" & headline & "</a><br>")
   end if
 end if

next

' --- page footer...
r = fprint(fp2, "</ul>")
r = fprint(fp2, "<p>News items snarfed from the Colorado Springs Gazette Telegraph by exec32, written by Robert Schoolfield.<br>Version 1.5<p>")
r = fprint(fp2, "</body></html>")

fclose fp1
fclose fp2

end sub

function subtoken(thestr, elem, delim)
' ---------------------------------------------------------------------------------------
' --- retrieve elements in a delimited string...
'
' thestr  : the delimited string, e.g. "a~b~c~"
' elem    : 1-based index of the element wanted
' delim   : the delimiter; may be longer than one character
' returns : the requested element, or "" when elem is less than 1 or past the
'           last element.
'
' A trailing delimiter is optional: both "a~b~" and "a~b" yield "b" for element 2.

dim tmp, rest, p, i

tmp = ""
rest = thestr

for i = 1 to elem
 p = instr(rest, delim)
 if p > 0 then
   tmp = left(rest, p - 1)
   ' --- skip the whole delimiter, not just its first character...
   rest = mid(rest, p + len(delim))
 else
   ' --- no delimiter left: what remains is the final element, and any
   ' element requested beyond it is empty...
   tmp = rest
   rest = ""
 end if

next

subtoken = tmp

end function

sub convert_to_isilo
' ------------------------------------------------------------------------
' --- convert the website to an

You might also like...

Comments

 exec32

Contribute

Why not write for us? Or you could submit an event or a user group in your area. Alternatively just tell us what you think!

Our tools

We've got automatic conversion tools to convert C# to VB.NET, VB.NET to C#. Also you can compress javascript and compress css and generate sql connection strings.

“There are 10 types of people in the world, those who can read binary, and those who can't.”