Gather content from a website

Snarf the Gazette local news website

Gather content from the Colorado Springs Gazette news website, convert it to iSilo format, and install it on a Palm Pilot.  It also creates a website on your hard drive for off-line viewing. This exec32 script demonstrates how to use the web clipping function in the exec32 Desktop Scripting Language to gather dynamic information from a website, stripping out the "noise" and re-packaging the content for use on a handheld device.

' -----------------------------------------------------------------------------
' Script     : Snarf Gazette.e
' Description: Snarf the Gazette local news website, convert to iSilo format
'              and install on Palm Pilot. It also creates a website on your hard
'              drive, for off-line viewing. This exec32 script demonstrates how
'              to use the web clipping function in the exec32 Desktop Scripting
'              Language to gather dynamic information from a website, stripping
'              out the "noise", and re-packaging the content for use on a handheld
'              device.
'
'              You can download the exec32 Desktop Scripting Language from the
'              exec32 website at:  
'              http://home.earthlink.net/~rschoolfbn/exec32/index.htm
'
' Assumptions: C:\Temp exists.  You have iSilo commandline converter software
'              installed in D:\Program Files\iSiloWeb. You have a batch file
'              "..\Bat\gazette.bat".  Here are the contents of the batch file:
' @echo off
' "D:\Program Files\iSiloWeb\iSiloC32.exe" -y -I -Idef "c:\temp\gazette.htm" "c:\temp\Gazette.pdb" -i"Gazette Local News"
' rem pause
'
' Date       : 02-20-2001
' Author     : Robert Schoolfield, [email protected]
' Changes    :
' Copyright  : (C) Copyright 2000, by Robert Schoolfield.  All Rights Reserved.
'              This script is released to the Public Domain, and may not be re-
'              sold.
' -----------------------------------------------------------------------------

sub main()
' ----------------------------------------------------------------------------------
' --- Script entry point.  Changes to the Scripts directory and then, unless
' done_run() returns true, runs the three stages in order: snarf_todays_items,
' create_website, convert_to_isilo.

'trace

dim scriptdir

' --- work from the exec32 Scripts directory...
' NOTE(review): this literal may have lost a leading path separator when the
' listing was published -- confirm against what getdir("rundir") returns.
scriptdir = getdir("rundir") & "Scripts"
cd scriptdir

if done_run() then
 ' --- a run has already happened today, so only tell the user...
 tmb "The snarf has been done run today."
else
 ' --- fetch each news item into its own file and record it at the top of
 ' the control file...
 snarf_todays_items

 ' --- walk the control file and build the index page...
 create_website

 ' --- convert the website to an iSilo file, install it on the Palm via
 ' HotSync, and place a copy on a server for others to access...
 convert_to_isilo
end if

end sub

function strip_html(thehtml)
' -----------------------------------------------------------------------------------
' --- Given a string, strip out the HTML markup...
'
' thehtml : text that may contain HTML tags.
' returns : thehtml with everything from each "<" through the next ">" removed,
'           and with every line feed (chr(10)) and carriage return (chr(13))
'           removed.  A ">" that appears outside a tag is dropped as well.
'           An empty input returns "".

dim i, buf, intag, ch, eatlist

' --- characters that are discarded even when they are outside a tag...
eatlist = chr(10) & chr(13)

' --- start from a known state rather than relying on uninitialised variables...
buf = ""
intag = false

for i = 1 to len(thehtml)
 ch = mid(thehtml, i, 1)
 if ch = "<" then
   intag = true
 elseif ch = ">" then
   intag = false
 elseif not intag then
   ' --- plain text: keep it unless it is one of the eaten characters...
   if instr(eatlist, ch) < 1 then
     buf = buf & ch
   end if
 end if
next

strip_html = buf

end function

sub snarf_todays_items
' -----------------------------------------------------------------------------------
' --- retrieve each news item to a random file name, prepend an entry for each
' in the control file, and finish up with a date line.  Web clipping works on the
' principle of predictable begin and end strings around the desired content...
'
' Because every entry is prepended, the date line written last ends up above
' the items that were just fetched.

dim thedate

' --- get the local daily news (pages loc1..loc8)...
snarf_section "http://www.gazette.com/daily/loc", 8

' --- get the daily life (pages life1..life8)...
snarf_section "http://www.gazette.com/daily/life", 8

' --- get the daily opinion (pages opin1..opin3)...
snarf_section "http://www.gazette.com/daily/opin", 3

' --- add a date line to the control file...
thedate = date
prepend thedate, "", ""

end sub

sub snarf_section (urlprefix, maxitems)
' -----------------------------------------------------------------------------------
' --- clip the numbered pages urlprefix & "1.html" .. urlprefix & maxitems & ".html"
' and pass each non-blank clipping to write_the_clip.  Stops at the first page
' whose clipping comes back blank.
'
' urlprefix : URL up to, but not including, the page number.
' maxitems  : highest page number to try.

dim i, theclip

for i = 1 to maxitems
 ' --- the begin/end marker strings are reproduced exactly as the script
 ' has always used them...
 theclip = clipping(urlprefix & trim(i) & ".html", "<!headline>", "<!-- end of storie")

 if len(trim(theclip)) then
   write_the_clip(theclip)
 else
   exit for
 end if
next

end sub

function gen_rand_name()
' -----------------------------------------------------------------------------------
' --- generate a random webpage name...
'
' returns : "c:\temp\" followed by up to eight digits and ".htm".  The digits
'           are a random number in 1000..9999 followed by the whole-number
'           part of Timer, cut to the first eight characters.

dim randint

randint = Int(Rnd * (9999 - 1000 + 1)) + 1000

' --- the directory separators had been lost from this path literal; without
' them the name does not point into the c:\temp directory...
gen_rand_name = "c:\temp\" & Left(Trim(randint) & Trim(Int(Timer)), 8) & ".htm"

end function

sub prepend (thedate, itemfile, headline)
' -----------------------------------------------------------------------------------
' --- add a line to the top of the control file...
'
' thedate  : date text for a date line, or "" for an item line.
' itemfile : path of the saved story page, or "" for a date line.
' headline : headline text, or "" for a date line.
'
' The new line is written as  thedate~itemfile~headline~  to a temporary file,
' the existing control file is appended after it, and the temporary file is
' then copied over the control file.
'
' NOTE(review): what fopen returns when the control file does not exist yet
' is not visible here -- confirm the first-ever run is handled.

dim r, buf, fp1, fp2, tmpfile, controlfile

' --- the directory separators had been lost from these path literals...
controlfile = "c:\temp\gazette.ctl"
tmpfile = "c:\temp\gazette.tmp"

fp1 = fopen(tmpfile, "output")

' --- the new entry goes first...
buf = thedate & "~" & itemfile & "~" & headline & "~"
r = fprint(fp1, buf)

fp2 = fopen(controlfile, "input")

' --- ...followed by everything already in the control file...
do
 r = finput(fp2, buf)
 ' --- any non-zero result ends the copy, the same test create_website
 ' uses when it reads this file...
 if r <> 0 then exit do
 r = fprint(fp1, buf)
loop until r < 0

fclose fp1
fclose fp2

' --- replace the control file with the rebuilt copy...
copy tmpfile & " " & controlfile

end sub

sub create_website
' -----------------------------------------------------------------------------
' --- read through the control file, and create a web site...
'
' Reads up to 40 lines of the form  thedate~itemfile~headline~  from the
' control file and writes an index page.  A line with a date starts a new
' "News from <date>" section; a line with an item file becomes a list entry
' linking to that file.  Reading stops early at the first non-zero finput
' result.

dim homefile, fp1, buf, fp2, headline, thedate, controlfile
dim itemfile
dim r, q, i

q = chr(34)

' --- the directory separators had been lost from these path literals...
controlfile = "c:\temp\Gazette.ctl"
homefile = "c:\temp\Gazette.htm"

fp1 = fopen(controlfile, "input")

fp2 = fopen(homefile, "output")

' --- page header...
buf = "<html>"
r = fprint(fp2, buf)

buf = "<title>Gazette Local News</title>"
r = fprint(fp2, buf)

buf = "<body>"
r = fprint(fp2, buf)

buf = "<h2>Gazette Local News</h2>"
r = fprint(fp2, buf)

buf = "<hr>"
r = fprint(fp2, buf)

' --- open a list so the "</ul>" written for the first date line has a match...
buf = "<ul>"
r = fprint(fp2, buf)

' --- only the newest 40 control-file lines make it onto the page...
for i = 1 to 40

 r = finput(fp1, buf)

 if r <> 0 then exit for

 thedate = subtoken(buf, 1, "~")
 itemfile = subtoken(buf, 2, "~")
 headline = subtoken(buf, 3, "~")

 logmsg "thedate: " & thedate
 logmsg "itemfile: " & itemfile
 logmsg "headline: " & headline

 if len(thedate) then
   ' --- date line: close the current list and start a new section...
   buf = "</ul><p>"
   r = fprint(fp2, buf)

   buf = "News from " & thedate & "<p>"
   r = fprint(fp2, buf)

   buf = "<ul>"
   r = fprint(fp2, buf)

 else
   if len(itemfile) > 0 then
     ' --- make a link to it...
     buf = "<li> <a href=" & q & itemfile & q & ">" & headline & "</a><br>"
     r = fprint(fp2, buf)
   end if
 end if

next

' --- page footer...
buf = "</ul>"
r = fprint(fp2, buf)

buf = "<p>News items snarfed from the Colorado Springs Gazette Telegraph by exec32, written by Robert Schoolfield.<br>Version 1.5<p>"
r = fprint(fp2, buf)

buf = "</body></html>"
r = fprint(fp2, buf)

fclose fp1
fclose fp2

end sub

function subtoken(thestr, elem, delim)
' ---------------------------------------------------------------------------------------
' --- retrieve elements in a delimited string...
'
' thestr  : the delimited string, e.g. "a~b~c~".
' elem    : 1-based number of the element wanted.
' delim   : the delimiter; may be more than one character long.
' returns : the requested element.  The last element is returned whether or
'           not it is followed by a delimiter.  Returns "" when elem is less
'           than 1 or greater than the number of elements.

dim tmp, buf, thestrc, p, i

thestrc = thestr
tmp = ""

for i = 1 to elem
 p = instr(thestrc, delim)
 if p > 0 then
   tmp = left(thestrc, p - 1)
   buf = thestrc
   ' --- step over the whole delimiter, not just its first character...
   thestrc = mid(buf, p + len(delim))
 else
   ' --- no delimiter left: the remainder is the final element, and
   ' anything asked for beyond it is empty...
   tmp = thestrc
   thestrc = ""
 end if

next

subtoken = tmp

end function

sub convert_to_isilo
' ------------------------------------------------------------------------
' --- convert the website to an

You might also like...

Comments

 exec32

Contribute

Why not write for us? Or you could submit an event or a user group in your area. Alternatively just tell us what you think!

Our tools

We've got automatic conversion tools to convert C# to VB.NET, VB.NET to C#. Also you can compress javascript and compress css and generate sql connection strings.

“You can stand on the shoulders of giants OR a big enough pile of dwarfs, works either way.”