Create your own bot that searches the net for the keywords you enter, it shows the results in the taskbar, you can run the application and continue with your routine work. The bot will show you the results after searching the net and the results will be stored in the database. You can open and run the search again any time. A separate database is created for each individual search. You can learn lots of concepts from this program. Good Luck!!!
For this to work, add 4 buttons to a form. Call all of them Command1, and set their index properties to 0,1,2,3 and 4. (0=New Search, 1=Open Search,2=Run Search,3=Cancel Search,4=Quit)
Option Explicit
Dim gCancel As Boolean
'Threshold for relevance; if a page's relevance is below this, its
'links are not are not added to the search. A setting of 0
'results in all of a page's links being added.
Const RELEVANCE_THRESHOLD = 0
Public Function GetLinks(s As String, baseURL As String) As String
'Passed some html text in s, locates all the hyperlinks and
'returns the URLs in a string seperated by the | character.
'Returns only http links and ignores mailto and ftp.
'baseurl is the base location of the html document that is
'passed in s. If a link is found that is a local link,this is
'added so the return value is a fully qualified URL.
Dim pos As Long, pos1 As Long, pos2 As Long
Dim buf As String, temp As String
Dim sq As String, dq As String
Dim qc As String, start As String
buf = ""
'Make sure a non empty string is passed
If s = Null Or Len(s) = 0 Then
GetLinks = buf
Exit Function
End If
'Make sure there is at least one link
start = InStr(1, s, "<a href=", vbTextCompare)
If start = 0 Then
GetLinks = buf
Exit Function
End If
'Declare the single and double quote characters.
dq = Chr$(34)
sq = Chr$(39)
Do
'Get the first double or single quote character.
'That marks the start of the URL.
pos = InStr(start, s, dq, vbTextCompare)
pos2 = InStr(start, s, sq, vbTextCompare)
If pos = 0 And pos2 = 0 Then Exit Do
'If both a single and double quote were found, the first one marks
'the start of the URL
If pos > 0 And pos2 > 0 Then 'both were found
If pos < pos2 Then 'It is the double quote
qc = dq
Else 'It is the single quote
qc = sq
pos = pos2
End If
ElseIf pos = 0 Then 'Only the single quote was found
qc = sq
pos = pos2
ElseIf pos2 = 0 Then 'Only a double quote was found
qc = dq
End If
'Get the next quote character. That marks the end of the URL.
pos1 = InStr(pos + 1, s, qc, vbTextCompare)
If pos1 = 0 Then Exit Do
temp = Mid$(s, pos + 1, pos1 - pos - 1)
'Dont accept mailto and ftp links
If LCase(Left(temp, 7)) = "mailto:" Or LCase(Left(temp, 3)) = "ftp" Then
GoTo DoNotAdd
End If
'See whether it is a full URL. if not, add the base URL
If LCase(Left(temp, 7)) <> "http://" Then
temp = baseURL & temp
End If
'Strip off anything following a # or ? character in the link
pos = InStr(1, temp, "#")
If pos > 0 Then
temp = Left(temp, pos - 1)
End If
pos = InStr(1, temp, "?")
If pos > 0 Then
temp = Left(temp, pos - 1)
End If
buf = buf & temp & "|"
DoNotAdd:
'Locate the next link
pos = InStr(pos1, s, "<a href=", vbTextCompare)
start = pos
'If there are no more links, quit
If pos = 0 Then Exit Do
DoEvents
Loop While True
'Strip off the tailing |.
GetLinks = Left(buf, Len(buf) - 1)
End Function
Public Function StripTags(ByVal HtmlDoc As String) As String
'Passed the text of an HTML document, returns the text with all
'HTML tags removed. Specifically, removes all < > pairs and
'the text between them.
Dim buf As String, pos As String
'Check for no tags situation
pos = InStr(HtmlDoc, "<")
If pos = 0 Then
StripTags = HtmlDoc
Exit Function
End If
Do
'Add text to left of first tag to buffer
buf = buf & Left$(HtmlDoc, pos - 1)
'Find end of tag
pos = InStr(HtmlDoc, ">")
'Remove everything up to the end of tag from the text.
If pos = 0 Then Exit Do
HtmlDoc = Mid$(HtmlDoc, pos + 1)
'Find the start of the next tag
pos = InStr(HtmlDoc, "<")
If pos = 0 Then Exit Do
Loop While True
StripTags = buf
End Function
Public Function SearchForKW(str As String, kw As String) As Long
'Returns the number of times that the string kw is found in the
'string str, case-insensitive. Maximum count is 10 because higher
'hit rates will probably not mean higher relevance.
Dim buf As String, pos As Long, count As Integer
'Make a local copy minus HTML tags.
buf = StripTags(str)
pos = InStr(1, buf, kw, vbTextCompare)
If pos = 0 Then
SearchForKW = 0
Exit Function
End If
count = 0
Do
count = count + 1
'Strip off everything up to and including the just-found
'keyword
buf = Right(buf, Len(buf) - pos - Len(kw))
pos = InStr(1, buf, kw, vbTextCompare)
Loop Until count = 10 Or pos = 0
SearchForKW = count
End Function
Private Sub Command1_Click(Index As Integer)
Dim reply As Integer
Select Case Index
Case 0 'New Search
Call NewSearch
Case 1 'Open Search
Call OpenSearch
Case 2 ' Run Search
Call RunSearch
reply = MsgBox("Done - display report?", vbYesNo)
If reply = vbYes Then Call frmReport.Show
Case 3 'Cancel Search
gCancel = True
Case 4 'quit
End
End Select
End Sub
Public Sub OpenSearch()
'Opens an existing search database.
Dim gDataFileName
With CD
.Filter = "Search databases | *.mdb"
.InitDir = App.Path
.ShowOpen
gDataFileName = .FileName
End With
If gDataFileName <> "" Then
Caption = gDataFileName
Command1(2).Enabled = True 'Open Search
End If
End Sub
Private Sub Form_Load()
gDataFileName = ""
Command1(0).Enabled = True 'new search
Command1(1).Enabled = True 'open search
Command1(2).Enabled = False 'run search
Command1(3).Enabled = False 'cancel search
Command1(4).Enabled = True 'quit search
gCancel = False
Caption = "No search loaded"
End Sub
Public Sub NewSearch()
'Create a new search
gDataFileName = ""
frmNewSearch.Show 1
If gDataFileName <> "" Then
Caption = gDataFileName
Command1(2).Enabled = True
End If
End Sub
Public Sub RunSearch()
'Runs a search based on the currently loaded search data.
'Terminates when MAXLINKS links have been examined or no
'more links are available.
Dim strConnect As String
Dim buf As String
Dim count As Long, relevance As Integer
Dim oldCaption As String
'For keywords and their importance values
Dim kw() As String
Dim impr() As Integer
'Number of keywords
Dim numkw As Integer
On Error Resume Next
'Initialize
numkw = 0
oldCaption = frmMain.Caption
'Enable Cancel button
Command1(3).Enabled = True
'Disable other buttons
Command1(0).Enabled = False
Command1(1).Enabled = False
Command1(2).Enabled = False
Command1(4).Enabled = False
'Create the database objects
strConnect = "Provider=Microsoft.Jet.OLEDB.3.51;"
strConnect = strConnect & "Persist Security Info = false;"
strConnect = strConnect & "Data Source = " & gDataFileName
Set cnLinks = New ADODB.Connection
Set rsURL = New ADODB.Recordset
Set rskeywords = New ADODB.Recordset
cnLinks.ConnectionString = strConnect
cnLinks.ConnectionTimeout = 10
cnLinks.CursorLocation = adUseNone
cnLinks.Open
'Get the keywords from the database and put them in an array.
'Put the importance values in their own array
rskeywords.Open "select * from keywords", cnLinks, _
adOpenDynamic, adLockOptimistic, adCmdText
Do
numkw = numkw + 1
ReDim Preserve kw(numkw)
ReDim Preserve impr(numkw)
kw(numkw) = rskeywords!keyword
impr(numkw) = rskeywords!importance
rskeywords.MoveNext
Loop Until rskeywords.EOF = True
'Destroy the keywords recordset because we are done with it
Set rskeywords = Nothing
'Open the URLs list in the database.
rsURL.Open "Select * from links", cnLinks, _
adOpenDynamic, adLockOptimistic, adCmdText
rsURL.MoveFirst
Do
'Debug.print "Trying " & rsurl!url & ", " & rsurl!Searched
If rsURL!searched = DateValue(BASEDATE) Then
rsURL!searched = Now
Call SearchURL(rsURL!URL, kw(), impr())
count = count + 1
frmMain.Caption = count
rsURL.MoveFirst
Else
rsURL.MoveNext
End If
DoEvents
Loop Until count > MAXLINKS Or rsURL.EOF Or gCancel = True
'Clean up
gCancel = False
Command1(3).Enabled = False
Command1(0).Enabled = True
Command1(1).Enabled = True
Command1(2).Enabled = True
Command1(4).Enabled = True
frmMain.Caption = oldCaption
'Destroy the database objects
Set cnLinks = Nothing
Set rsURL = Nothing
End Sub
Public Sub SearchURL(URL As String, kw() As String, impr() As Integer)
'URL is the full url to search
'kw() is a string array of the keywords to look for
'impr() is an integer array of the relevance importance
'of each keyword in kw()
Dim buf As String, i As Integer
Dim rel As Long
On Error Resume Next
'Set the timeout value in seconds
Inet1.RequestTimeout = 30
'Open the URL
buf = Inet1.OpenURL(URL, icString)
DoEvents
'Catch time out and server not found errors
If Err = 35761 Then
Err.Clear
Exit Sub
ElseIf buf = "" Then
Exit Sub
End If
'Search for the various keywords,
'one at a time
rel = 0
For i = 1 To UBound(kw)
rel = rel + (impr(i) / 5) * SearchForKW(buf, kw(i))
Next i
rel = rel \ UBound(kw)
rsURL!relevance = rel
Debug.Print rsURL!URL & ": " & rsURL!relevance
If rel >= RELEVANCE_THRESHOLD Then
Call AddLinksToSearch(buf, MakeBaseURL(URL))
End If
End Sub
Public Sub AddLinksToSearch(s As String, baseURL As String)
'Extracts the http:// links from s and adds them to the search
'database, s is the text contents of an HTML file
'Returns true if any links were added to the database, false if not
Dim links As String, newURL As String
Dim pos As Long
'Necessary so that "duplicate data" errors will not stop execution
On Error Resume Next
links = GetLinks(s, baseURL)
'No links to add
If Len(links) < 9 Then
Exit Sub
End If
pos = 1
Do
pos = InStr(1, links, "|") 'Remember links are stored with a |
'seperating them in the list
If pos = 0 Then Exit Do 'no links are found
newURL = Left(links, pos - 1) 'if links are found then extract the link and add it to the newurl variable
links = Right(links, Len(links) - pos) 'After extracting the first
'link, store the remaining links in the variable called
'links
'Open the url recordset and the links to the database
rsURL.AddNew
rsURL!URL = newURL
rsURL!relevance = 0
rsURL!starting = False 'This link is not the starting url
rsURL!searched = DateValue(BASEDATE) 'Store the date when this link
'was searched
rsURL.Update 'Update the database
'Uncomment the following debug.print statements to see
'a list of accepted and duplicate urls in the immediate
'window while executing in VB.
If Err.Number = &H80040E21 Then
'Debug.Print newURL & " rejected as duplicate"
'Debug.Print "description: " & Err.Description
MsgBox newURL & " rejected as duplicate"
Else
'Debug.Print newURL & " Added"
MsgBox newURL & " added"
End If
DoEvents 'yields execution so that the operating system
'can process other events. doevents function forces the
'processor to use its multitasking capabilities. The current
'process is run as well as other processes in the queue
'are taken care of. the control returns to the current
'application once the task is completed
Loop While True
End Sub
Public Function MakeBaseURL(URL As String) As String
'The function accepts the complete url, returns the base part - the
'URL minus any filenames and with a trailing "/".
'For Example:
'http://www.abc.com/index.htm --> http://www.abc.com/
'http://www.abc.com/sub/data.htm --> http://www.abc.com/sub
'http://www.abc.com --> http://www.abc.com/
'Remember the trailing "/" is appended to the url
'Declare the necessary variables
Dim s As String, pos As Integer
'look for the last / in the URL
pos = InStrRev(URL, "/")
'InStrRev has a different syntax than the instr function
'Instr function starts searching from the left side or the
'start of the string but instrrev function starts searching
'from the end of the string
'if it is less than 8,it is part of the http:// protocol specifier
'Therefore, we can just tack (tack: stick in) a / at the end and return it.
If pos < 8 Then
MakeBaseURL = URL & "/"
Exit Function
'Otherwise, strip off everything after the last /, thus removing
'the filename.
Else
MakeBaseURL = Left(URL, pos)
End If
End Function
Comments