PagesDataset.vb
This class is used to create and populate the DataSet. It consists of two methods: Create and StoreFile. The Create method creates the DataSet that stores the search results, and the StoreFile method is responsible for adding records to the DataTable in the DataSet.
'*******************************************************
'
' Create Method - Shared method
'
' Builds an empty DataSet containing a single "Pages"
' table and returns it. The table columns are, in order:
'
'   PageId      Int32    auto-increment (seed 1), primary key
'   Title       String
'   Description String
'   Path        String
'   MatchCount  Int32
'   Size        Decimal
'
'********************************************************
Public Shared Function Create() As DataSet
    Dim pgDataSet As New DataSet()

    'Keep a reference to the table instead of looking it up by name each time
    Dim pages As DataTable = pgDataSet.Tables.Add("Pages")

    'Schema of table is defined; column order matters to positional consumers
    Dim pageId As DataColumn = pages.Columns.Add("PageId", GetType(Integer))
    pages.Columns.Add("Title", GetType(String))
    pages.Columns.Add("Description", GetType(String))
    pages.Columns.Add("Path", GetType(String))
    pages.Columns.Add("MatchCount", GetType(Integer))
    pages.Columns.Add("Size", GetType(Decimal))

    'PageId is defined as identity, numbering from 1
    pageId.AutoIncrement = True
    pageId.AutoIncrementSeed = 1

    'PageId is defined as the primary key. The array holds exactly one
    'column: a VB array bound is the upper index, so keys(1) would have
    'allocated two slots and left a trailing Nothing.
    pages.PrimaryKey = New DataColumn() {pageId}

    Return pgDataSet
End Function
'********************************************************
'
' StoreFile Method - Shared method
'
' Appends one search result page as a new row of the
' "Pages" table in the supplied DataSet.
'
' dstPgs - DataSet that contains the "Pages" table
'          (for example, one returned by Create)
' srchPg - page whose Title, Description, Path,
'          MatchCount and Size are copied into the row
'
' PageId is not assigned here; in a DataSet built by
' Create that column is auto-incremented.
'
' Throws ArgumentNullException when dstPgs is Nothing.
'
'********************************************************
Public Shared Sub StoreFile(ByVal dstPgs As DataSet, _
                            ByVal srchPg As Searchs.Page)
    'Fail with a descriptive error instead of a bare NullReferenceException
    If dstPgs Is Nothing Then
        Throw New System.ArgumentNullException("dstPgs")
    End If

    Dim pages As DataTable = dstPgs.Tables("Pages")

    'New row is created with the table's schema
    Dim pageRow As DataRow = pages.NewRow()

    'Data is copied from the page into the row
    pageRow("Title") = srchPg.Title
    pageRow("Description") = srchPg.Description
    pageRow("Path") = srchPg.Path
    pageRow("MatchCount") = srchPg.MatchCount
    pageRow("Size") = srchPg.Size

    'Row is added to the dataset
    pages.Rows.Add(pageRow)
End Sub
CleanHtml.vb
The CleanHtml class contains a single public shared method, Clean, which uses regular expressions to strip the HTML markup from the content.
'*****************************************************
'
' Clean Method - Shared method
'
' Strips HTML markup from the supplied text and returns
' the result. The steps run in this order:
'
'   1. select/option/script/style/title elements are
'      replaced, together with their content, by a space
'   2. the entities &nbsp; &quot; and &copy; are removed
'   3. every remaining tag is replaced by a space
'   4. every non-word character is replaced by a space
'
' Contents - text to clean; Nothing yields an empty string
'
'*****************************************************
Public Shared Function Clean(ByVal Contents As String) As String
    'Regex.Replace rejects Nothing; treat a missing input as empty text
    If Contents Is Nothing Then
        Return String.Empty
    End If

    'Elements whose inner text is not page content are dropped whole.
    '(.|\n) lets the lazy match run across line breaks.
    Contents = Regex.Replace(Contents, _
        "<(select|option|script|style|title)(.*?)" & _
        ">((.|\n)*?)</(SELECT|OPTION|SCRIPT|STYLE|TITLE)>", _
        " ", RegexOptions.IgnoreCase)

    'These entities are deleted rather than replaced by a space
    '(case-sensitive match)
    Contents = Regex.Replace(Contents, "&(nbsp|quot|copy);", "")

    'Any tag still present becomes a space so adjacent words stay apart
    Contents = Regex.Replace(Contents, "<([\s\S])+?>", _
        " ", RegexOptions.IgnoreCase)

    'Punctuation and other non-word characters become spaces
    Contents = Regex.Replace(Contents, "\W", " ")

    Return Contents
End Function
Comments