Hi All,
I am a tester and trying to automate my s/w testing procedure. I have to do the following to test.
1. Open the two different xml files (which has html table structure; it has table, thead, tr, td tags)
2. Compare them (We are currently using Third Party Text comparison tool)
3. Find the difference
The purpose of below routine is to convert the XML file which has data to data table and then compare both programatically. Now my whole application is ready and working fine for small XML files. However not for larger, 150MB or more. It gives 'System.OutOfMemoryException' occurred in mscorlib.dll
The said error occurs when the file which is being converted in datatable is first converted to string which is 150 MB, after the completion of conversion of first XML file in datatable.
My gut feeling tells that this code below is only leaking memory.
Guys, Please save my life and remove this exception out of my life.
Private Function ConvertToDataTable(ByVal XMLString As String) As DataTable
Dim dt As DataTable
Dim dr As DataRow
Dim dc As DataColumn
Dim TableExpression As String = "<table[^>]*>(.*?)</table>"
Dim HeaderExpression As String = "<th[^>]*>(.*?)</th>"
Dim RowExpression As String = "<tr[^>]*>(.*?)</tr>"
Dim ColumnExpression As String = "<td[^>]*>(.*?)</td>"
Dim HeadersExist As Boolean = False
Dim iCurrentColumn As Integer = 0
Dim iCurrentRow As Integer = 0
Dim data As String
Dim substr1 As String
Dim substr2 As String
Dim substr3 As String
Dim counter As Integer = 0
Dim str1 As String
Dim s As String
Dim Tables As MatchCollection
Dim Headers As MatchCollection
Dim str() As Char
Try
' Get a match for all the tables in the HTML
Tables = Regex.Matches(XMLString, TableExpression, RegexOptions.Multiline Or RegexOptions.Singleline Or RegexOptions.IgnoreCase Or RegexOptions.IgnorePatternWhitespace)
' Loop through each table element
For Each Table As Match In Tables
' Reset the current row counter and the header flag
iCurrentRow = 0
HeadersExist = False
' Add a new table to the DataSet
dt = New DataTable
' Create the relevant amount of columns for this table (use the headers if they exist, otherwise use default names)
Dim pattern As String = "*<th*"
If Table.Value.ToString Like pattern Then
' Set the HeadersExist flag
HeadersExist = True
' Get a match for all the rows in the table
Headers = Regex.Matches(Table.Value, HeaderExpression, RegexOptions.Multiline Or RegexOptions.Singleline Or RegexOptions.IgnoreCase)
' Loop through each header element
For Each Header As Match In Headers
If Header.Groups(1).ToString() Like "*<tr><th>*" Then
str1 = Header.Groups(1).ToString().Replace("<tr><th>", Nothing)
str1.Trim(Nothing)
dt.Columns.Add(str1)
str1 = Nothing
ElseIf Header.Groups(1).ToString() Like "*<tr>*" And Not Header.Groups(1).ToString() Like "*<tr><th>*" Then
str = Header.Groups(1).ToString()
Dim i As Integer
Dim newstrindex As Integer = 0
Dim newstr(Header.Groups(1).ToString().Length) As Char
Dim c As Char
For i = 0 To str.Length - 1
c = str(i)
If Not Char.IsLetter(c) Then
Else
newstr(newstrindex) = str(i)
newstrindex = newstrindex + 1
End If
Next
s = newstr
s = s.Substring(4)
dt.Columns.Add(s)
str = Nothing
s = Nothing
c = Nothing
newstr = Nothing
newstrindex = Nothing
Else
dt.Columns.Add(Header.Groups(1).ToString)
End If
Next
Else
For iColumns As Integer = 1 To Regex.Matches(Regex.Matches(Regex.Matches(Table.Value, TableExpression, RegexOptions.Multiline Or RegexOptions.Singleline Or RegexOptions.IgnoreCase).Item(0).ToString, RowExpression, RegexOptions.Multiline Or RegexOptions.Singleline Or RegexOptions.IgnoreCase).Item(0).ToString, ColumnExpression, RegexOptions.Multiline Or RegexOptions.Singleline Or RegexOptions.IgnoreCase).Count
dt.Columns.Add("Column " & iColumns)
Next
End If
' Get a match for all the rows in the table
Dim Rows As MatchCollection = Regex.Matches(Table.Value, RowExpression, RegexOptions.Multiline Or RegexOptions.Singleline Or RegexOptions.IgnoreCase)
Tables = Nothing
GC.Collect()
' Loop through each row element
For Each Row As Match In Rows
' Only loop through the row if it isn't a header row
If Not (iCurrentRow = 0 And HeadersExist = True) Then
' Create a new row and reset the current column counter
dr = dt.NewRow
iCurrentColumn = 0
' Get a match for all the columns in the row
Dim Columns As MatchCollection = Regex.Matches(Row.Value, ColumnExpression, RegexOptions.Multiline Or RegexOptions.Singleline Or RegexOptions.IgnoreCase Or RegexOptions.IgnorePatternWhitespace)
' Loop through each column element
For Each Column As Match In Columns
data = Column.Groups(1).ToString()
'counter = 0
'Removing subsiquent <td\> tags as some columns may have null values
For Each Header As Match In Headers
If data.Length > 5 Then
substr1 = data.Substring(0, 5)
If substr1.Equals("<td/>") Then
data = data.Substring(5)
iCurrentColumn += 1
'dt.Rows(iCurrentRow).Item(iCurrentColumn) = Nothing
dr(iCurrentColumn) = Nothing
Else
End If
End If
If data.Length > 4 Then
substr2 = data.Substring(0, 4)
If substr2.Equals("<td>") Then
data = data.Substring(4)
iCurrentColumn += 1
'dt.Rows(iCurrentRow).Item(iCurrentColumn) = Nothing
dr(iCurrentColumn) = Nothing
Else
End If
End If
If data.Length >= 5 Then
If substr2 <> "<td>" And substr1 <> "<td/>" Then
dr(iCurrentColumn) = data
'dt.Rows(iCurrentRow).Item(iCurrentColumn) = data
iCurrentColumn += 1
Exit For
End If
Else
dr(iCurrentColumn) = data
iCurrentColumn += 1
Exit For
End If
substr1 = Nothing
substr2 = Nothing
Next
Next
' Add the DataRow to the DataTable
dt.Rows.Add(dr)
dr = Nothing
'GC.Collect()
End If
' Increase the current row counter
iCurrentRow += 1
dr = Nothing
'GC.Collect()
Next
dr = Nothing
Headers = Nothing
Rows = Nothing
Tables = Nothing
GC.Collect()
Next
'dg.DataSource = dt
Return dt
Finally
dt.Dispose()
MsgBox("in finally")
GC.Collect()
End Try
End Function
End Class