I have tried to convert one of my Visual Basic 6 programs to Visual Basic.NET. This program basically crawls email and mobile number data from a web page link. This program works great in Visual Basic 6 but after converting to Visual Basic.NET is not providing any function or result in .NET.
What I have tried:
Imports VBScript_RegExp_55
Imports System.Collections.Specialized
Imports System.Diagnostics
Public Class autoextratorform
Dim emailsearch As New ListViewItem()
Dim numbersearch As New ListViewItem
Private Declare Function SendMessageByString _
Lib "user32" _
Alias "SendMessageA" _
(ByVal hwnd As Long, _
ByVal wMsg As Long, _
ByVal wParam As Long, _
ByVal lParam As String) _
As Long
Private Const LB_SELECTSTRING = &H18C
Private Declare Function ShellExecute Lib "shell32.dll" Alias "ShellExecuteA" (ByVal hwnd As Long, ByVal lpOperation As String, ByVal lpFile As String, ByVal lpParameters As String, ByVal lpDirectory As String, ByVal nShowCmd As Long) As Long
Private WithEvents cIE As InternetHtml
Private WithEvents cExtLinks As cExtractLinks
Private cGSearch As cGoogleSearch
Dim Excel As Object
Dim Excelsheet As Object
Dim a() As String
Dim b() As String
Dim c() As String
Dim i As Integer
' Needs reference to Microsoft VBscript Regular Expressions I recomend ver 5.5.
'
Dim baseurl As String ' var to store base url so we can build the full path
Dim dVisited ' Dictionary to hold visited urls = I think Missing here
Dim dEmail ' dictionary to hold emails = I think Missing here
Dim dnumber ' = I think Missing here
Dim dweb '= I think Missing here
' We are putting the emails in a list also, for user feed back
'It would be less momery intensive and faster to just keep these in the dictionry object
'which allows to easily tell if the email already exist
Dim regxPage ' var to hold regular expression to extract urls
Dim regxEmail ' var to hold regular expression to extract emails
Dim regnumber
Dim regweb
Dim Match, Matches As String ' we use these to store are regx matches
Dim Match1, Matches1 As String
Dim Match2, Matches2 As String
' Regular expressions are super powerfull and have been a part of unix for a long time
' goto the form load event to see the regex initialization
' to learn more about regular expressions and to download the latest scripting runtime see
' http://msdn.microsoft.com/scripting/default.htm?/scripting/vbscript/techinfo/vbsdocs.htm
Dim email, pageurl, mnumber, sweb As String
' The above is only because dictionary.exist does not work directly on Match var
Dim stopcrawl As Integer ' Used to exit crawl loop
Private Sub ComboBox1_SelectedIndexChanged(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles ComboBox1.SelectedIndexChanged
Select Case ComboBox1.SelectedIndex
Case 0
ListBox2.Items.Clear()
ListBox2.Items.Add("http://classads.jang.com.pk/default.asp?cityid=1")
ListBox2.Items.Add("http://www.merapakistan.com/classifieds/")
ListBox2.Items.Add("http://olx.com.pk/")
ListBox2.SelectedIndex = 0
txtStartUrl.Text = ListBox2.Text
Case 1
ListBox2.Items.Clear()
ListBox2.Items.Add("http://www.locanto.com.pk/")
ListBox2.Items.Add("http://pakistanclassifiedads.com/")
ListBox2.Items.Add("http://pkr.com.pk/")
txtStartUrl.Text = ListBox2.Text
Case 2
ListBox2.Items.Clear()
ListBox2.Items.Add("http://adpost.pk/")
ListBox2.Items.Add("http://page.com.pk/")
ListBox2.Items.Add("http://www.clasf.pk/")
ListBox2.SelectedIndex = 0
txtStartUrl.Text = ListBox2.Text
Case 3
ListBox2.Items.Clear()
ListBox2.Items.Add("http://www.nelaam.com/")
ListBox2.Items.Add("http://www.asani.com.pk/")
ListBox2.Items.Add("http://freepost.com.pk/")
ListBox2.SelectedIndex = 0
txtStartUrl.Text = ListBox2.Text
End Select
End Sub
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Command1.Click
stopcrawl = 0 ' set stop crawl so we do not exit loop
If txtStartUrl.Text & "" = "" Then
lblresult.Text = ("Please Load your Desired Site List !")
Exit Sub
ElseIf txtStartUrl.Text & "" = "http://" Then
lblresult.Text = ("Please Load your Desired Site List !")
Exit Sub
ElseIf txtStartUrl.Text = "" Then
lblresult.Text = ("Please Load your Desired Site List !")
Exit Sub
End If
' the above should really check for a valid url, but I am a lazy PERL programmer
ListBox1.Items.Add(txtStartUrl.Text) 'add item to list
Label16.Text = (ListBox2.Items.Count - 1)
Label8.Text = (ListBox2.Items.Count)
lblresult.Text = " Start service for searching Email address and Mobile Numbers"
Command1.Enabled = False
ComboBox1.Enabled = False
LaVolpeButton4.Enabled = True
'UPGRADE_ISSUE: (2064) ComboBox property Combo1.Locked was not upgraded. More Information: https://docs.mobilize.net/vbuc/ewis#2064
LaVolpeButton8.Enabled = False
LaVolpeButton9.Enabled = False
txtStartUrl.ReadOnly = True
crawl() ' and were off
End Sub
Private Sub autoextrator_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
With listnumber
.Columns.Clear()
.Columns.Add("Count!", CInt(Width * 0.1))
.Columns.Add("Mobile Number!", CInt(Width * 0.2))
.Columns.Add("Web URL!", CInt(Width * 0.68))
End With
With listemail
.Columns.Clear()
.Columns.Add("Count!", CInt(Width * 0.1))
.Columns.Add("Email Address!", CInt(Width * 0.2))
.Columns.Add("Web URL!", CInt(Width * 0.68))
End With
'initialize dictionary and regx objects
dVisited = CreateObject("Scripting.Dictionary")
dVisited.CompareMode = CompareMethod.Binary
dEmail = CreateObject("Scripting.Dictionary")
dEmail.CompareMode = CompareMethod.Binary
dnumber = CreateObject("Scripting.Dictionary")
dnumber.CompareMode = CompareMethod.Binary
dweb = CreateObject("Scripting.Dictionary")
dweb.CompareMode = CompareMethod.Binary
Dim counting1 As String
Dim counting2 As String
regxPage = New VBScript_RegExp_55.RegExp() ' Create a regular expression.
regxPage.Pattern = "HREF=""[^""#]+[.][^""#]+" ' Set pattern."
regxPage.IgnoreCase = True ' Set case insensitivity.
regxPage.Global = True ' Set global applicability.
regxEmail = New VBScript_RegExp_55.RegExp() ' Create a regular expression.
regxEmail.Pattern = "\b[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,4}\b" ' Set pattern."
regxEmail.IgnoreCase = True ' Set case insensitivity.
regxEmail.Global = True ' Set global applicability.
regnumber = New VBScript_RegExp_55.RegExp()
counting1 = "(\+92)-?\d{3}-?\d{7}"
counting2 = "(?:0092|0|\+92)-?\d{3}-?\d{7}"
regnumber.Pattern = counting1
regnumber.Pattern = counting2
regnumber.IgnoreCase = True
regnumber.Global = True
cIE = New InternetHtml()
cGSearch = New cGoogleSearch()
cExtLinks = New cExtractLinks()
End Sub
Sub crawl()
While ListBox1.Items.Count > 0 ' loop while list has data
If stopcrawl = 1 Then GoTo exitcrawl
getpage(ListBox1.Items(0)) ' This is the heart of the prog, except for the regx = I think Missing here
' stuff in the form load event
ListBox1.Items.Remove(0) ' remove item from list
End While
ListBox2.Items.Remove(ListBox2.SelectedIndex)
If ListBox2.SelectedIndex < ListBox2.Items.Count - 1 Then
ListBox2.SelectedIndex += 1
txtStartUrl.Text = ListBox2.Items.ToString
exitcrawl:
If ListBox2.Items.Count = 0 Then
MessageBox.Show("Searching has completed !", "Done ! searching", MessageBoxButtons.OK, MessageBoxIcon.Information)
txtStartUrl.ReadOnly = False
txtStartUrl.Text = ""
Label3.Text = "-----------------------------------------------------------"
LaVolpeButton4.Enabled = False
Command1.Enabled = True
LaVolpeButton8.Enabled = True
LaVolpeButton9.Enabled = True
End If
End If
End Sub
Sub getpage(ByVal page As String)
On Error Resume Next
If dVisited.Contains(page) Then
Exit Sub
Else
dVisited.Add(page, 1) ' add page to dVisited dictionary
Label6.Text = CStr(ListBox1.Items.Count)
Label14.Text = CStr(dVisited.Count)
Label3.Text = page
End If
baseurl = getpath(page) ' build full url - see getpath
TextBox1.Text = ""
If ListBox1.Items.Count > 5000 Then Exit Sub ' sets the maximum cache (so we don't run out of mem)
Using wc As New System.Net.WebClient
TextBox1.Text = wc.DownloadString(page)
End Using
Matches = regxPage.Execute(TextBox1.Text) ' Execute search.
For Each Me.Match In Matches
pageurl = Match
If InStr(1, pageurl, "http://", vbTextCompare) Then
If dVisited.Exists(pageurl) = False Then ListBox1.Items.Add(Mid(pageurl, 7))
Else
If dVisited.Exists(baseurl & Mid(pageurl, 7)) = False Then ListBox1.Items.Add(baseurl & Mid(pageurl, 7))
End If
Next
' search for email
Matches = regxEmail.Execute(TextBox1.Text) ' Execute search.
For Each Me.Match In Matches ' Iterate Matches collection.
' check if email exist
email = Match
Debug.Print(email & dEmail.Exists(email))
If dEmail.Exists(email) = False Then
dEmail.Add((email), 1)
Dim d As Integer
d = listemail.Items.Count
d = d + 1
emailsearch = listemail.Items.Add(d)
emailsearch.SubItems.Add(email.ToString)
emailsearch.SubItems.Add(pageurl.ToString)
ListBox3.Items.Add(email)
Label10.Text = listemail.Items.Count
End If
Next
Matches1 = regnumber.Execute(TextBox1.Text)
For Each Me.Match1 In Matches1
mnumber = Match1
Debug.Print(mnumber & dnumber.Exists(mnumber))
If dnumber.Exists(mnumber) = False Then
dnumber.Add((mnumber), 1)
Dim c As Integer
c = listnumber.Items.Count
c = c + 1
numbersearch = listnumber.Items.Add(c)
numbersearch.SubItems.Add(mnumber.ToString)
numbersearch.SubItems.Add(pageurl.ToString)
Label12.Text = listnumber.Items.Count
End If
Next
End Sub
Function getpath(ByVal URL As String) As String
' look for the last / and get a string up to that location
Dim lastbar As Integer = URL.LastIndexOf("/") + 1
Dim tmppath As String = URL.Substring(0, Math.Min(lastbar, URL.Length))
If tmppath = "http://" Then tmppath = URL ' full path already so return url
Return tmppath
End Function
Public Function StripOut(ByVal From As String, ByVal What As String) As String
Dim result As String = ""
result = From
For i As Integer = 1 To Strings.Len(What)
result = result.Replace(What.Substring(i - 1, Math.Min(1, What.Length - (i - 1))), "")
Next i
Return result
End Function
Private Sub LaVolpeButton4_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles LaVolpeButton4.Click
stopcrawl = 1
MsgBox(" Service has Stoped by USER !")
LaVolpeButton4.Enabled = False
Command1.Enabled = True
End Sub
Private Sub LaVolpeButton8_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles LaVolpeButton8.Click
Dim strData As String = ""
Dim strQ() As String
Dim intFile As Integer
' Set up the Common Dialog initial Directory
' to the application path. Filter for Text and All files
' Prompt the user to select a file
'
cmd.InitialDirectory = My.Application.Info.DirectoryPath
cmd.Filter = "Text Files(*.txt)|*.txt|All Files(*.*)|*.*"
cmd.Title = "Select Email Addresses File"
cmd.ShowDialog()
If cmd.FileName <> "" Then
'
' If the user selected a file
' open and read the entire contents
' then split it into records
'
intFile = FileSystem.FreeFile()
FileSystem.FileOpen(intFile, cmd.FileName, OpenMode.Input)
strData = FileSystem.InputString(intFile, FileSystem.LOF(intFile))
FileSystem.FileClose(intFile)
strQ = strData.Split(CChar(Environment.NewLine))
'
' Populate the texbox array with the questions
' (either the number of textboxes in the control array or
' number of questions, which ever is the smaller)
For Each strQ_item As String In strQ
ListBox2.Items.Add(strQ_item)
Next strQ_item
txtStartUrl.Text = ListBox2.Text
End If
End Sub
End Class