''' <summary>
''' Main window of the site-audit spider. It owns the shared crawl state
''' (visited-link table and counters) and starts the crawl when Button1 is clicked.
''' </summary>
Public Class Form1
    ''' <summary>Web client used by <see cref="SpiderThePageCache"/>.</summary>
    Public WithEvents MyClient As System.Net.WebClient

    ''' <summary>Counter shown in the title bar; incremented by GUIThreads.AddWorkingThreads.</summary>
    Public TotalWorkingThreads As Integer = 0

    ''' <summary>Counter shown in the title bar; incremented by GUIThreads.AddTotal.</summary>
    Public TotalTags As Integer = 0

    ''' <summary>
    ''' Table of links already queued for crawling. SpideringThreads.AddStringToGArray
    ''' fills it front-to-back; an empty slot marks the end of the used part.
    ''' </summary>
    Public gExtractedLinks(50000) As String

    ''' <summary>
    ''' Downloads <paramref name="Webpage"/> and writes every distinct link that follows an
    ''' "a href" marker to the debug output.
    ''' </summary>
    ''' <param name="Webpage">Address of the page to download.</param>
    ''' <remarks>
    ''' The link value is assumed to start 8 characters after the start of the marker
    ''' (i.e. right after <c>a href="</c>) and to run up to the next double quote, or to the
    ''' end of the document when there is none. Links starting with "#" are ignored.
    ''' Download errors are not handled here and propagate to the caller.
    ''' </remarks>
    Private Sub SpiderThePageCache(ByVal Webpage As String)
        Const ValueOffset As Integer = 8

        MyClient = New System.Net.WebClient
        Dim document As String = MyClient.DownloadString(Webpage)

        ' Distinct links seen on this page. A set replaces the fixed 5001-slot array whose
        ' scan loops used "To Length" and therefore indexed one element past the end.
        ' NOTE(review): nothing reads this collection after the loop yet.
        Dim seenLinks As New System.Collections.Generic.HashSet(Of String)

        Dim position As Integer = 0
        While True
            Dim markerIndex As Integer = document.IndexOf("a href", position, StringComparison.Ordinal)
            If markerIndex = -1 Then Exit While

            Dim valueStart As Integer = markerIndex + ValueOffset
            ' A marker too close to the end has no value to read; stopping here also keeps
            ' the next IndexOf start index inside the document.
            If valueStart >= document.Length Then Exit While

            ' The next search always resumes at the start of this value.
            position = valueStart

            ' In-page fragment links are not interesting.
            If document.Chars(valueStart) = "#"c Then Continue While

            Dim closingQuote As Integer = document.IndexOf(""""c, valueStart)
            Dim link As String
            If closingQuote = -1 Then
                link = document.Substring(valueStart)
            Else
                link = document.Substring(valueStart, closingQuote - valueStart)
            End If

            Debug.WriteLine(markerIndex & " : " & link)
            seenLinks.Add(link)
        End While
    End Sub

    ''' <summary>
    ''' Starts crawling "Http://Ikenet" on a dedicated highest-priority foreground thread so the
    ''' UI thread stays responsive. The worker reports back through this form.
    ''' </summary>
    Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
        Dim spider As New SpideringThreads
        spider.CallBack = Me
        spider.Webpage = "Http://Ikenet"

        Dim worker As New System.Threading.Thread(AddressOf spider.SpiderMainPageWithThreads)
        ' Foreground thread: the process stays alive until the crawl finishes.
        worker.IsBackground = False
        worker.Priority = System.Threading.ThreadPriority.Highest
        worker.Start()
    End Sub

    ''' <summary>Load handler; intentionally empty.</summary>
    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
    End Sub
End Class
''' <summary>
''' Crawls pages of the Ikenet site. Each downloaded page is checked for tell-tale text
''' ("PASSWORD=", "developer for debugging") and scanned for links; results are posted to
''' the owning form on its UI thread through <see cref="CallBack"/>.
''' </summary>
Class SpideringThreads
    ''' <summary>
    ''' Form that receives UI updates (via Invoke) and owns the shared visited-link table
    ''' gExtractedLinks.
    ''' </summary>
    Public CallBack As Form1

    ''' <summary>Web client of the most recent download performed by this instance.</summary>
    Public WithEvents MyClient As System.Net.WebClient

    ''' <summary>Parameterless delegate used to marshal GUIThreads methods onto the UI thread.</summary>
    Delegate Sub MyDelPtr()

    ''' <summary>Page the two thread entry points start from.</summary>
    Public Webpage As String

    ''' <summary>Kinds of link markers recognised on the main page. Kept for compatibility.</summary>
    Enum Marker
        AHref = 1
        AutoComplete = 2
        Three = 3
    End Enum

    ' Prefix put in front of relative links, and (with its Https twin) the on-site test.
    Private Const SiteRoot As String = "Http://Ikenet/"
    Private Const SecureSiteRoot As String = "Https://Ikenet/"

    ' Distance from the start of a marker to the first character of the link value:
    ' 8 skips  a href="  and 15 skips  .autocomplete("
    Private Const AnchorValueOffset As Integer = 8
    Private Const AutoCompleteValueOffset As Integer = 15

    ' Serialises access to CallBack.gExtractedLinks, which every crawler thread shares.
    Private Shared ReadOnly VisitedLock As New Object

    ''' <summary>
    ''' Thread entry point for the start page. Every new on-site link found after an
    ''' <c>a href="</c> or <c>.autocomplete(</c> marker is crawled on its own thread by
    ''' <see cref="SpiderNewPageWithThreads"/>.
    ''' </summary>
    Public Sub SpiderMainPageWithThreads()
        Dim document As String = DownloadPage(Webpage, False)
        If document Is Nothing Then Exit Sub
        ReportIfSensitive(document, Webpage)

        Dim position As Integer = 0
        While True
            Dim hrefAt As Integer = document.IndexOf("a href=""", position, StringComparison.Ordinal)
            Dim autoAt As Integer = document.IndexOf(".autocomplete(", position, StringComparison.Ordinal)
            If hrefAt = -1 AndAlso autoAt = -1 Then Exit While

            ' Handle whichever marker comes first in the document.
            Dim markerIndex As Integer
            Dim valueStart As Integer
            If autoAt = -1 OrElse (hrefAt <> -1 AndAlso hrefAt < autoAt) Then
                markerIndex = hrefAt
                valueStart = hrefAt + AnchorValueOffset
            Else
                markerIndex = autoAt
                valueStart = autoAt + AutoCompleteValueOffset
            End If

            ' A marker at the very end has no value; stopping also keeps the next IndexOf
            ' start index inside the document.
            If valueStart >= document.Length Then Exit While
            position = valueStart

            Dim link As String = NextNewLink(document, markerIndex, valueStart, SiteRoot)
            If link Is Nothing Then Continue While

            Dim counters As New GUIThreads
            CallBack.Invoke(New MyDelPtr(AddressOf counters.AddWorkingThreads)) 'Invoke on main thread
            CallBack.Invoke(New MyDelPtr(AddressOf counters.AddTotal)) 'Invoke on main thread

            Dim child As New SpideringThreads
            child.Webpage = link
            child.CallBack = CallBack
            Dim worker As New System.Threading.Thread(AddressOf child.SpiderNewPageWithThreads)
            worker.Start()
        End While
    End Sub

    ''' <summary>
    ''' Thread entry point for a first-level page: downloads <see cref="Webpage"/> and crawls
    ''' every new on-site link recursively on the current thread. A download that fails with
    ''' HTTP 500 is reported to ListBox1; any other failure goes to ListBox2.
    ''' </summary>
    Public Sub SpiderNewPageWithThreads()
        Dim document As String = DownloadPage(Webpage, True)
        If document Is Nothing Then Exit Sub
        ReportIfSensitive(document, Webpage)
        CrawlAnchors(document, SiteRoot)
    End Sub

    ''' <summary>
    ''' Downloads <paramref name="WebSite"/>, reports it if it contains sensitive text and
    ''' recursively crawls every new on-site link it contains.
    ''' </summary>
    ''' <param name="WebSite">Address of the page to crawl.</param>
    ''' <remarks>
    ''' NOTE(review): relative links are prefixed with the full page address here, whereas the
    ''' two thread entry points prefix them with the site root. Also, an HTTP 500 is reported
    ''' to ListBox2 here but to ListBox1 by SpiderNewPageWithThreads. Both differences are
    ''' preserved from the previous implementation; confirm which behaviour is intended.
    ''' </remarks>
    Public Sub SpiderThePage(ByVal WebSite As String)
        Dim document As String = DownloadPage(WebSite, False)
        If document Is Nothing Then Exit Sub
        ReportIfSensitive(document, WebSite)
        CrawlAnchors(document, WebSite)
    End Sub

    ''' <summary>
    ''' Scans <paramref name="document"/> for "a href" markers and crawls each new on-site
    ''' link with <see cref="SpiderThePage"/>.
    ''' </summary>
    Private Sub CrawlAnchors(ByVal document As String, ByVal basePrefix As String)
        Dim position As Integer = 0
        While True
            Dim markerIndex As Integer = document.IndexOf("a href", position, StringComparison.Ordinal)
            If markerIndex = -1 Then Exit While

            Dim valueStart As Integer = markerIndex + AnchorValueOffset
            If valueStart >= document.Length Then Exit While
            position = valueStart

            Dim link As String = NextNewLink(document, markerIndex, valueStart, basePrefix)
            If link Is Nothing Then Continue While

            Dim counters As New GUIThreads
            CallBack.Invoke(New MyDelPtr(AddressOf counters.AddTotal)) 'Invoke on main thread
            SpiderThePage(link)
        End While
    End Sub

    ''' <summary>
    ''' Reads the quoted value starting at <paramref name="valueStart"/> and returns it as an
    ''' absolute address if it is an on-site link that has not been recorded before.
    ''' </summary>
    ''' <returns>
    ''' The address to crawl, or Nothing when the candidate is a "#" fragment, is empty, has no
    ''' closing quote, points off-site, or was already recorded.
    ''' </returns>
    Private Function NextNewLink(ByVal document As String, ByVal markerIndex As Integer, ByVal valueStart As Integer, ByVal basePrefix As String) As String
        If document.Chars(valueStart) = "#"c Then Return Nothing

        ' -1: no closing quote (malformed markup, so do not treat the rest of the document
        ' as a URL); equal to valueStart: empty value.
        Dim closingQuote As Integer = document.IndexOf(""""c, valueStart)
        If closingQuote <= valueStart Then Return Nothing

        Dim rawLink As String = document.Substring(valueStart, closingQuote - valueStart)
        Debug.WriteLine(markerIndex & " : " & rawLink)

        Dim link As String = ResolveLink(rawLink, basePrefix)
        If link Is Nothing Then
            Debug.WriteLine("NOT IKENET")
            Return Nothing
        End If

        If Not AddStringToGArray(link) Then
            Debug.WriteLine("Skipped Dubbed: " & link)
            Return Nothing
        End If

        Return link
    End Function

    ''' <summary>
    ''' Turns a raw link into the address to download: links not starting with "http" get
    ''' <paramref name="basePrefix"/> prepended; absolute links are kept only when they point
    ''' at the Ikenet site over http or https.
    ''' </summary>
    ''' <returns>The address, or Nothing for an off-site link.</returns>
    Private Shared Function ResolveLink(ByVal rawLink As String, ByVal basePrefix As String) As String
        If Not rawLink.StartsWith("HTTP", StringComparison.OrdinalIgnoreCase) Then
            Return basePrefix & rawLink
        End If

        ' The previous test was "Not http-root Or Not https-root", which is true for every
        ' string and therefore rejected all absolute links, on-site ones included.
        If rawLink.StartsWith(SiteRoot, StringComparison.OrdinalIgnoreCase) OrElse
           rawLink.StartsWith(SecureSiteRoot, StringComparison.OrdinalIgnoreCase) Then
            Return rawLink
        End If

        Return Nothing
    End Function

    ''' <summary>
    ''' Downloads <paramref name="url"/>. On failure the address is posted to one of the
    ''' form's list boxes and Nothing is returned.
    ''' </summary>
    ''' <param name="url">Address to download.</param>
    ''' <param name="serverErrorIsFinding">
    ''' When True, an HTTP 500 response is posted to ListBox1 instead of ListBox2.
    ''' </param>
    Private Function DownloadPage(ByVal url As String, ByVal serverErrorIsFinding As Boolean) As String
        MyClient = New System.Net.WebClient
        Try
            Return MyClient.DownloadString(url)
        Catch ex As Exception
            If serverErrorIsFinding AndAlso IsInternalServerError(ex) Then
                PostToListBox1(url)
            Else
                PostToListBox2(url)
            End If
            Return Nothing
        End Try
    End Function

    ''' <summary>
    ''' True when <paramref name="ex"/> is a WebException carrying an HTTP 500 response.
    ''' Checks the status code rather than the exception message, which varies with the
    ''' UI language and the server's reason phrase.
    ''' </summary>
    Private Shared Function IsInternalServerError(ByVal ex As Exception) As Boolean
        Dim webError As System.Net.WebException = TryCast(ex, System.Net.WebException)
        If webError Is Nothing Then Return False

        Dim response As System.Net.HttpWebResponse = TryCast(webError.Response, System.Net.HttpWebResponse)
        Return response IsNot Nothing AndAlso response.StatusCode = System.Net.HttpStatusCode.InternalServerError
    End Function

    ''' <summary>
    ''' Posts <paramref name="url"/> to ListBox1 when the page text contains "PASSWORD=" or
    ''' "developer for debugging" (text comparison, i.e. case-insensitive).
    ''' </summary>
    Private Sub ReportIfSensitive(ByVal document As String, ByVal url As String)
        If InStr(document, "PASSWORD=", CompareMethod.Text) > 0 OrElse
           InStr(document, "developer for debugging", CompareMethod.Text) > 0 Then
            PostToListBox1(url)
        End If
    End Sub

    ''' <summary>Adds <paramref name="url"/> to the form's ListBox1 on the UI thread.</summary>
    Private Sub PostToListBox1(ByVal url As String)
        Dim gui As New GUIThreads
        gui.Data = url
        CallBack.Invoke(New MyDelPtr(AddressOf gui.Listbox1Add)) 'Invoke on main thread
    End Sub

    ''' <summary>Adds <paramref name="url"/> to the form's ListBox2 on the UI thread.</summary>
    Private Sub PostToListBox2(ByVal url As String)
        Dim gui As New GUIThreads
        gui.Data = url
        CallBack.Invoke(New MyDelPtr(AddressOf gui.Listbox2Add)) 'Invoke on main thread
    End Sub

    ''' <summary>
    ''' Records <paramref name="Webpage"/> in the shared visited-link table unless it is
    ''' already there.
    ''' </summary>
    ''' <param name="Webpage">Absolute address of the link.</param>
    ''' <returns>
    ''' True when the link was newly recorded and should be crawled; False when it was already
    ''' present or the table is full.
    ''' </returns>
    Private Function AddStringToGArray(ByVal Webpage As String) As Boolean
        Dim visited As String() = CallBack.gExtractedLinks

        ' Several crawler threads record links concurrently; without the lock two of them
        ' could claim the same slot or both record the same link.
        SyncLock VisitedLock
            ' "To Length - 1": VB's To is inclusive, so the former "To Length" indexed one
            ' element past the end once the table was full.
            For i As Integer = 0 To visited.Length - 1
                If visited(i) = Webpage Then Return False

                ' First empty slot marks the end of the used part of the table.
                If String.IsNullOrEmpty(visited(i)) Then
                    visited(i) = Webpage
                    Return True
                End If
            Next
        End SyncLock

        ' Table full: the link cannot be tracked, so refuse it rather than risk crawling
        ' the same pages forever.
        Debug.WriteLine("Link table full, skipping: " & Webpage)
        Return False
    End Function
End Class
''' <summary>
''' Small carrier for UI updates. The crawler creates an instance, sets <see cref="Data"/>
''' where needed and runs one of the methods through the form's Invoke, so the methods
''' execute on the UI thread. All methods act on the default Form1 instance.
''' </summary>
Class GUIThreads
    ''' <summary>Text added to a list box by Listbox1Add / Listbox2Add.</summary>
    Public Data As String

    ''' <summary>Appends <see cref="Data"/> to Form1.ListBox1.</summary>
    Public Sub Listbox1Add()
        Form1.ListBox1.Items.Add(Data)
    End Sub

    ''' <summary>Appends <see cref="Data"/> to Form1.ListBox2.</summary>
    Public Sub Listbox2Add()
        Form1.ListBox2.Items.Add(Data)
    End Sub

    ''' <summary>Increments Form1.TotalTags and refreshes the title bar.</summary>
    Public Sub AddTotal()
        Form1.TotalTags += 1
        RefreshTitle()
    End Sub

    ''' <summary>Increments Form1.TotalWorkingThreads and refreshes the title bar.</summary>
    Public Sub AddWorkingThreads()
        Form1.TotalWorkingThreads += 1
        RefreshTitle()
    End Sub

    ''' <summary>
    ''' Writes both counters to the form title. The separator keeps the thread count from
    ''' running straight into the "Total Spidered" label.
    ''' </summary>
    Private Sub RefreshTitle()
        Form1.Text = "Working Threads : " & Form1.TotalWorkingThreads & " | Total Spidered: " & Form1.TotalTags
    End Sub
End Class