Click here to Skip to main content
16,019,435 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
<pre lang="Python">import requests
from bs4 import BeautifulSoup
import re
import json


def lol(url, timeout=30):
    """Fetch the form at *url*, replay its callback POST and print the JSON reply.

    The page is first requested with GET so the hidden ``__VIEWSTATE`` and
    ``__EVENTVALIDATION`` inputs can be read from the HTML. Those values are
    sent back, together with a fixed set of form fields, in a POST whose
    ``__CALLBACKID`` targets ``ctl00$PageContent$ddlOrganization``. The first
    ``=[...]`` JSON array found in the response text is parsed and
    pretty-printed to stdout.

    Args:
        url: Address of the form page; used for both the GET and the POST.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. The parsed array is printed, not returned.

    Raises:
        ValueError: If a required hidden input is missing from the page, if
            the response contains no ``=[...]`` array, or if the captured
            text is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
        requests.RequestException: If either HTTP request fails or times out.
    """
    with requests.Session() as req:
        r = req.get(url, timeout=timeout)
        soup = BeautifulSoup(r.content, 'html.parser')

        # Fail with a clear message instead of an AttributeError on None when
        # the page does not contain the expected hidden inputs.
        hidden = {}
        for field_id in ("__VIEWSTATE", "__EVENTVALIDATION"):
            tag = soup.find("input", id=field_id)
            if tag is None:
                raise ValueError(
                    "hidden input %r not found at %s" % (field_id, url))
            hidden[field_id] = tag.get("value")
        vs = hidden["__VIEWSTATE"]
        ev = hidden["__EVENTVALIDATION"]

        data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": vs,
            "__VIEWSTATEGENERATOR": "FE3EF141",
            "": [
                "{2}",
                ""
            ],
            "ctl00_rwmWindowManager_ClientState": "",
            "ctl00_rwTimeoutWarning_ClientState": "",
            "s": "",
            "ctl00$PageContent$orgOption": "rbHaveOrg",
            "ctl00$PageContent$ddlOrganization": "-",
            "ddlOrganization_ClientState": "{\"logEntries\":[],\"value\":\"048447c3-0007-c47b-7c9e-0b3e39483880\",\"text\":\"-\",\"enabled\":true}",
            "ctl00$PageContent$tbLoginID": "",
            "ctl00$PageContent$tbPassword": "",
            "ctl00$PageContent$tbConfirmPassword": "",
            "ctl00$PageContent$tbIndividualFirstName": "",
            "ctl00$PageContent$tbIndividualLastName": "",
            "ctl00$PageContent$tbIndividualSuffix": "",
            "ctl00_PageContent_tbIndividualSuffix_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"\",\"enabled\":true}",
            "ctl00$PageContent$tbIndividualEmail": "",
            "ctl00$PageContent$gvIndividualPhoneNumbers$ctl02$tbIndividualPhoneNumber": "",
            "ctl00$PageContent$gvIndividualPhoneNumbers$ctl03$tbIndividualPhoneNumber": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$hfIndividualAddressCode": "Home",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$Line1": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$Line2": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$PostalCode": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$City": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$State": "",
            "ctl00_PageContent_rptIndividualAddresses_ctl00_acIndividualAddress_State_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$Country": "No Country",
            "ctl00_PageContent_rptIndividualAddresses_ctl00_acIndividualAddress_Country_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"No Country\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$County": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$CongressionalDistrict": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$CASSCertificationDate": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$CarrierRoute": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$DeliveryPointCheckDigit": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$DeliveryPointCode": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$hfIndividualAddressCode": "Work",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$Line1": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$Line2": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$PostalCode": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$City": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$State": "",
            "ctl00_PageContent_rptIndividualAddresses_ctl01_acIndividualAddress_State_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$Country": "No Country",
            "ctl00_PageContent_rptIndividualAddresses_ctl01_acIndividualAddress_Country_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"No Country\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$County": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$CongressionalDistrict": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$CASSCertificationDate": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$CarrierRoute": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$DeliveryPointCheckDigit": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$DeliveryPointCode": "",
            "ctl00$PageContent$ddlIndividualPreferredAddress": "048447c3-000f-c2ac-8bfc-0b3d04988fbc",
            "ctl00$PageContent$chkDoNotMail": "on",
            "ctl00$PageContent$chkDoNotFax": "on",
            "ctl00_PageContent_dlbMessageCategories_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_dlbMessageCategories_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox1": "",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox3": "",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox4": "",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox5": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox5_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox6": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox6_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox7": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox7_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox8": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox8_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox9_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox9_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox10_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox10_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadDatePicker11": "",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_dateInput_text": "",
            "ctl00$PageContent$cfsIndividualCustomFields$RadDatePicker11$dateInput": "",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_dateInput_ClientState": "{\"enabled\":true,\"emptyMessage\":\"\",\"minDateStr\":\"1/1/1 0:0:0\",\"maxDateStr\":\"12/31/9999 0:0:0\"}",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_calendar_SD": "[]",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_calendar_AD": "[[1,1,1],[9999,12,31],[2020,3,24]]",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_ClientState": "{\"minDateStr\":\"1/1/0001 0:0:0\",\"maxDateStr\":\"12/31/9999 0:0:0\"}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox12": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox12_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox13": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox13_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox14": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox14_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox15": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox15_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox16": "",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox17_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox17_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox18": "",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox19": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox19_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox20": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox20_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox21_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox21_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "__CALLBACKID": "ctl00$PageContent$ddlOrganization",
            "__CALLBACKPARAM": "{\"Command\":\"LOD\",\"Text\":\"-\",\"ClientState\":{\"value\":\"\",\"text\":\"\",\"enabled\":true,\"logEntries\":[]},\"Context\":{\"Text\":\"-\",\"NumberOfItems\":40},\"NumberOfItems\":40}",
            "__EVENTVALIDATION": ev
        }
        # POST through the session (not the bare requests module) so any
        # cookies set by the GET above are sent back with the callback.
        r = req.post(url, data=data, timeout=timeout)

    # The JSON array is expected right after an '=' in the response body.
    match = re.search(r"\=(\[.+])", r.text)
    if match is None:
        # Previously a bare except set goal = None, which then crashed inside
        # json.loads with an unrelated TypeError; report the real problem.
        raise ValueError("no '=[...]' JSON array found in the response from %s" % url)

    clear = json.loads(match.group(1))
    print(json.dumps(clear, indent=4))


# Script entry: run the scrape against the CreateAccount_CreateUser page.
lol(url="https://cmt.ps.membersuite.com/profile/CreateAccount_CreateUser.aspx")


What I have tried:

Note that it only gives you the first 20 rows :) but no worries. You can loop over the POST request, changing "NumberOfItems\":0 inside the __CALLBACKPARAM parameter and increasing it by 20 on each POST :)
Posted
Updated 3-Apr-20 10:47am
Comments
Kris Lantz 3-Apr-20 16:11pm    
So... where are you stuck?
ZurdoDev 3-Apr-20 16:19pm    
And?
Patrice T 3-Apr-20 16:42pm    
And do you have a problem, apart from the fact that you haven't tried anything?
Member 14791717 4-Apr-20 4:59am    
I am not asking you to do it all for me. I think I didn't explain well what I need, so let me try again.

Here is website link # https://cmt.ps.membersuite.com/profile/CreateAccount_CreateUser.aspx

There is a drop-down list on that page (labelled "I am affiliated with the company below"). When you click it, it shows information about different organizations, which is basically what I want to scrape.

The code above gives me just 20 rows, but when I try to change the parameter "NumberOfItems\":0 in __CALLBACKPARAM, it gives me an error. When I follow the POST requests by inspecting the page in the browser, I can see that this parameter is increased by 20 for each request. But when I try "NumberOfItems\":40, which is a valid value in those requests, I don't get any results. Maybe some other parameters need to be changed as well and I am missing them; that's why I posted the question, so that someone can help me find them. Let me post the output of this code as well.


Here is output of 20 rows:

[
{
"text": "-",
"value": "048447c3-0007-c47b-7c9e-0b3e39483880",
"attributes": {
"ROW_NUMBER": "1",
"LocalID": "10619",
"EmailAddress": "jamiebolton@hotmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "-",
"value": "048447c3-0007-ced2-814b-0b400d0f470f",
"attributes": {
"ROW_NUMBER": "2",
"LocalID": "11477",
"EmailAddress": "rpt@gwu.edu",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Banco Internacional del Per\u00fa - Interbank",
"value": "048447c3-0007-c0e0-6c76-0b3e66b7e1ec",
"attributes": {
"ROW_NUMBER": "3",
"LocalID": "10703",
"EmailAddress": "dalvarezc84@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Bishop-McDonald Wealth Mgmt Group",
"value": "048447c3-0007-c6d0-d748-03bda528b59f",
"attributes": {
"ROW_NUMBER": "4",
"LocalID": "11697",
"EmailAddress": "",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "BOB-Caridif Life Insurance Co.,Ltd.",
"value": "048447c3-0007-c8b1-bbf2-0b3d578797ea",
"attributes": {
"ROW_NUMBER": "5",
"LocalID": "10094",
"EmailAddress": "bingxinshi@163.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "CEFS Verm\u00f6gensverwaltungs- und Beteiligungs GmbH",
"value": "048447c3-0007-c88c-9064-0b3df4010a50",
"attributes": {
"ROW_NUMBER": "6",
"LocalID": "10467",
"EmailAddress": "sergiufala@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "CGS-CIMB",
"value": "048447c3-0007-c74a-918b-0b3e7c42f6a0",
"attributes": {
"ROW_NUMBER": "7",
"LocalID": "10753",
"EmailAddress": "joelap0506@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "CMT - Denver Chapter",
"value": "048447c3-0007-c603-99e4-0b3deeb54833",
"attributes": {
"ROW_NUMBER": "8",
"LocalID": "10446",

1 solution

We are more than willing to help those that are stuck: but that doesn't mean that we are here to do it all for you! We can't do all the work, you are either getting paid for this, or it's part of your grades and it wouldn't be at all fair for us to do it all for you.

So we need you to do the work, and we will help you when you get stuck. That doesn't mean we will give you a step by step solution you can hand in!
Start by explaining where you are at the moment, and what the next step in the process is. Then tell us what you have tried to get that next step working, and what happened when you did.
Just posting your homework question isn't going to cut the ice here, I'm afraid.
 
Share this answer
 
Comments
Member 14791717 4-Apr-20 4:58am    
I am not asking you to do it all for me. I think I didn't explain well what I need, so let me try again.

Here is website link # https://cmt.ps.membersuite.com/profile/CreateAccount_CreateUser.aspx

There is a drop-down list on that page (labelled "I am affiliated with the company below"). When you click it, it shows information about different organizations, which is basically what I want to scrape.

The code above gives me just 20 rows, but when I try to change the parameter "NumberOfItems\":0 in __CALLBACKPARAM, it gives me an error. When I follow the POST requests by inspecting the page in the browser, I can see that this parameter is increased by 20 for each request. But when I try "NumberOfItems\":40, which is a valid value in those requests, I don't get any results. Maybe some other parameters need to be changed as well and I am missing them; that's why I posted the question, so that someone can help me find them. Let me post the output of this code as well.


Here is output of 20 rows:

[
{
"text": "-",
"value": "048447c3-0007-c47b-7c9e-0b3e39483880",
"attributes": {
"ROW_NUMBER": "1",
"LocalID": "10619",
"EmailAddress": "jamiebolton@hotmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "-",
"value": "048447c3-0007-ced2-814b-0b400d0f470f",
"attributes": {
"ROW_NUMBER": "2",
"LocalID": "11477",
"EmailAddress": "rpt@gwu.edu",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Banco Internacional del Per\u00fa - Interbank",
"value": "048447c3-0007-c0e0-6c76-0b3e66b7e1ec",
"attributes": {
"ROW_NUMBER": "3",
"LocalID": "10703",
"EmailAddress": "dalvarezc84@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Bishop-McDonald Wealth Mgmt Group",
"value": "048447c3-0007-c6d0-d748-03bda528b59f",
"attributes": {
"ROW_NUMBER": "4",
"LocalID": "11697",
"EmailAddress": "",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "BOB-Caridif Life Insurance Co.,Ltd.",
"value": "048447c3-0007-c8b1-bbf2-0b3d578797ea",
"attributes": {
"ROW_NUMBER": "5",
"LocalID": "10094",
"EmailAddress": "bingxinshi@163.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "CEFS Verm\u00f6gensverwaltungs- und Beteiligungs GmbH",
"value": "048447c3-0007-c88c-9064-0b3df4010a50",
"attributes": {
"ROW_NUMBER": "6",
"LocalID": "10467",
"EmailAddress": "sergiufala@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "CGS-CIMB",
"value": "048447c3-0007-c74a-918b-0b3e7c42f6a0",
"attributes": {
"ROW_NUMBER": "7",
"LocalID": "10753",
"EmailAddress": "joelap0506@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "CMT - Denver Chapter",
"value": "048447c3-0007-c603-99e4-0b3deeb54833",
"attributes": {
"ROW_NUMBER": "8",
"LocalID": "10446",
Member 14791717 5-Apr-20 6:27am    
I have done it btw thanks
OriginalGriff 5-Apr-20 6:32am    
Good! Well done!
Member 14791717 5-Apr-20 13:24pm    
thanks (:

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900