this is my code to scrape the player data from this link
http://howstat.com/cricket/Statistics/IPL/PlayerList.asp
from bs4 import BeautifulSoup
import pandas as pd
import requests as rq
import numpy as np
def remove(string):
ns=""
for i in string:
if(not i.isspace()):
ns+=i
return ns
def remove_char(str):
ns=""
bad_chars = [':']
for i in str:
if i not in bad_chars:
ns+=i
return ns
def player_stat(url):
get_url=rq.get(url)
soup = BeautifulSoup(get_url.text,"html.parser")
try:
table = soup.find('table',attrs= {'border': '0', 'width': '270' , 'cellpadding': '4', 'class': 'desktop'})
table2 = soup.find('table', attrs= {'border': '0', 'width': '600', 'cellpadding': '0', 'cellspacing': '0'})
name=table2.find('td', attrs={'width': '125'})
nm= name.text
res= " ".join(nm.split())
play={}
namedic = {'Name': res}
play.update(namedic)
rows = table.findAll('tr')
for row in rows:
cells= row.findAll('td')
try:
a=" ".join(cells[0].text.split())
b=" ".join(cells[1].text.split())
a = remove_char(a)
print(a)
if(not b):
b = 0
if b!= 'N/A':
updict = {a : b}
else:
updict = {a : np.nan}
except:
updict = {a : "na"}
play.update(updict)
return play
except:
return()
base_url = "http://howstat.com/cricket/Statistics/IPL/PlayerOverview.asp?PlayerID="
result = pd.DataFrame()
num=0
player_id=['3916', '4002', '4331', '5863', '3694', '3180', '4756', '4191', '2159', '4104', '4658', '4059', '4539', '5859', '4131', '4271', '4826', '4138', '3236', '4205', '3983', '4134', '4136', '4106', '5019', '4940', '5858', '4155', '4119', '4120', '4777', '5970', '4933', '4121', '4170', '4107', '4556', '3788', '4749', '4146', '4673', '3982', '2113', '6538', '3959', '3608', '3929', '3900', '3057', '2999', '5660', '3739', '4663', '4659', '6528', '4655', '3951', '4705', '4384', '4054', '4211', '4040', '4409', '4159', '4428', '4089', '4041', '4075', '3757', '4152', '4055', '4061', '4511', '3631', '2996', '3460', '4158', '3329', '2116', '3924', '3911', '3908', '3166', '3703', '6517', '4062', '3888', '4726', '4394', '4169', '4063', '4557', '4670', '4380', '4030', '1979', '4091', '4092', '3351', '4157', '4042', '3136', '4779', '4043', '6507', '3766', '3067', '2973', '5924', '4093', '4249', '4001', '3909', '4787', '4693', '3995', '4739', '3973', '4056', '4176', '5332', '6537', '3936', '3988', '3927', '4696', '3241', '6533', '3724', '5849', '4207', '2668', '4064', '3832', '3243', '2197', '3740', '4180', '4190', '4195', '2060', '3847', '4171', '4945', '4559', '3208', '4337', '4074', '3125', '6082', '3930', '4185', '5955', '3643', '4609', '2223', '3845', '1977', '2148', '2263', '4162', '4931', '3107', '4188', '2059', '5866', '6203', '4032', '3159', '2217', '3615', '4161', '2080', '2192', '4206', '4139', '4076', '3569', '4378', '4094', '4668', '5853', '3633', '4212', '4307', '2707', '3887', '4558', '2138', '4045', '4095', '4935', '4077', '4209', '3638', '3662', '3831', '1976', '3799', '4386', '3679', '4273', '3680', '4257', '4681', '3455', '3330', '2068', '3998', '4404', '3273', '3595', '3160', '3850', '4217', '3823', '4057', '4544', '4387', '5971', '4122', '4198', '3644', '4033', '2208', '4751', '4196', '5857', '4130', '5774', '5974', '4135', '0872', '2104', '4179', '3244', '3334', '4238', '4575', '2053', '4029', '2213', '2043', '3746', '4150', '3056', '4149', '3340', '4303', '3210', '2209', '5856', '2743', '4034', '4168', '4545', '4204', '2258', '6512', '4408', '2245', '3842', '4669', '4144', '5846', '4731', '4202', '4151', '3600', '4685', '3147', '4652', '4066', '3991', '3514', '6579', '0962', '4398', '4053', '5848', '4757', '2991', '3657', '2079', '2205', '2139', '3939', '4538', '4692', '4541', '3781', '4108', '4193', '4244', '3171', '4145', '4079', '4694', '4203', '4096', '3178', '4046', '4080', '6527', '6534', '4772', '4701', '3912', '3561', '1098', '3478', '2995', '3628', '3969', '3984', '4825', '3008', '3685', '3632', '1133', '3735', '3736', '4097', '3545', '3787', '5949', '5969', '4780', '3783', '4567', '3839', '2262', '3108', '4186', '3345', '4850', '3767', '2964', '3245', '3101', '3665', '3993', '4675', '4773', '4753', '4026', '3416', '3162', '3451', '3989', '4140', '4714', '3864', '4208', '3985', '1234', '4081', '4414', '4411', '4035', '4036', '4766', '4392', '4123', '4936', '4173', '3697', '4164', '3922', '4183', '4664', '4543', '3704', '3986', '4038', '2168', '2970', '4201', '4636', '4200', '3531', '4929', '2137', '3789', '3574', '2811', '4949', '4948', '3725', '4082', '4083', '4399', '4552', '3790', '4542', '4943', '4124', '4189', '3642', '4311', '4310', '4125', '3352', '4147', '3035', '5968', '4197', '3156', '3550', '6613', '5980', '4167', '3854', '4742', '4194', '4175', '3994', '3747', '3049', '5860', '4647', '3238', '3331', '3465', '2039', '3691', '2041', '4587', '5850', '3164', '4605', '4937', '5979', '4109', '4930', '4110', '4589', '2201', '4939', '3826', '3974', '4339', '3889', '4137', '3287', '5509', '4133', '4216', '3149', '6570', '4400', '4111', '3696', '4484', '4942', '4154', '5851', '4112', '4085', '4069', '4113', '4650', '3996', '4199', '3611', '4000', '3573', '4321', '4775', '4325', '3836', '4838', '3532', '4098', '3759', '4405', '6535', '4047', '4656', '5978', '3132', '4141', '3106', '4114', '3228', '5847', '4099', '2255', '4048', '4126', '4374', '2226', '4058', '4667', '4429', '3907', '4116', '4174', '4172', '2220', '4049', '4070', '4767', '2878', '4732', '6544', '3928', '2974', '4132', '5861', '2152', '5976', '4178', '4177', '3426', '4562', '4027', '3470', '4554', '6539', '4115', '4028', '3923', '3474', '4759', '5662', '4768', '4377', '4160', '2124', '2879', '6520', '4755', '4729', '4769', '2880', '4100', '2882', '3447', '4101', '4555', '6523', '4665', '3332', '4657', '6509', '3354', '3157', '3014', '4748', '3756', '4243', '2888', '3502', '4127', '6508', '3542', '4512', '3327', '2893', '4184', '4156', '4626', '3830', '3242', '4389', '3883', '6607', '2899', '4401', '4210', '4086', '4192', '4213', '4050', '4071', '4549', '4128', '2907', '2975', '3319', '4166', '4118', '4072', '3122', '3348', '3407', '1735', '4379', '4393', '6178', '3824', '3700', '4822', '4143', '3126', '6522', '3530', '3833', '6510', '4679', '4102', '4648', '3743', '4383', '3699', '3100', '6204', '3838', '4181', '4087', '3355', '2000', '4051', '4817', '3658', '3339', '3129', '4938', '3288', '2090', '4163', '4117', '3626', '4165', '4513', '3782', '5865', '4678', '3463', '4052', '3910', '4088', '1856', '3637', '4684', '3017', '3325', '4228', '4420', '3817', '3846', '4421', '3499', '4142', '4390', '5977', '4073', '3786', '4103', '4153', '4148', '2211', '2949', '4527', '4129', '2095']
for x in player_id:
url=base_url+str(x)
stats = player_stat(url)
if stats:
df1=pd.Dataframe(stats, index=[num])
num=num+1
pd.set_option('display.max_columns', None)
results = result.drop(['Batting', 'Fielding', 'Bowling'], axis=0)
results = result
results.drop(results.columns[16],axis=1, inplace=True)
result1=results
result1.fillna("0", inplace=True)
result.head()
print(len(result1.columns))
print(len(result1))
data= result.to_csv('ssssample.csv', index=False)
but I get this error: "['Batting' 'Fielding' 'Bowling'] not found in axis"
can someone tell me how to clear this error or how to modify this code to make it scrape the players data from the website
What I have tried:
i tried to resolve the problem but i couldnt