I've written a python code to check the unique values of the specified column names from a pandas dataframe.
What I have tried:
Main Code:
Checking the unique values & the frequency of their occurence
def uniq_fun(df, col_name):
try:
logging.info(f"Calculating the unique values and frequency for column '{col_name}'...")
uniq_freq = df[col_name].value_counts(ascending=False)
logging.info(f"Returning unique values and frequency for column '{col_name}' in a dictionary.")
return uniq_freq.to_dict()
except KeyError as e:
logging.error(f"Column '{col_name}' not found in the dataframe: {e}")
except Exception as e:
logging.error(f"An unexpected error occurred while calculating the unique values and frequency for column '{col_name}': {e}")
def cleaning_and_analysis(filepath, operation=None, raw_data=None):
file_extension = os.path.splitext(filepath)[1]
if file_extension == '.csv':
csv_dataframe = pd.read_csv(filepath, encoding='utf-8')
result = {}
if raw_data is not None and isinstance(raw_data, list):
for i in raw_data:
if operation=='uniqueValueFreq':
unique_value_freq_input= i
result12= uniq_fun(csv_dataframe, unique_value_freq_input)
if not result:
result[operation] = [(tuple(unique_value_freq_input), result12)]
else:
result[operation].append((tuple(unique_value_freq_input), result12))
API Code:
from typing import Union
from typing import Optional
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from Data_Cleaning import cleaning_and_analysis
from typing import Any, Optional
app = FastAPI()
class Item(BaseModel):
filepath: str
operation: Optional[str] = None
operand: Optional[Union[list, dict, str, tuple]] = None
@app.post("/data_cleaning_route")
async def data_cleaning(item: Item):
filepath = item.filepath
operation_name = item.operation
operands = item.operand
cleaned_data = {} # initialize cleaned_data to an empty dictionary
if operands is not None and len(operands) >= 1:
for operand in operands:
if isinstance(operand, dict): # check if i is a dictionary
for operation_name,operand_values in operand.items():
cleaned_data.update(cleaning_and_analysis(filepath, operation=operation_name, raw_data=operand_values))
return cleaned_data
else:
data_clean_var2 = cleaning_and_analysis(filepath=item.filepath, operation=item.operation)
#print(data_clean_var2)
return data_clean_var2
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)
Postman Payload:
{
"filepath": "C:/Downloads/shootings.csv",
"operand":[ {"uniqueValueFreq":[["flee"], ["race"]]}
]
}
Error:
TypeError: unhashable type: 'list'
What I've tried:
I converted the highlighted part from
Main Code which was previously a dictionary, to a tuple. The same error continues. How do I fix this? The error is exactly at:
result[operation] = [(tuple(unique_value_freq_input), result12)]
Debugging Info:
API debugging
print(filepath)
print(operation_name)
print(operand_values)
C:/shootings.csv
uniqueValueFreq
[['flee'], ['race']]
As we can see above, the API is able to read the postman inputs.
Main Code debugging:
print(result12)
Output:
{('White',): 2476, ('Black',): 1298, ('Hispanic',): 902, ('Asian',): 93, ('Native',): 78, ('Other',): 48}
This is the expected output. Not getting this when returning via API.
Note: Please let me know if any more inputs are needed to answer my query.