I have created a function that removes the atypical values on the column of a dataframe by passing the df and the column as parameters:
import numpy as np
def outlier(df, col_name):
q1 = np.percentile(np.array(df[col_name].tolist()), 25)
q3 = np.percentile(np.array(df[col_name].tolist()), 75)
IQR = q3 - q1
Q3 = q1+(3*IQR)
Q1 = q3-(3*IQR)
outlier_num = 0
for value in df[col_name].values.tolist():
if (value < Q1) | (value > Q3):
outlier_num +=1
return Q1, Q3, outlier_num
The problem is when trying to pass the parameters:
df_covtype = df_covtype[(df_covtype['column_name'] > outlier(df_covtype, 'column_name')[0]) &
(df_covtype['colum_name'] < outlier(df_covtype, 'column_name')[1])]
It tells me the following:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-122-e4962bb5c2b0> in <module>()
----> 1 df_covtype = df_covtype[(df_covtype['column_name'] > outlier(df_covtype, 'column_name')[0]) &
2 (df_covtype['column_name'] < outlier(df_covtype, 'column_name')[1])]
3 df_covtype.shape
1 frames
<ipython-input-119-f1e12f2fd893> in outlier(df, col_name)
2 import numpy as np
3 def outlier(df, col_name):
----> 4 q1 = np.percentile(np.array(df[col_name].tolist()), 25)
5 q3 = np.percentile(np.array(df[col_name].tolist()), 75)
6 IQR = q3 - q1
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in __getattr__(self, name)
5485 ):
5486 return self[name]
-> 5487 return object.__getattribute__(self, name)
5488
5489 def __setattr__(self, name: str, value) -> None:
AttributeError: 'DataFrame' object has no attribute 'tolist'
If anyone can give me a hand, I'd appreciate it. Greetings and thank you