Dre1k23 · April 22, 2024 14:59 · Dre1k23 · Apr 20, 2024
diff --git a/for data processing b/for data processing
 #Convert all string values to lowercase (non-string values are left unchanged)
 (
  def _lower_if_str(value):
      """Return ``value`` lowercased when it is a string, otherwise unchanged."""
      if isinstance(value, str):
          return value.lower()
      return value

  # Apply the helper element-wise to every cell of the frame.
  df = df.map(_lower_if_str)
 )

 #Assign the correct data format to the attributes that need it.
 (
  # Parse the column into datetime values (replace the placeholder with a real column name).
  df['your column name'] = pd.to_datetime(df['your column name'])
  # Bind a second name to the same frame; this is plain assignment, no copy is made.
  dfq = df
 )

 #Let's reduce the attributes to one data type by encoding every object column as integer codes
 (
  # Select the columns whose dtype is ``object``.
  column_factorize = df.select_dtypes(include = 'object')

  # Replace each selected column by the integer codes returned by pd.factorize
  # (element [0] of its result).
  # NOTE: with pandas' default settings missing values are encoded as -1, so
  # they no longer show up as nulls in the encoded columns.
  df2 = column_factorize.apply(lambda x: pd.factorize(x)[0])
  # Rebuild df from the encoded columns plus the explicitly listed columns;
  # columns that are neither object-typed nor listed here are not carried over.
  # NOTE(review): both placeholders are identical -- fill in the real column names.
  df = pd.concat([df2, df[['your column name', 'your column name']]], axis = 1)
  # Bare expression; presumably meant to display the result in a notebook cell.
  df
 )
diff --git a/for work with python b/for work with python
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    import warnings 
    warnings.filterwarnings('ignore')
diff --git a/Working with null values b/Working with null values
 #We need to check how many nulls we have
 (
  df.isnull().sum()
 )

 #We can also flag critical columns, i.e. those whose share of missing values exceeds a threshold
 (
  # Share of missing values above which a column is reported as critical.
  critical_nulls = 0.3

  # Per-column fraction of missing entries in dfq, narrowed to the columns
  # that exceed the threshold.
  missing_ratios = dfq.isnull().mean()
  critical_columns = missing_ratios.loc[missing_ratios > critical_nulls]

  if critical_columns.empty:
      print("No critical columns.")
  else:
      print("Critical column:")
      print(critical_columns)
 )

 #To estimate the error introduced by the permitted share of missing values, we can inspect the summary statistics of every column
 (
  df.describe(include = "all")
 )
	#Convert all string values to lowercase (non-string values are left unchanged)
	(
	def _lower_if_str(value):
	    """Return ``value`` lowercased when it is a string, otherwise unchanged."""
	    if isinstance(value, str):
	        return value.lower()
	    return value

	# Apply the helper element-wise to every cell of the frame.
	df = df.map(_lower_if_str)
	)

	#Assign the correct data format to the attributes that need it.
	(
	# Parse the column into datetime values (replace the placeholder with a real column name).
	df['your column name'] = pd.to_datetime(df['your column name'])
	# Bind a second name to the same frame; this is plain assignment, no copy is made.
	dfq = df
	)

	#Let's reduce the attributes to one data type by encoding every object column as integer codes
	(
	# Select the columns whose dtype is ``object``.
	column_factorize = df.select_dtypes(include = 'object')

	# Replace each selected column by the integer codes returned by pd.factorize
	# (element [0] of its result).
	# NOTE: with pandas' default settings missing values are encoded as -1, so
	# they no longer show up as nulls in the encoded columns.
	df2 = column_factorize.apply(lambda x: pd.factorize(x)[0])
	# Rebuild df from the encoded columns plus the explicitly listed columns;
	# columns that are neither object-typed nor listed here are not carried over.
	# NOTE(review): both placeholders are identical -- fill in the real column names.
	df = pd.concat([df2, df[['your column name', 'your column name']]], axis = 1)
	# Bare expression; presumably meant to display the result in a notebook cell.
	df
	)
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import warnings
	warnings.filterwarnings('ignore')
	#We need to check how many nulls we have
	(
	df.isnull().sum()
	)

	#We can also flag critical columns, i.e. those whose share of missing values exceeds a threshold
	(
	# Share of missing values above which a column is reported as critical.
	critical_nulls = 0.3

	# Per-column fraction of missing entries in dfq, narrowed to the columns
	# that exceed the threshold.
	missing_ratios = dfq.isnull().mean()
	critical_columns = missing_ratios[missing_ratios > critical_nulls]

	# The print calls must be indented under their branch; without that
	# indentation Python rejects the if/else with an IndentationError.
	if not critical_columns.empty:
	    print("Critical column:")
	    print(critical_columns)
	else:
	    print("No critical columns.")
	)

	#To estimate the error introduced by the permitted share of missing values, we can inspect the summary statistics of every column
	(
	df.describe(include = "all")
	)