Command to open Jupyter
python -m jupyter lab
Read the file
import pandas as pd

# Location of the CSV file on disk -- replace with the path to your own file.
csv_file_path = 'path/to/your/file.csv'

# Load the CSV contents into a DataFrame.
df = pd.read_csv(csv_file_path)

# Print the DataFrame itself.
print(df)

# Print only the first five rows.
print(df.head(5))

# Print just the columns named in the list, selected by label.
columns_to_show = ['Column1', 'Column2']
print(df[columns_to_show])
------------------------------------------------------------------------------------------
# Install the required packages (matplotlib is needed for the histogram section)
pip install pandas matplotlib
----------------------------------------------------------------------------
histogram
import matplotlib.pyplot as plt
import pandas as pd

# CSV file that holds the data to plot.
csv_file_path = 'manish.csv'

# Load the CSV contents into a DataFrame.
df = pd.read_csv(csv_file_path)

# Name of the column whose values are plotted.
selected_column = 'Number of employees'

# Draw the histogram: 10 bins, blue bars outlined in black.
column_values = df[selected_column]
hist_options = {'bins': 10, 'color': 'blue', 'edgecolor': 'black'}
plt.hist(column_values, **hist_options)

# Title names the plotted column; the x-axis carries the column name and
# the y-axis is labelled 'Frequency'.
plot_title = 'Histogram of {}'.format(selected_column)
plt.title(plot_title)
plt.xlabel(selected_column)
plt.ylabel('Frequency')

# Render the figure.
plt.show()
---------------------------------------------------------------------------------------------------------------
DATA CLEANING
import pandas as pd

# Path to the CSV file to clean.
csv_file_path = 'manish.csv'

# Read the CSV file into a DataFrame.
df = pd.read_csv(csv_file_path)

# Report how many missing values each column contains.
print(df.isnull().sum())

# Drop every row that contains at least one missing value.
df_cleaned = df.dropna()
# Alternatively, fill missing values with a specific value instead of dropping:
# df_cleaned = df.fillna(value)

# Remove duplicate rows from the frame cleaned above. This must operate on
# `df_cleaned`, not the original `df`; otherwise the missing-value handling
# from the previous step is discarded and the result still contains NaNs.
df_cleaned = df_cleaned.drop_duplicates()
No comments:
Post a Comment