In [1]:
# Import all libraries needed for the tutorial
# General syntax to import specific functions in a library:
##from (library) import (specific library function)
from pandas import DataFrame, read_csv
# General syntax to import a library but no functions:
##import (library) as (give the library a nickname/alias)
import matplotlib.pyplot as plt
import pandas as pd #this is how I usually import pandas
import sys #only needed to determine Python version number
import matplotlib #only needed to determine Matplotlib version number
# Enable inline plotting so figures render in the notebook output
# (harmless, but usually unnecessary on modern Jupyter, which does this by default)
%matplotlib inline
In [2]:
# Report the interpreter and library versions in use, one per line
print(
    'Python version ' + sys.version,
    'Pandas version ' + pd.__version__,
    'Matplotlib version ' + matplotlib.__version__,
    sep='\n',
)
In [3]:
# The initial set of baby names and their birth counts.
# The two lists are parallel: births[i] is the count for names[i].
names = [
    'Bob',
    'Jessica',
    'Mary',
    'John',
    'Mel',
]
births = [
    968,
    155,
    77,
    578,
    973,
]
In [4]:
# IPython help lookup: show the documentation for the built-in zip
zip?
In [5]:
# Pair each name with its birth count -> list of (name, births) tuples
BabyDataSet = [(name, count) for name, count in zip(names, births)]
BabyDataSet
Out[5]:
In [6]:
# Build a two-column DataFrame from the (name, births) tuples
df = pd.DataFrame(
    BabyDataSet,
    columns=['Names', 'Births'],
)
df
Out[6]:
In [7]:
# IPython help lookup: show the documentation for DataFrame.to_csv
df.to_csv?
In [8]:
# Export the frame to a CSV file at a relative path,
# writing neither the index column nor the header row
df.to_csv(
    'births1880.csv',
    index=False,
    header=False,
)
In [9]:
# IPython help lookup: show the documentation for pandas read_csv
read_csv?
In [10]:
# Read back the file written by df.to_csv('births1880.csv', ...) earlier.
# Use the same relative path so the notebook works on any machine, instead of
# depending on one user's absolute Windows directory.
Location = 'births1880.csv'
# No header argument is passed, so pandas infers the header from the first
# line of the file -- the first record ends up as the column names.
df = pd.read_csv(Location)
In [11]:
# Display the frame produced by the read above
df
Out[11]:
In [12]:
# Re-read the file, this time declaring that it has no header row
df = pd.read_csv(
    Location,
    header=None,
)
df
Out[12]:
In [13]:
# Re-read the file once more, supplying explicit column names
df = pd.read_csv(
    Location,
    names=['Names', 'Births'],
)
df
Out[13]:
In [14]:
import os

# Delete the CSV file now that it has been read into df.
# Guarded so that re-running this cell after the file is already gone does
# not raise FileNotFoundError.
if os.path.exists(Location):
    os.remove(Location)
In [15]:
# Check the data type of every column in the frame
df.dtypes
Out[15]:
In [16]:
# Check the data type of the Births column only
df['Births'].dtype
Out[16]:
In [17]:
# Method 1: sort by Births, largest first, then show the top row
Sorted = df.sort_values(by='Births', ascending=False)
Sorted.head(1)
Out[17]:
In [18]:
# Method 2: ask for the largest value in the Births column directly
df['Births'].max()
Out[18]:
In [19]:
# Create graph: line plot of the Births column; keep the Axes so the
# annotation below is attached to this exact plot
ax = df['Births'].plot()

# Maximum value in the data set
MaxValue = df['Births'].max()

# Name(s) associated with the maximum value (an array -- there may be ties)
MaxName = df['Names'][df['Births'] == MaxValue].values

# Text to display on graph. MaxName is an array, so join its entries into a
# plain string; concatenating the array directly yields an array whose repr
# (brackets and quotes) would end up on the plot.
Text = str(MaxValue) + " - " + ", ".join(MaxName)

# Add text to graph: x is given in axes fraction (1 = right edge), y in data
# units (the maximum), and the label is shifted 8 points to the right
ax.annotate(Text, xy=(1, MaxValue), xytext=(8, 0),
            xycoords=('axes fraction', 'data'), textcoords='offset points')

print("The most popular name")
df[df['Births'] == MaxValue]
#Sorted.head(1) can also be used
Out[19]:
No comments:
Post a Comment