# import pandas as pd
import pandas as pd
# import numpy as np
import numpy as np
# --- NumPy array basics ------------------------------------------------
# A plain one-dimensional NumPy array.
v = np.array([1, 2, 3, 65, 765])
print("Content of", type(v), "is:", v)
print("VALUE AT POS1 in array is:", (v[0]))

# --- Series with the default index -------------------------------------
data = pd.Series([1, 2, 3, 65, 765])
# Alternative with string labels:
# data = pd.Series([1, 2, 3, 65, 765], index=['a', 'b', 'c', 'd', 'e'])

# pd.Series() builds a data series; given an existing Series it simply
# produces another Series with the same labels and values.
ser = pd.Series(data)
print("Series content is: \n", ser)
print("1st element of a dataseries is:", ser[0])
# Retrieve the first N (here 3) elements of the series.
print(ser[:3])
# NOTE: the default index starts from 0.

# --- Explicitly assigning an index -------------------------------------
ser1 = pd.Series([1, 2, 3, 4, 5], index=[11, 12, 13, 14, 15])
print(ser1)

# When the data argument is itself a Series, `index=` does NOT relabel it:
# pandas looks each requested label up in the existing index and fills the
# ones it cannot find with NaN.  Passing the raw values instead attaches the
# new labels to the existing values, which is what this section demonstrates.
ser = pd.Series(data.values, index=['a', 'b', 'c', 'd', 'e'])
print(ser)
newser = pd.Series(data.values, index=range(1, 6))
print(newser)
# NOTE: a Series is a one-dimensional labelled array that can hold any data
# type.  The axis labels are collectively referred to as the index.
import pandas as pd
# --- Series from a dict ------------------------------------------------
# A dict with a single key whose value is a list.
D1 = {"Numbers": [0, 1, 2, 3, 4]}
print(type(D1), D1)
# The dict keys become the index, so this Series has exactly one element
# (label "Numbers") whose value is the whole list.
ds = pd.Series(D1)
print(type(ds), ds)

# --- DataFrame from a dict of equal-length lists -----------------------
D2 = {'Quarter': ['Q1', 'Q2', 'Q3', 'Q4'], 'Sold': [100, 120, 90, 150]}
# Show the dict the DataFrame is built from (D1 was already printed above).
print(D2)
df = pd.DataFrame(D2)
print(df)
print(df['Quarter'])      # single label  -> Series
print(df[['Quarter']])    # list of labels -> one-column DataFrame
print(df['Sold'])

# Element-wise comparison yields a boolean Series ...
sold_over_100 = df['Sold'] > 100
print(sold_over_100)
# ... which can be used as a mask to keep only the matching rows.
sold_under_110 = df[df['Sold'] < 110]
print(sold_under_110)

# Series with explicit string labels.
data = pd.Series([1, 2, 3, 65, 765], index=['a', 'b', 'c', 'd', 'e'])
print(data)

# NOTE: pandas must be installed before it can be imported, e.g. with the
# command `pip install pandas`.
import pandas as pd
import numpy as np
# Source values kept in a plain Python list.
v1 = [1, 2, 3, 65, 765]
# NumPy-array alternative:
# v1 = np.array([1, 2, 3, 65, 765])
data = pd.Series(v1)
print(data)

# Build a data series with pd.Series(); the default index starts from 0.
ser = pd.Series(data)
print("Series content is: \n", ser)
first_value = ser[0]
print("1st element of a dataseries is:", first_value)
# First three elements of the series.
leading_three = ser[:3]
print(leading_three)

# Same values, this time with explicitly assigned integer labels 11..15.
custom_labels = list(range(11, 16))
ser = pd.Series(v1, index=custom_labels)
# String-label alternative:
# ser = pd.Series(v1, index=['a', 'b', 'c', 'd', 'e'])
print(ser)

# Requesting labels 0..4 from `data`, whose index is already 0..4.
newser = pd.Series(data, index=range(5))
print(newser)
# NOTE: a data series is a single-dimensional datatype; its axis labels are
# collectively referred to as the index.
# importing pandas module
import pandas as pd
# Load the CSV file; read_csv() returns its content as a 2-D DataFrame
# (a table).
df = pd.read_csv("WBCDdata.csv")

# Extract the 'radius_mean' column from the DataFrame with the indexing
# operator [] and wrap it in a Series.
radius_column = df['radius_mean']
ser = pd.Series(radius_column)

# Keep the first 10 entries of that series.
data = ser.head(10)
print(data)

# Slice with the plain indexing operator ...
print(data[3:6])
# ... and with .iloc, which slices by integer position.
print(data.iloc[3:6])
####PERFORMING OPERATIONS ON SERIES DATA
# importing pandas module
import pandas as pd
# Two series whose labels only partly overlap: 'a', 'b' and 'd' are shared,
# 'c' exists only in data1 and 'e' only in data2.
data1 = pd.Series([5, 2, 3, 7], index=['a', 'b', 'c', 'd'])
data2 = pd.Series([1, 6, 4, 9], index=['a', 'b', 'd', 'e'])
print("data2 details \n", data2, "\n\n", "data1 details \n", data1)

# `+` aligns the operands on their labels; a label missing from either side
# ('c', 'e') yields NaN in the result.
print(data1 + data2)

# Handling NaN: add() with fill_value=0 substitutes 0 for a label that is
# missing on one side, so every label gets a number.  The result is kept and
# printed; as a bare expression it was computed and silently discarded.
filled_sum = data1.add(data2, fill_value=0)
print(filled_sum)
### DATATYPE CONVERSION OPERATIONS
# importing pandas module
import pandas as pd
# Load the data set from the local CSV file.
data = pd.read_csv("WBCDdata.csv")

# Remove, in place, every ROW that contains at least one missing value
# (dropna() works row-wise unless axis=1 is passed).
# NOTE(review): if the CSV carries a completely empty column this drops all
# rows - confirm against the actual file.
data.dropna(inplace=True)

# Column dtypes before the conversion.
before = data.dtypes

# Convert both columns to str using astype().
for column in ("diagnosis", "radius_mean"):
    data[column] = data[column].astype(str)

# Column dtypes after the conversion.
after = data.dtypes

# Print both snapshots to compare them.
print("BEFORE CONVERSION\n", before, "\n")
print("AFTER CONVERSION\n", after, "\n")

# Convert the 'diagnosis' column to a plain Python list.
country_list = data["diagnosis"].tolist()
country_list
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# The integers 1..999, held as a range object.
x = range(1, 1000)
print("original array:", x)

# Build a series from the range and replace it with its cumulative sum.
ts = pd.Series(x).cumsum()
print("data series", ts)

# Plot the cumulative sum through the Series.plot() accessor.
ts.plot()

# NOTE: "indexing" means referring to an element of an iterable by its
# position within the iterable; "slicing" means getting a subset of elements
# from an iterable based on their indices.
import pandas as pd
# Six float values labelled 'a' .. 'f'.
labels = list("abcdef")
values = [0.25, 0.5, 0.75, 1.0, 1.25, 1.5]
data = pd.Series(values, index=labels)
# Alternative that builds the index with the pd.Index constructor:
# ind = pd.Index([1, 3, 5, 7])
# data = pd.Series([0.25, 0.5, 0.75, 1.0], index=[ind])
data
import pandas as pd
# --- Building the series ------------------------------------------------
# Index labels held in a tuple, values held in a list.
r = ('a', 'b', 'c', 'd', 'e', 'f')
print(type(r))
c = [0.25, 0.5, 0.75, 1.0, 2, 1.75]
print(type(c))
data = pd.Series(c, index=r)
print("original dataseries is: \n", data)

# --- Appending ------------------------------------------------------------
# One-element series to append.
s3 = pd.Series([1.25], index=['g'])
# More than one new element can be appended the same way, and the index can
# be created with the Index constructor, for example:
# my_indx = pd.Index(['g', 'h', 'i', 'j'])
# s3 = pd.Series([40, 50, 60, 70], index=my_indx)

# Series.append() was removed in pandas 2.0; pd.concat() is its replacement
# and also works on older pandas versions.
res = pd.concat([data, s3])
print("Appended value is:", s3)
print("Dataseries after appending is\n", res)
# Try ascending=False to reverse the order.
print(" result in sorted order is:", res.sort_values(ascending=True))

# --- Explicit (label) vs implicit (position) access ----------------------
print(data)
print(data['b'])        # explicit / user-defined label
# print(data.iloc[3])
# Position-based access goes through .iloc; data[1] on a string-labelled
# series relies on a deprecated positional fallback.
print(data.iloc[1])     # implicit position, counted from zero
# Both prints above show the same value: 'b' is the label at position 1.

# `in` tests membership among the index LABELS.
# print(1 in data)
print('b' in data)
print(data.keys())
# An integer position is therefore not found: 2 is not a label, so this
# prints False.
print(2 in data)
# `ind` is only assigned in commented-out code in the visible source, so
# uncommenting the next line is expected to raise NameError.
# ind[1]

# The underlying values as a NumPy array, and its first element by position.
print(data.values)
first_raw_value = data.values[0]
print(first_raw_value)

# --- Slicing ------------------------------------------------------------
# Label slice with the indexing operator (both end labels are included).
print(data['b':'d'])
# Integer slice with the indexing operator: taken by position.
print(data[1:2])
# Explicit indexing with .loc (label based).
print(data.loc['b'])
# Implicit indexing with .iloc (position based).
print(data.iloc[1:2])