# import pandas as pd
import pandas as pd
import numpy as np
## The same five numbers as a plain Python list and as a NumPy array
lst = [11, 23, 54, 65, 75]
print("list content", type(lst), lst)

arr = np.array(lst)
print("array content", type(arr), arr)

# The DataFrame constructor is called on the NumPy array (not on the list).
# No column names are given, so the single column is labelled 0.
df = pd.DataFrame(data=arr)
print(df)
list content <class 'list'> [11, 23, 54, 65, 75]
array content <class 'numpy.ndarray'> [11 23 54 65 75]
0
0 11
1 23
2 54
3 65
4 75
##### Creating a DataFrame from a flat list, then adding and renaming columns
import pandas as pd

# A flat list of pincodes becomes the single 'Pincode' column.
data = [540095, 600005, 560029, 800023]
df = pd.DataFrame({'Pincode': data})
print(df)

# Two more lists, one entry per row, attached as the columns
# 'PLACE' and 'PLACE_TYPE'.
Place = ['Delhi', 'Patna', 'Bangalore', 'Chennai']
PLACE_TYPE = ['Urban', 'Suburb', 'Urban', 'Suburb']
df['PLACE'] = Place
df['PLACE_TYPE'] = PLACE_TYPE

# Rename the first column so that all three headers are upper-case.
df = df.rename(columns={'Pincode': 'PINCODE'})

# sort_values returns sorted copies; df itself keeps its original row order.
by_place = df.sort_values(by='PLACE')
by_type_then_pincode = df.sort_values(by=['PLACE_TYPE', 'PINCODE'])

print("df contains the following content:\n", df)
print("df Sorted according to PLACE:\n", by_place)
print("df Sorted according to PLACE_TYPE followed by PINCODE: \n", by_type_then_pincode)
Pincode
0 540095
1 600005
2 560029
3 800023
df contains the following content:
PINCODE PLACE PLACE_TYPE
0 540095 Delhi Urban
1 600005 Patna Suburb
2 560029 Bangalore Urban
3 800023 Chennai Suburb
df Sorted according to PLACE:
PINCODE PLACE PLACE_TYPE
2 560029 Bangalore Urban
3 800023 Chennai Suburb
0 540095 Delhi Urban
1 600005 Patna Suburb
df Sorted according to PLACE_TYPE followed by PINCODE:
PINCODE PLACE PLACE_TYPE
1 600005 Patna Suburb
3 800023 Chennai Suburb
0 540095 Delhi Urban
2 560029 Bangalore Urban
import pandas as pd

# Column values are prepared separately and then combined into the
# dictionary the DataFrame is built from (the keys become column names).
yug_names = ['Satyug', 'Tretayug', 'Dwaparyug', 'Kalyug']
yug_years = [1728000, 1296000, 864000, 432000]
data = {'Name': yug_names, 'Years': yug_years}

# Build the DataFrame and show it.
df = pd.DataFrame(data)
print(df)
        Name    Years
0     Satyug  1728000
1   Tretayug  1296000
2  Dwaparyug   864000
3     Kalyug   432000
# ADDING ROWS TO DATAFRAMES
import pandas as pd

# First DataFrame: columns "a" and "b", four rows.
df1 = pd.DataFrame({"a": [1, 2, 3, 4],
                    "b": [5, 6, 7, 8]})
# Second DataFrame: columns "a", "b" and an extra column "c", three rows.
df2 = pd.DataFrame({"a": [1, 2, 3],
                    "b": [5, 6, 7],
                    "c": [1, 5, 4]})

# Stack df2 below df1. DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0, so pd.concat is used instead.
#   ignore_index=True renumbers the rows 0..n-1.
#   sort=False keeps the columns in order of first appearance.
# Rows that come from df1 have no "c" value and therefore hold NaN there.
newdf = pd.concat([df1, df2], ignore_index=True, sort=False)
print("df1", df1)
print("df2", df2)
print("newdf", newdf)

# Pairwise correlation between the columns of df2.
print("correlation: \n", df2.corr())
# Rank of each value within its column of df2.
print("Rank: \n", df2.rank())
df1 a b
0 1 5
1 2 6
2 3 7
3 4 8
df2 a B c
0 1 5 1
1 2 6 5
2 3 7 4
newdf B a b c
0 NaN 1 5.0 NaN
1 NaN 2 6.0 NaN
2 NaN 3 7.0 NaN
3 NaN 4 8.0 NaN
4 5.0 1 NaN 1.0
5 6.0 2 NaN 5.0
6 7.0 3 NaN 4.0
correlation:
a B c
a 1.000000 1.000000 0.720577
B 1.000000 1.000000 0.720577
c 0.720577 0.720577 1.000000
Rank:
a B c
0 1.0 1.0 1.0
1 2.0 2.0 3.0
2 3.0 3.0 2.0
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py:6692: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version of pandas will change to not sort by default. To accept the future behavior, pass 'sort=False'. To retain the current behavior and silence the warning, pass 'sort=True'. sort=sort)
# pandas provides the DataFrame used below
import pandas as pd

# Employee records, stored as one list per column.
data = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
    'Address': ['Delhi', 'Kanpur', 'Allahabad', 'Kannauj'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd'],
}

# Turn the dictionary into a DataFrame (keys become column names).
df = pd.DataFrame(data)
print("DF", df)

# Indexing with a list of labels selects several columns at once.
wanted_columns = ['Name', 'Qualification']
print(df[wanted_columns])
DF Name Age Address Qualification
0 Jai 27 Delhi Msc
1 Princi 24 Kanpur MA
2 Gaurav 22 Allahabad MCA
3 Anuj 32 Kannauj Phd
Name Qualification
0 Jai Msc
1 Princi MA
2 Gaurav MCA
3 Anuj Phd
# pandas is needed for read_csv, DataFrame and Series
import pandas as pd

### Load the CSV file from the working directory into a DataFrame
df = pd.read_csv("WBCDdata.csv")
# Wrap the loaded frame in a second DataFrame object.
df2 = pd.DataFrame(data=df)

### Extract the 'diagnosis' column from df with the indexing operator []
### and wrap it in a Series.
diagnosis_column = df['diagnosis']
ser = pd.Series(diagnosis_column)

### Keep the first 20 entries of the series
data = ser.iloc[:20]
# Bare expression: presumably shown as the cell's output when run in a notebook.
data
0     M
1     M
2     M
3     M
4     M
5     M
6     M
7     M
8     M
9     M
10    M
11    M
12    M
13    M
14    M
15    M
16    M
17    M
18    M
19    B
Name: diagnosis, dtype: object
#### SELECT ROWS AND COLUMNS FROM DATAFRAME (TABLE)
import pandas as pd

# Load the CSV file, wrap it in a second DataFrame object and pull out
# the 'diagnosis' column as a Series.
df = pd.read_csv("WBCDdata.csv")
df2 = pd.DataFrame(data=df)
ser = pd.Series(df['diagnosis'])

# Select two columns by label; the result is neither stored nor printed here.
two_columns = ['diagnosis', 'radius_mean']
df[two_columns]

# Work on the first three rows only.
data = df.head(3)
print("data", data)

# iloc is position-based: the slice 1:3 picks the rows at positions 1 and 2.
row2 = data.iloc[1:3]
print("row2", row2)

# A (row, column) pair of positions returns a single element.
row2 = data.iloc[0, 0]
print("\n row2 for dat.iloc[0,0] \n", row2)
print("===============================================================")

# Re-read the file with 'diagnosis' as the index so that rows can be
# retrieved by label with loc.
data = pd.read_csv("WBCDdata.csv", index_col="diagnosis")
first = data.loc["M"]
malignant_count = len(first)
print("Count of malignant", malignant_count)
#second = data.loc["B"]
print("1st", first, "\n\n\n")
"""DataFrame.loc[] is a method that takes only index labels and returns row or dataframe if the index label exists in the caller data frame."""
data id diagnosis radius_mean texture_mean perimeter_mean area_mean \
0 842302 M 17.99 10.38 122.8 1001.0
1 842517 M 20.57 17.77 132.9 1326.0
2 84300903 M 19.69 21.25 130.0 1203.0
smoothness_mean compactness_mean concavity_mean concave points_mean \
0 0.11840 0.27760 0.3001 0.14710
1 0.08474 0.07864 0.0869 0.07017
2 0.10960 0.15990 0.1974 0.12790
... texture_worst perimeter_worst area_worst smoothness_worst \
0 ... 17.33 184.6 2019.0 0.1622
1 ... 23.41 158.8 1956.0 0.1238
2 ... 25.53 152.5 1709.0 0.1444
compactness_worst concavity_worst concave points_worst symmetry_worst \
0 0.6656 0.7119 0.2654 0.4601
1 0.1866 0.2416 0.1860 0.2750
2 0.4245 0.4504 0.2430 0.3613
fractal_dimension_worst Unnamed: 32
0 0.11890 NaN
1 0.08902 NaN
2 0.08758 NaN
[3 rows x 33 columns]
row2 id diagnosis radius_mean texture_mean perimeter_mean area_mean \
1 842517 M 20.57 17.77 132.9 1326.0
2 84300903 M 19.69 21.25 130.0 1203.0
smoothness_mean compactness_mean concavity_mean concave points_mean \
1 0.08474 0.07864 0.0869 0.07017
2 0.10960 0.15990 0.1974 0.12790
... texture_worst perimeter_worst area_worst smoothness_worst \
1 ... 23.41 158.8 1956.0 0.1238
2 ... 25.53 152.5 1709.0 0.1444
compactness_worst concavity_worst concave points_worst symmetry_worst \
1 0.1866 0.2416 0.186 0.2750
2 0.4245 0.4504 0.243 0.3613
fractal_dimension_worst Unnamed: 32
1 0.08902 NaN
2 0.08758 NaN
[2 rows x 33 columns]
row2 for dat.iloc[0,0]
842302
===============================================================
Count of malignant 212
1st id radius_mean texture_mean perimeter_mean area_mean \
diagnosis
M 842302 17.99 10.38 122.80 1001.0
M 842517 20.57 17.77 132.90 1326.0
M 84300903 19.69 21.25 130.00 1203.0
M 84348301 11.42 20.38 77.58 386.1
M 84358402 20.29 14.34 135.10 1297.0
M 843786 12.45 15.70 82.57 477.1
M 844359 18.25 19.98 119.60 1040.0
M 84458202 13.71 20.83 90.20 577.9
M 844981 13.00 21.82 87.50 519.8
M 84501001 12.46 24.04 83.97 475.9
M 845636 16.02 23.24 102.70 797.8
M 84610002 15.78 17.89 103.60 781.0
M 846226 19.17 24.80 132.40 1123.0
M 846381 15.85 23.95 103.70 782.7
M 84667401 13.73 22.61 93.60 578.3
M 84799002 14.54 27.54 96.73 658.8
M 848406 14.68 20.13 94.74 684.5
M 84862001 16.13 20.68 108.10 798.8
M 849014 19.81 22.15 130.00 1260.0
M 8511133 15.34 14.26 102.50 704.4
M 851509 21.16 23.04 137.20 1404.0
M 852552 16.65 21.38 110.00 904.6
M 852631 17.14 16.40 116.00 912.7
M 852763 14.58 21.53 97.41 644.8
M 852781 18.61 20.25 122.10 1094.0
M 852973 15.30 25.27 102.40 732.4
M 853201 17.57 15.05 115.00 955.1
M 853401 18.63 25.11 124.80 1088.0
M 853612 11.84 18.70 77.93 440.6
M 85382601 17.02 23.98 112.80 899.3
... ... ... ... ... ...
M 9110127 18.03 16.85 117.50 990.0
M 9110732 17.75 28.03 117.30 981.6
M 911157302 21.10 20.52 138.10 1384.0
M 9111805 19.59 25.00 127.70 1191.0
M 911296201 17.08 27.15 111.20 930.9
M 911296202 27.42 26.27 186.90 2501.0
M 9113538 17.60 23.33 119.00 980.5
M 911916 16.25 19.51 109.80 815.8
M 913505 19.44 18.82 128.10 1167.0
M 913535 16.69 20.20 107.10 857.6
M 914062 18.01 20.56 118.40 1007.0
M 914769 18.49 17.52 121.30 1068.0
M 91485 20.59 21.24 137.80 1320.0
M 91504 13.82 24.49 92.33 595.9
M 915143 23.09 19.83 152.10 1682.0
M 915460 15.46 23.95 103.80 731.3
M 915691 13.40 20.52 88.64 556.7
M 91594602 15.05 19.07 97.26 701.9
M 916799 18.31 20.58 120.80 1052.0
M 916838 19.89 20.26 130.50 1214.0
M 91762702 24.63 21.60 165.50 1841.0
M 91930402 20.47 20.67 134.70 1299.0
M 919555 20.55 20.86 137.80 1308.0
M 91979701 14.27 22.55 93.77 629.8
M 925622 15.22 30.62 103.40 716.9
M 926125 20.92 25.09 143.00 1347.0
M 926424 21.56 22.39 142.00 1479.0
M 926682 20.13 28.25 131.20 1261.0
M 926954 16.60 28.08 108.30 858.1
M 927241 20.60 29.33 140.10 1265.0
smoothness_mean compactness_mean concavity_mean \
diagnosis
M 0.11840 0.27760 0.30010
M 0.08474 0.07864 0.08690
M 0.10960 0.15990 0.19740
M 0.14250 0.28390 0.24140
M 0.10030 0.13280 0.19800
M 0.12780 0.17000 0.15780
M 0.09463 0.10900 0.11270
M 0.11890 0.16450 0.09366
M 0.12730 0.19320 0.18590
M 0.11860 0.23960 0.22730
M 0.08206 0.06669 0.03299
M 0.09710 0.12920 0.09954
M 0.09740 0.24580 0.20650
M 0.08401 0.10020 0.09938
M 0.11310 0.22930 0.21280
M 0.11390 0.15950 0.16390
M 0.09867 0.07200 0.07395
M 0.11700 0.20220 0.17220
M 0.09831 0.10270 0.14790
M 0.10730 0.21350 0.20770
M 0.09428 0.10220 0.10970
M 0.11210 0.14570 0.15250
M 0.11860 0.22760 0.22290
M 0.10540 0.18680 0.14250
M 0.09440 0.10660 0.14900
M 0.10820 0.16970 0.16830
M 0.09847 0.11570 0.09875
M 0.10640 0.18870 0.23190
M 0.11090 0.15160 0.12180
M 0.11970 0.14960 0.24170
... ... ... ...
M 0.08947 0.12320 0.10900
M 0.09997 0.13140 0.16980
M 0.09684 0.11750 0.15720
M 0.10320 0.09871 0.16550
M 0.09898 0.11100 0.10070
M 0.10840 0.19880 0.36350
M 0.09289 0.20040 0.21360
M 0.10260 0.18930 0.22360
M 0.10890 0.14480 0.22560
M 0.07497 0.07112 0.03649
M 0.10010 0.12890 0.11700
M 0.10120 0.13170 0.14910
M 0.10850 0.16440 0.21880
M 0.11620 0.16810 0.13570
M 0.09342 0.12750 0.16760
M 0.11830 0.18700 0.20300
M 0.11060 0.14690 0.14450
M 0.09215 0.08597 0.07486
M 0.10680 0.12480 0.15690
M 0.10370 0.13100 0.14110
M 0.10300 0.21060 0.23100
M 0.09156 0.13130 0.15230
M 0.10460 0.17390 0.20850
M 0.10380 0.11540 0.14630
M 0.10480 0.20870 0.25500
M 0.10990 0.22360 0.31740
M 0.11100 0.11590 0.24390
M 0.09780 0.10340 0.14400
M 0.08455 0.10230 0.09251
M 0.11780 0.27700 0.35140
concave points_mean symmetry_mean ... texture_worst \
diagnosis ...
M 0.14710 0.2419 ... 17.33
M 0.07017 0.1812 ... 23.41
M 0.12790 0.2069 ... 25.53
M 0.10520 0.2597 ... 26.50
M 0.10430 0.1809 ... 16.67
M 0.08089 0.2087 ... 23.75
M 0.07400 0.1794 ... 27.66
M 0.05985 0.2196 ... 28.14
M 0.09353 0.2350 ... 30.73
M 0.08543 0.2030 ... 40.68
M 0.03323 0.1528 ... 33.88
M 0.06606 0.1842 ... 27.28
M 0.11180 0.2397 ... 29.94
M 0.05364 0.1847 ... 27.66
M 0.08025 0.2069 ... 32.01
M 0.07364 0.2303 ... 37.13
M 0.05259 0.1586 ... 30.88
M 0.10280 0.2164 ... 31.48
M 0.09498 0.1582 ... 30.88
M 0.09756 0.2521 ... 19.08
M 0.08632 0.1769 ... 35.59
M 0.09170 0.1995 ... 31.56
M 0.14010 0.3040 ... 21.40
M 0.08783 0.2252 ... 33.21
M 0.07731 0.1697 ... 27.26
M 0.08751 0.1926 ... 36.71
M 0.07953 0.1739 ... 19.52
M 0.12440 0.2183 ... 34.01
M 0.05182 0.2301 ... 28.12
M 0.12030 0.2248 ... 32.09
... ... ... ... ...
M 0.06254 0.1720 ... 22.02
M 0.08293 0.1713 ... 38.54
M 0.11550 0.1554 ... 32.07
M 0.09063 0.1663 ... 30.96
M 0.06431 0.1793 ... 34.49
M 0.16890 0.2061 ... 31.37
M 0.10020 0.1696 ... 28.87
M 0.09194 0.2151 ... 23.05
M 0.11940 0.1823 ... 30.39
M 0.02307 0.1846 ... 26.56
M 0.07762 0.2116 ... 26.06
M 0.09183 0.1832 ... 22.88
M 0.11210 0.1848 ... 30.76
M 0.06759 0.2275 ... 32.94
M 0.10030 0.1505 ... 23.87
M 0.08520 0.1807 ... 36.33
M 0.08172 0.2116 ... 29.66
M 0.04335 0.1561 ... 28.06
M 0.09451 0.1860 ... 26.20
M 0.09431 0.1802 ... 25.23
M 0.14710 0.1991 ... 26.93
M 0.10150 0.2166 ... 27.15
M 0.13220 0.2127 ... 25.48
M 0.06139 0.1926 ... 34.27
M 0.09429 0.2128 ... 42.79
M 0.14740 0.2149 ... 29.41
M 0.13890 0.1726 ... 26.40
M 0.09791 0.1752 ... 38.25
M 0.05302 0.1590 ... 34.12
M 0.15200 0.2397 ... 39.42
perimeter_worst area_worst smoothness_worst compactness_worst \
diagnosis
M 184.60 2019.0 0.1622 0.6656
M 158.80 1956.0 0.1238 0.1866
M 152.50 1709.0 0.1444 0.4245
M 98.87 567.7 0.2098 0.8663
M 152.20 1575.0 0.1374 0.2050
M 103.40 741.6 0.1791 0.5249
M 153.20 1606.0 0.1442 0.2576
M 110.60 897.0 0.1654 0.3682
M 106.20 739.3 0.1703 0.5401
M 97.65 711.4 0.1853 1.0580
M 123.80 1150.0 0.1181 0.1551
M 136.50 1299.0 0.1396 0.5609
M 151.70 1332.0 0.1037 0.3903
M 112.00 876.5 0.1131 0.1924
M 108.80 697.7 0.1651 0.7725
M 124.10 943.2 0.1678 0.6577
M 123.40 1138.0 0.1464 0.1871
M 136.80 1315.0 0.1789 0.4233
M 186.80 2398.0 0.1512 0.3150
M 125.10 980.9 0.1390 0.5954
M 188.00 2615.0 0.1401 0.2600
M 177.00 2215.0 0.1805 0.3578
M 152.40 1461.0 0.1545 0.3949
M 122.40 896.9 0.1525 0.6643
M 139.90 1403.0 0.1338 0.2117
M 149.30 1269.0 0.1641 0.6110
M 134.90 1227.0 0.1255 0.2812
M 160.50 1670.0 0.1491 0.4257
M 119.40 888.7 0.1637 0.5775
M 136.10 1344.0 0.1634 0.3559
... ... ... ... ...
M 133.30 1292.0 0.1263 0.2666
M 145.40 1437.0 0.1401 0.3762
M 168.20 2022.0 0.1368 0.3101
M 139.80 1421.0 0.1528 0.1845
M 152.10 1648.0 0.1600 0.2444
M 251.20 4254.0 0.1357 0.4256
M 143.60 1437.0 0.1207 0.4785
M 122.10 939.7 0.1377 0.4462
M 153.90 1740.0 0.1514 0.3725
M 127.30 1084.0 0.1009 0.2920
M 143.40 1426.0 0.1309 0.2327
M 146.40 1600.0 0.1412 0.3089
M 163.20 1760.0 0.1464 0.3597
M 106.00 788.0 0.1794 0.3966
M 211.50 2782.0 0.1199 0.3625
M 117.70 909.4 0.1732 0.4967
M 113.30 844.4 0.1574 0.3856
M 113.80 967.0 0.1246 0.2101
M 142.20 1493.0 0.1492 0.2536
M 160.50 1646.0 0.1417 0.3309
M 205.70 2642.0 0.1342 0.4188
M 152.00 1645.0 0.1097 0.2534
M 160.20 1809.0 0.1268 0.3135
M 104.30 728.3 0.1380 0.2733
M 128.70 915.0 0.1417 0.7917
M 179.10 1819.0 0.1407 0.4186
M 166.10 2027.0 0.1410 0.2113
M 155.00 1731.0 0.1166 0.1922
M 126.70 1124.0 0.1139 0.3094
M 184.60 1821.0 0.1650 0.8681
concavity_worst concave points_worst symmetry_worst \
diagnosis
M 0.7119 0.26540 0.4601
M 0.2416 0.18600 0.2750
M 0.4504 0.24300 0.3613
M 0.6869 0.25750 0.6638
M 0.4000 0.16250 0.2364
M 0.5355 0.17410 0.3985
M 0.3784 0.19320 0.3063
M 0.2678 0.15560 0.3196
M 0.5390 0.20600 0.4378
M 1.1050 0.22100 0.4366
M 0.1459 0.09975 0.2948
M 0.3965 0.18100 0.3792
M 0.3639 0.17670 0.3176
M 0.2322 0.11190 0.2809
M 0.6943 0.22080 0.3596
M 0.7026 0.17120 0.4218
M 0.2914 0.16090 0.3029
M 0.4784 0.20730 0.3706
M 0.5372 0.23880 0.2768
M 0.6305 0.23930 0.4667
M 0.3155 0.20090 0.2822
M 0.4695 0.20950 0.3613
M 0.3853 0.25500 0.4066
M 0.5539 0.27010 0.4264
M 0.3446 0.14900 0.2341
M 0.6335 0.20240 0.4027
M 0.2489 0.14560 0.2756
M 0.6133 0.18480 0.3444
M 0.6956 0.15460 0.4761
M 0.5588 0.18470 0.3530
... ... ... ...
M 0.4290 0.15350 0.2842
M 0.6399 0.19700 0.2972
M 0.4399 0.22800 0.2268
M 0.3977 0.14660 0.2293
M 0.2639 0.15550 0.3010
M 0.6833 0.26250 0.2641
M 0.5165 0.19960 0.2301
M 0.5897 0.17750 0.3318
M 0.5936 0.20600 0.3266
M 0.2477 0.08737 0.4677
M 0.2544 0.14890 0.3251
M 0.3533 0.16630 0.2510
M 0.5179 0.21130 0.2480
M 0.3381 0.15210 0.3651
M 0.3794 0.22640 0.2908
M 0.5911 0.21630 0.3013
M 0.5106 0.20510 0.3585
M 0.2866 0.11200 0.2282
M 0.3759 0.15100 0.3074
M 0.4185 0.16130 0.2549
M 0.4658 0.24750 0.3157
M 0.3092 0.16130 0.3220
M 0.4433 0.21480 0.3077
M 0.4234 0.13620 0.2698
M 1.1700 0.23560 0.4089
M 0.6599 0.25420 0.2929
M 0.4107 0.22160 0.2060
M 0.3215 0.16280 0.2572
M 0.3403 0.14180 0.2218
M 0.9387 0.26500 0.4087
fractal_dimension_worst Unnamed: 32
diagnosis
M 0.11890 NaN
M 0.08902 NaN
M 0.08758 NaN
M 0.17300 NaN
M 0.07678 NaN
M 0.12440 NaN
M 0.08368 NaN
M 0.11510 NaN
M 0.10720 NaN
M 0.20750 NaN
M 0.08452 NaN
M 0.10480 NaN
M 0.10230 NaN
M 0.06287 NaN
M 0.14310 NaN
M 0.13410 NaN
M 0.08216 NaN
M 0.11420 NaN
M 0.07615 NaN
M 0.09946 NaN
M 0.07526 NaN
M 0.09564 NaN
M 0.10590 NaN
M 0.12750 NaN
M 0.07421 NaN
M 0.09876 NaN
M 0.07919 NaN
M 0.09782 NaN
M 0.14020 NaN
M 0.08482 NaN
... ... ...
M 0.08225 NaN
M 0.09075 NaN
M 0.07425 NaN
M 0.06091 NaN
M 0.09060 NaN
M 0.07427 NaN
M 0.12240 NaN
M 0.09136 NaN
M 0.09009 NaN
M 0.07623 NaN
M 0.07625 NaN
M 0.09445 NaN
M 0.08999 NaN
M 0.11830 NaN
M 0.07277 NaN
M 0.10670 NaN
M 0.11090 NaN
M 0.06954 NaN
M 0.07863 NaN
M 0.09136 NaN
M 0.09671 NaN
M 0.06386 NaN
M 0.07569 NaN
M 0.08351 NaN
M 0.14090 NaN
M 0.09873 NaN
M 0.07115 NaN
M 0.06637 NaN
M 0.07820 NaN
M 0.12400 NaN
[212 rows x 32 columns]
'DataFrame.loc[] is a method that takes only index labels and returns row or dataframe if the index label exists in the caller data frame.'
# Retrieve rows by index label with loc, once per diagnosis label.
# The file is read with 'diagnosis' as the index so the labels can be used directly.
data = pd.read_csv("WBCDdata.csv", index_col="diagnosis")

# Rows labelled "M" (reported as malignant in the message below).
first = data.loc["M"]
malignant_count = len(first)
print("Count of malignant", malignant_count, "\n ", first, "\n\n\n")

# Rows labelled "B" (reported as benign in the message below).
second = data.loc["B"]
benign_count = len(second)
print("Count of benign", benign_count, "\n ", second, "\n\n\n")
Count of malignant 212
id radius_mean texture_mean perimeter_mean area_mean \
diagnosis
M 842302 17.99 10.38 122.80 1001.0
M 842517 20.57 17.77 132.90 1326.0
M 84300903 19.69 21.25 130.00 1203.0
M 84348301 11.42 20.38 77.58 386.1
M 84358402 20.29 14.34 135.10 1297.0
M 843786 12.45 15.70 82.57 477.1
M 844359 18.25 19.98 119.60 1040.0
M 84458202 13.71 20.83 90.20 577.9
M 844981 13.00 21.82 87.50 519.8
M 84501001 12.46 24.04 83.97 475.9
M 845636 16.02 23.24 102.70 797.8
M 84610002 15.78 17.89 103.60 781.0
M 846226 19.17 24.80 132.40 1123.0
M 846381 15.85 23.95 103.70 782.7
M 84667401 13.73 22.61 93.60 578.3
M 84799002 14.54 27.54 96.73 658.8
M 848406 14.68 20.13 94.74 684.5
M 84862001 16.13 20.68 108.10 798.8
M 849014 19.81 22.15 130.00 1260.0
M 8511133 15.34 14.26 102.50 704.4
M 851509 21.16 23.04 137.20 1404.0
M 852552 16.65 21.38 110.00 904.6
M 852631 17.14 16.40 116.00 912.7
M 852763 14.58 21.53 97.41 644.8
M 852781 18.61 20.25 122.10 1094.0
M 852973 15.30 25.27 102.40 732.4
M 853201 17.57 15.05 115.00 955.1
M 853401 18.63 25.11 124.80 1088.0
M 853612 11.84 18.70 77.93 440.6
M 85382601 17.02 23.98 112.80 899.3
... ... ... ... ... ...
M 9110127 18.03 16.85 117.50 990.0
M 9110732 17.75 28.03 117.30 981.6
M 911157302 21.10 20.52 138.10 1384.0
M 9111805 19.59 25.00 127.70 1191.0
M 911296201 17.08 27.15 111.20 930.9
M 911296202 27.42 26.27 186.90 2501.0
M 9113538 17.60 23.33 119.00 980.5
M 911916 16.25 19.51 109.80 815.8
M 913505 19.44 18.82 128.10 1167.0
M 913535 16.69 20.20 107.10 857.6
M 914062 18.01 20.56 118.40 1007.0
M 914769 18.49 17.52 121.30 1068.0
M 91485 20.59 21.24 137.80 1320.0
M 91504 13.82 24.49 92.33 595.9
M 915143 23.09 19.83 152.10 1682.0
M 915460 15.46 23.95 103.80 731.3
M 915691 13.40 20.52 88.64 556.7
M 91594602 15.05 19.07 97.26 701.9
M 916799 18.31 20.58 120.80 1052.0
M 916838 19.89 20.26 130.50 1214.0
M 91762702 24.63 21.60 165.50 1841.0
M 91930402 20.47 20.67 134.70 1299.0
M 919555 20.55 20.86 137.80 1308.0
M 91979701 14.27 22.55 93.77 629.8
M 925622 15.22 30.62 103.40 716.9
M 926125 20.92 25.09 143.00 1347.0
M 926424 21.56 22.39 142.00 1479.0
M 926682 20.13 28.25 131.20 1261.0
M 926954 16.60 28.08 108.30 858.1
M 927241 20.60 29.33 140.10 1265.0
smoothness_mean compactness_mean concavity_mean \
diagnosis
M 0.11840 0.27760 0.30010
M 0.08474 0.07864 0.08690
M 0.10960 0.15990 0.19740
M 0.14250 0.28390 0.24140
M 0.10030 0.13280 0.19800
M 0.12780 0.17000 0.15780
M 0.09463 0.10900 0.11270
M 0.11890 0.16450 0.09366
M 0.12730 0.19320 0.18590
M 0.11860 0.23960 0.22730
M 0.08206 0.06669 0.03299
M 0.09710 0.12920 0.09954
M 0.09740 0.24580 0.20650
M 0.08401 0.10020 0.09938
M 0.11310 0.22930 0.21280
M 0.11390 0.15950 0.16390
M 0.09867 0.07200 0.07395
M 0.11700 0.20220 0.17220
M 0.09831 0.10270 0.14790
M 0.10730 0.21350 0.20770
M 0.09428 0.10220 0.10970
M 0.11210 0.14570 0.15250
M 0.11860 0.22760 0.22290
M 0.10540 0.18680 0.14250
M 0.09440 0.10660 0.14900
M 0.10820 0.16970 0.16830
M 0.09847 0.11570 0.09875
M 0.10640 0.18870 0.23190
M 0.11090 0.15160 0.12180
M 0.11970 0.14960 0.24170
... ... ... ...
M 0.08947 0.12320 0.10900
M 0.09997 0.13140 0.16980
M 0.09684 0.11750 0.15720
M 0.10320 0.09871 0.16550
M 0.09898 0.11100 0.10070
M 0.10840 0.19880 0.36350
M 0.09289 0.20040 0.21360
M 0.10260 0.18930 0.22360
M 0.10890 0.14480 0.22560
M 0.07497 0.07112 0.03649
M 0.10010 0.12890 0.11700
M 0.10120 0.13170 0.14910
M 0.10850 0.16440 0.21880
M 0.11620 0.16810 0.13570
M 0.09342 0.12750 0.16760
M 0.11830 0.18700 0.20300
M 0.11060 0.14690 0.14450
M 0.09215 0.08597 0.07486
M 0.10680 0.12480 0.15690
M 0.10370 0.13100 0.14110
M 0.10300 0.21060 0.23100
M 0.09156 0.13130 0.15230
M 0.10460 0.17390 0.20850
M 0.10380 0.11540 0.14630
M 0.10480 0.20870 0.25500
M 0.10990 0.22360 0.31740
M 0.11100 0.11590 0.24390
M 0.09780 0.10340 0.14400
M 0.08455 0.10230 0.09251
M 0.11780 0.27700 0.35140
concave points_mean symmetry_mean ... texture_worst \
diagnosis ...
M 0.14710 0.2419 ... 17.33
M 0.07017 0.1812 ... 23.41
M 0.12790 0.2069 ... 25.53
M 0.10520 0.2597 ... 26.50
M 0.10430 0.1809 ... 16.67
M 0.08089 0.2087 ... 23.75
M 0.07400 0.1794 ... 27.66
M 0.05985 0.2196 ... 28.14
M 0.09353 0.2350 ... 30.73
M 0.08543 0.2030 ... 40.68
M 0.03323 0.1528 ... 33.88
M 0.06606 0.1842 ... 27.28
M 0.11180 0.2397 ... 29.94
M 0.05364 0.1847 ... 27.66
M 0.08025 0.2069 ... 32.01
M 0.07364 0.2303 ... 37.13
M 0.05259 0.1586 ... 30.88
M 0.10280 0.2164 ... 31.48
M 0.09498 0.1582 ... 30.88
M 0.09756 0.2521 ... 19.08
M 0.08632 0.1769 ... 35.59
M 0.09170 0.1995 ... 31.56
M 0.14010 0.3040 ... 21.40
M 0.08783 0.2252 ... 33.21
M 0.07731 0.1697 ... 27.26
M 0.08751 0.1926 ... 36.71
M 0.07953 0.1739 ... 19.52
M 0.12440 0.2183 ... 34.01
M 0.05182 0.2301 ... 28.12
M 0.12030 0.2248 ... 32.09
... ... ... ... ...
M 0.06254 0.1720 ... 22.02
M 0.08293 0.1713 ... 38.54
M 0.11550 0.1554 ... 32.07
M 0.09063 0.1663 ... 30.96
M 0.06431 0.1793 ... 34.49
M 0.16890 0.2061 ... 31.37
M 0.10020 0.1696 ... 28.87
M 0.09194 0.2151 ... 23.05
M 0.11940 0.1823 ... 30.39
M 0.02307 0.1846 ... 26.56
M 0.07762 0.2116 ... 26.06
M 0.09183 0.1832 ... 22.88
M 0.11210 0.1848 ... 30.76
M 0.06759 0.2275 ... 32.94
M 0.10030 0.1505 ... 23.87
M 0.08520 0.1807 ... 36.33
M 0.08172 0.2116 ... 29.66
M 0.04335 0.1561 ... 28.06
M 0.09451 0.1860 ... 26.20
M 0.09431 0.1802 ... 25.23
M 0.14710 0.1991 ... 26.93
M 0.10150 0.2166 ... 27.15
M 0.13220 0.2127 ... 25.48
M 0.06139 0.1926 ... 34.27
M 0.09429 0.2128 ... 42.79
M 0.14740 0.2149 ... 29.41
M 0.13890 0.1726 ... 26.40
M 0.09791 0.1752 ... 38.25
M 0.05302 0.1590 ... 34.12
M 0.15200 0.2397 ... 39.42
perimeter_worst area_worst smoothness_worst compactness_worst \
diagnosis
M 184.60 2019.0 0.1622 0.6656
M 158.80 1956.0 0.1238 0.1866
M 152.50 1709.0 0.1444 0.4245
M 98.87 567.7 0.2098 0.8663
M 152.20 1575.0 0.1374 0.2050
M 103.40 741.6 0.1791 0.5249
M 153.20 1606.0 0.1442 0.2576
M 110.60 897.0 0.1654 0.3682
M 106.20 739.3 0.1703 0.5401
M 97.65 711.4 0.1853 1.0580
M 123.80 1150.0 0.1181 0.1551
M 136.50 1299.0 0.1396 0.5609
M 151.70 1332.0 0.1037 0.3903
M 112.00 876.5 0.1131 0.1924
M 108.80 697.7 0.1651 0.7725
M 124.10 943.2 0.1678 0.6577
M 123.40 1138.0 0.1464 0.1871
M 136.80 1315.0 0.1789 0.4233
M 186.80 2398.0 0.1512 0.3150
M 125.10 980.9 0.1390 0.5954
M 188.00 2615.0 0.1401 0.2600
M 177.00 2215.0 0.1805 0.3578
M 152.40 1461.0 0.1545 0.3949
M 122.40 896.9 0.1525 0.6643
M 139.90 1403.0 0.1338 0.2117
M 149.30 1269.0 0.1641 0.6110
M 134.90 1227.0 0.1255 0.2812
M 160.50 1670.0 0.1491 0.4257
M 119.40 888.7 0.1637 0.5775
M 136.10 1344.0 0.1634 0.3559
... ... ... ... ...
M 133.30 1292.0 0.1263 0.2666
M 145.40 1437.0 0.1401 0.3762
M 168.20 2022.0 0.1368 0.3101
M 139.80 1421.0 0.1528 0.1845
M 152.10 1648.0 0.1600 0.2444
M 251.20 4254.0 0.1357 0.4256
M 143.60 1437.0 0.1207 0.4785
M 122.10 939.7 0.1377 0.4462
M 153.90 1740.0 0.1514 0.3725
M 127.30 1084.0 0.1009 0.2920
M 143.40 1426.0 0.1309 0.2327
M 146.40 1600.0 0.1412 0.3089
M 163.20 1760.0 0.1464 0.3597
M 106.00 788.0 0.1794 0.3966
M 211.50 2782.0 0.1199 0.3625
M 117.70 909.4 0.1732 0.4967
M 113.30 844.4 0.1574 0.3856
M 113.80 967.0 0.1246 0.2101
M 142.20 1493.0 0.1492 0.2536
M 160.50 1646.0 0.1417 0.3309
M 205.70 2642.0 0.1342 0.4188
M 152.00 1645.0 0.1097 0.2534
M 160.20 1809.0 0.1268 0.3135
M 104.30 728.3 0.1380 0.2733
M 128.70 915.0 0.1417 0.7917
M 179.10 1819.0 0.1407 0.4186
M 166.10 2027.0 0.1410 0.2113
M 155.00 1731.0 0.1166 0.1922
M 126.70 1124.0 0.1139 0.3094
M 184.60 1821.0 0.1650 0.8681
concavity_worst concave points_worst symmetry_worst \
diagnosis
M 0.7119 0.26540 0.4601
M 0.2416 0.18600 0.2750
M 0.4504 0.24300 0.3613
M 0.6869 0.25750 0.6638
M 0.4000 0.16250 0.2364
M 0.5355 0.17410 0.3985
M 0.3784 0.19320 0.3063
M 0.2678 0.15560 0.3196
M 0.5390 0.20600 0.4378
M 1.1050 0.22100 0.4366
M 0.1459 0.09975 0.2948
M 0.3965 0.18100 0.3792
M 0.3639 0.17670 0.3176
M 0.2322 0.11190 0.2809
M 0.6943 0.22080 0.3596
M 0.7026 0.17120 0.4218
M 0.2914 0.16090 0.3029
M 0.4784 0.20730 0.3706
M 0.5372 0.23880 0.2768
M 0.6305 0.23930 0.4667
M 0.3155 0.20090 0.2822
M 0.4695 0.20950 0.3613
M 0.3853 0.25500 0.4066
M 0.5539 0.27010 0.4264
M 0.3446 0.14900 0.2341
M 0.6335 0.20240 0.4027
M 0.2489 0.14560 0.2756
M 0.6133 0.18480 0.3444
M 0.6956 0.15460 0.4761
M 0.5588 0.18470 0.3530
... ... ... ...
M 0.4290 0.15350 0.2842
M 0.6399 0.19700 0.2972
M 0.4399 0.22800 0.2268
M 0.3977 0.14660 0.2293
M 0.2639 0.15550 0.3010
M 0.6833 0.26250 0.2641
M 0.5165 0.19960 0.2301
M 0.5897 0.17750 0.3318
M 0.5936 0.20600 0.3266
M 0.2477 0.08737 0.4677
M 0.2544 0.14890 0.3251
M 0.3533 0.16630 0.2510
M 0.5179 0.21130 0.2480
M 0.3381 0.15210 0.3651
M 0.3794 0.22640 0.2908
M 0.5911 0.21630 0.3013
M 0.5106 0.20510 0.3585
M 0.2866 0.11200 0.2282
M 0.3759 0.15100 0.3074
M 0.4185 0.16130 0.2549
M 0.4658 0.24750 0.3157
M 0.3092 0.16130 0.3220
M 0.4433 0.21480 0.3077
M 0.4234 0.13620 0.2698
M 1.1700 0.23560 0.4089
M 0.6599 0.25420 0.2929
M 0.4107 0.22160 0.2060
M 0.3215 0.16280 0.2572
M 0.3403 0.14180 0.2218
M 0.9387 0.26500 0.4087
fractal_dimension_worst Unnamed: 32
diagnosis
M 0.11890 NaN
M 0.08902 NaN
M 0.08758 NaN
M 0.17300 NaN
M 0.07678 NaN
M 0.12440 NaN
M 0.08368 NaN
M 0.11510 NaN
M 0.10720 NaN
M 0.20750 NaN
M 0.08452 NaN
M 0.10480 NaN
M 0.10230 NaN
M 0.06287 NaN
M 0.14310 NaN
M 0.13410 NaN
M 0.08216 NaN
M 0.11420 NaN
M 0.07615 NaN
M 0.09946 NaN
M 0.07526 NaN
M 0.09564 NaN
M 0.10590 NaN
M 0.12750 NaN
M 0.07421 NaN
M 0.09876 NaN
M 0.07919 NaN
M 0.09782 NaN
M 0.14020 NaN
M 0.08482 NaN
... ... ...
M 0.08225 NaN
M 0.09075 NaN
M 0.07425 NaN
M 0.06091 NaN
M 0.09060 NaN
M 0.07427 NaN
M 0.12240 NaN
M 0.09136 NaN
M 0.09009 NaN
M 0.07623 NaN
M 0.07625 NaN
M 0.09445 NaN
M 0.08999 NaN
M 0.11830 NaN
M 0.07277 NaN
M 0.10670 NaN
M 0.11090 NaN
M 0.06954 NaN
M 0.07863 NaN
M 0.09136 NaN
M 0.09671 NaN
M 0.06386 NaN
M 0.07569 NaN
M 0.08351 NaN
M 0.14090 NaN
M 0.09873 NaN
M 0.07115 NaN
M 0.06637 NaN
M 0.07820 NaN
M 0.12400 NaN
[212 rows x 32 columns]
Count of benign 357
id radius_mean texture_mean perimeter_mean area_mean \
diagnosis
B 8510426 13.540 14.36 87.46 566.3
B 8510653 13.080 15.71 85.63 520.0
B 8510824 9.504 12.44 60.34 273.9
B 854941 13.030 18.42 82.61 523.8
B 85713702 8.196 16.84 51.71 201.9
B 857155 12.050 14.63 78.04 449.3
B 857156 13.490 22.30 86.91 561.0
B 857343 11.760 21.60 74.72 427.9
B 857373 13.640 16.34 87.21 571.8
B 857374 11.940 18.24 75.71 437.6
B 85759902 11.520 18.75 73.34 409.0
B 857810 13.050 19.31 82.61 527.2
B 858477 8.618 11.79 54.34 224.5
B 858970 10.170 14.88 64.55 311.9
B 858981 8.598 20.98 54.66 221.8
B 859196 9.173 13.86 59.20 260.9
B 859464 9.465 21.01 60.11 269.4
B 859465 11.310 19.04 71.80 394.1
B 859471 9.029 17.33 58.79 250.5
B 859487 12.780 16.49 81.37 502.5
B 859711 8.888 14.64 58.79 244.0
B 8610175 12.310 16.52 79.19 470.9
B 8610629 13.530 10.94 87.91 559.2
B 8610908 12.860 18.00 83.19 506.3
B 861103 11.450 20.97 73.81 401.5
B 8611161 13.340 15.86 86.49 520.0
B 8612080 12.000 15.65 76.95 443.3
B 861597 12.360 21.80 79.78 466.1
B 861598 14.640 15.24 95.77 651.9
B 861648 14.620 24.02 94.57 662.7
... ... ... ... ... ...
B 91858 11.750 17.56 75.89 422.9
B 91903901 11.670 20.02 75.21 416.2
B 91903902 13.680 16.33 87.76 575.5
B 919537 10.960 17.62 70.79 365.6
B 919812 11.690 24.44 76.37 406.4
B 921092 7.729 25.49 47.98 178.8
B 921362 7.691 25.44 48.34 170.4
B 921385 11.540 14.44 74.65 402.9
B 921386 14.470 24.99 95.81 656.4
B 921644 14.740 25.42 94.70 668.6
B 922296 13.210 28.06 84.88 538.4
B 922297 13.870 20.70 89.77 584.8
B 922576 13.620 23.23 87.19 573.2
B 922577 10.320 16.35 65.31 324.9
B 922840 10.260 16.58 65.85 320.8
B 923169 9.683 19.34 61.05 285.7
B 923465 10.820 24.21 68.89 361.6
B 923748 10.860 21.48 68.51 360.5
B 923780 11.130 22.44 71.49 378.4
B 924084 12.770 29.43 81.35 507.9
B 924342 9.333 21.94 59.01 264.0
B 924632 12.880 28.92 82.50 514.3
B 924934 10.290 27.61 65.67 321.4
B 924964 10.160 19.59 64.73 311.7
B 925236 9.423 27.88 59.26 271.3
B 925277 14.590 22.68 96.39 657.1
B 925291 11.510 23.93 74.52 403.5
B 925292 14.050 27.15 91.38 600.4
B 925311 11.200 29.37 70.67 386.0
B 92751 7.760 24.54 47.92 181.0
smoothness_mean compactness_mean concavity_mean \
diagnosis
B 0.09779 0.08129 0.066640
B 0.10750 0.12700 0.045680
B 0.10240 0.06492 0.029560
B 0.08983 0.03766 0.025620
B 0.08600 0.05943 0.015880
B 0.10310 0.09092 0.065920
B 0.08752 0.07698 0.047510
B 0.08637 0.04966 0.016570
B 0.07685 0.06059 0.018570
B 0.08261 0.04751 0.019720
B 0.09524 0.05473 0.030360
B 0.08060 0.03789 0.000692
B 0.09752 0.05272 0.020610
B 0.11340 0.08061 0.010840
B 0.12430 0.08963 0.030000
B 0.07721 0.08751 0.059880
B 0.10440 0.07773 0.021720
B 0.08139 0.04701 0.037090
B 0.10660 0.14130 0.313000
B 0.09831 0.05234 0.036530
B 0.09783 0.15310 0.086060
B 0.09172 0.06829 0.033720
B 0.12910 0.10470 0.068770
B 0.09934 0.09546 0.038890
B 0.11020 0.09362 0.045910
B 0.10780 0.15350 0.116900
B 0.09723 0.07165 0.041510
B 0.08772 0.09445 0.060150
B 0.11320 0.13390 0.099660
B 0.08974 0.08606 0.031020
... ... ... ...
B 0.10730 0.09713 0.052820
B 0.10160 0.09453 0.042000
B 0.09277 0.07255 0.017520
B 0.09687 0.09752 0.052630
B 0.12360 0.15520 0.045150
B 0.08098 0.04878 0.000000
B 0.08668 0.11990 0.092520
B 0.09984 0.11200 0.067370
B 0.08837 0.12300 0.100900
B 0.08275 0.07214 0.041050
B 0.08671 0.06877 0.029870
B 0.09578 0.10180 0.036880
B 0.09246 0.06747 0.029740
B 0.09434 0.04994 0.010120
B 0.08877 0.08066 0.043580
B 0.08491 0.05030 0.023370
B 0.08192 0.06602 0.015480
B 0.07431 0.04227 0.000000
B 0.09566 0.08194 0.048240
B 0.08276 0.04234 0.019970
B 0.09240 0.05605 0.039960
B 0.08123 0.05824 0.061950
B 0.09030 0.07658 0.059990
B 0.10030 0.07504 0.005025
B 0.08123 0.04971 0.000000
B 0.08473 0.13300 0.102900
B 0.09261 0.10210 0.111200
B 0.09929 0.11260 0.044620
B 0.07449 0.03558 0.000000
B 0.05263 0.04362 0.000000
concave points_mean symmetry_mean ... texture_worst \
diagnosis ...
B 0.047810 0.1885 ... 19.26
B 0.031100 0.1967 ... 20.49
B 0.020760 0.1815 ... 15.66
B 0.029230 0.1467 ... 22.81
B 0.005917 0.1769 ... 21.96
B 0.027490 0.1675 ... 20.70
B 0.033840 0.1809 ... 31.82
B 0.011150 0.1495 ... 25.72
B 0.017230 0.1353 ... 23.19
B 0.013490 0.1868 ... 21.33
B 0.022780 0.1920 ... 22.47
B 0.004167 0.1819 ... 22.25
B 0.007799 0.1683 ... 15.40
B 0.012900 0.2743 ... 17.45
B 0.009259 0.1828 ... 27.04
B 0.021800 0.2341 ... 19.23
B 0.015040 0.1717 ... 31.56
B 0.022300 0.1516 ... 23.84
B 0.043750 0.2111 ... 22.65
B 0.028640 0.1590 ... 19.76
B 0.028720 0.1902 ... 15.67
B 0.022720 0.1720 ... 23.21
B 0.065560 0.2403 ... 12.49
B 0.023150 0.1718 ... 24.82
B 0.022330 0.1842 ... 32.16
B 0.069870 0.1942 ... 23.19
B 0.018630 0.2079 ... 24.90
B 0.037450 0.1930 ... 30.50
B 0.070640 0.2116 ... 18.24
B 0.029570 0.1685 ... 29.11
... ... ... ... ...
B 0.044400 0.1598 ... 27.98
B 0.021570 0.1859 ... 28.81
B 0.018800 0.1631 ... 20.20
B 0.027880 0.1619 ... 26.51
B 0.045310 0.2131 ... 32.19
B 0.000000 0.1870 ... 30.92
B 0.013640 0.2037 ... 31.89
B 0.025940 0.1818 ... 19.68
B 0.038900 0.1872 ... 31.73
B 0.030270 0.1840 ... 32.29
B 0.032750 0.1628 ... 37.17
B 0.023690 0.1620 ... 24.75
B 0.024430 0.1664 ... 29.09
B 0.005495 0.1885 ... 21.77
B 0.024380 0.1669 ... 22.04
B 0.009615 0.1580 ... 25.59
B 0.008160 0.1976 ... 31.45
B 0.000000 0.1661 ... 24.77
B 0.022570 0.2030 ... 28.26
B 0.014990 0.1539 ... 36.00
B 0.012820 0.1692 ... 25.05
B 0.023430 0.1566 ... 35.74
B 0.027380 0.1593 ... 34.91
B 0.011160 0.1791 ... 22.88
B 0.000000 0.1742 ... 34.24
B 0.037360 0.1454 ... 27.27
B 0.041050 0.1388 ... 37.16
B 0.043040 0.1537 ... 33.17
B 0.000000 0.1060 ... 38.30
B 0.000000 0.1587 ... 30.37
perimeter_worst area_worst smoothness_worst compactness_worst \
diagnosis
B 99.70 711.2 0.14400 0.17730
B 96.09 630.5 0.13120 0.27760
B 65.13 314.9 0.13240 0.11480
B 84.46 545.9 0.09701 0.04619
B 57.26 242.2 0.12970 0.13570
B 89.88 582.6 0.14940 0.21560
B 99.00 698.8 0.11620 0.17110
B 82.98 516.5 0.10850 0.08615
B 96.08 656.7 0.10890 0.15820
B 83.67 527.2 0.11440 0.08906
B 81.81 506.2 0.12490 0.08720
B 90.24 624.1 0.10210 0.06191
B 59.90 274.9 0.17330 0.12390
B 69.86 368.6 0.12750 0.09866
B 62.06 273.9 0.16390 0.16980
B 65.59 310.1 0.09836 0.16780
B 67.03 330.7 0.15480 0.16640
B 78.00 466.7 0.12900 0.09148
B 65.50 324.7 0.14820 0.43650
B 85.67 554.9 0.12960 0.07061
B 62.56 284.4 0.12070 0.24360
B 89.71 611.1 0.11760 0.18430
B 91.36 605.5 0.14510 0.13790
B 91.88 622.1 0.12890 0.21410
B 84.53 525.1 0.15570 0.16760
B 96.66 614.9 0.15360 0.47910
B 87.78 567.9 0.13770 0.20030
B 91.46 574.7 0.13040 0.24630
B 109.40 803.6 0.12770 0.30890
B 102.90 803.7 0.11150 0.17660
... ... ... ... ...
B 88.52 552.3 0.13490 0.18540
B 87.00 550.6 0.15500 0.29640
B 101.60 773.4 0.12640 0.15640
B 76.43 407.5 0.14280 0.25100
B 86.12 487.7 0.17680 0.32510
B 57.17 248.0 0.12560 0.08340
B 54.49 223.6 0.15960 0.30640
B 78.78 457.8 0.13450 0.21180
B 113.50 808.9 0.13400 0.42020
B 107.40 826.4 0.10600 0.13760
B 92.48 629.6 0.10720 0.13810
B 99.17 688.6 0.12640 0.20370
B 97.58 729.8 0.12160 0.15170
B 71.12 384.9 0.12850 0.08842
B 71.08 357.4 0.14610 0.22460
B 69.10 364.2 0.11990 0.09546
B 83.90 505.6 0.12040 0.16330
B 74.08 412.3 0.10010 0.07348
B 77.80 436.6 0.10870 0.17820
B 88.10 594.7 0.12340 0.10640
B 62.86 295.8 0.11030 0.08298
B 88.84 595.7 0.12270 0.16200
B 69.57 357.6 0.13840 0.17100
B 67.88 347.3 0.12650 0.12000
B 66.50 330.6 0.10730 0.07158
B 105.90 733.5 0.10260 0.31710
B 82.28 474.2 0.12980 0.25170
B 100.20 706.7 0.12410 0.22640
B 75.19 439.6 0.09267 0.05494
B 59.16 268.6 0.08996 0.06444
concavity_worst concave points_worst symmetry_worst \
diagnosis
B 0.239000 0.12880 0.2977
B 0.189000 0.07283 0.3184
B 0.088670 0.06227 0.2450
B 0.048330 0.05013 0.1987
B 0.068800 0.02564 0.3105
B 0.305000 0.06548 0.2747
B 0.228200 0.12820 0.2871
B 0.055230 0.03715 0.2433
B 0.105000 0.08586 0.2346
B 0.092030 0.06296 0.2785
B 0.090760 0.06316 0.3306
B 0.001845 0.01111 0.2439
B 0.116800 0.04419 0.3220
B 0.021680 0.02579 0.3557
B 0.090010 0.02778 0.2972
B 0.139700 0.05087 0.3282
B 0.094120 0.06517 0.2878
B 0.144400 0.06961 0.2400
B 1.252000 0.17500 0.4228
B 0.103900 0.05882 0.2383
B 0.143400 0.04786 0.2254
B 0.170300 0.08660 0.2618
B 0.085390 0.07407 0.2710
B 0.173100 0.07926 0.2779
B 0.175500 0.06127 0.2762
B 0.485800 0.17080 0.3527
B 0.226700 0.07632 0.3379
B 0.243400 0.12050 0.2972
B 0.260400 0.13970 0.3151
B 0.091890 0.06946 0.2522
... ... ... ...
B 0.136600 0.10100 0.2478
B 0.275800 0.08120 0.3206
B 0.120600 0.08704 0.2806
B 0.212300 0.09861 0.2289
B 0.139500 0.13080 0.2803
B 0.000000 0.00000 0.3058
B 0.339300 0.05000 0.2790
B 0.179700 0.06918 0.2329
B 0.404000 0.12050 0.3187
B 0.161100 0.10950 0.2722
B 0.106200 0.07958 0.2473
B 0.137700 0.06845 0.2249
B 0.104900 0.07174 0.2642
B 0.043840 0.02381 0.2681
B 0.178300 0.08333 0.2691
B 0.093500 0.03846 0.2552
B 0.061940 0.03264 0.3059
B 0.000000 0.00000 0.2458
B 0.156400 0.06413 0.3169
B 0.086530 0.06498 0.2407
B 0.079930 0.02564 0.2435
B 0.243900 0.06493 0.2372
B 0.200000 0.09127 0.2226
B 0.010050 0.02232 0.2262
B 0.000000 0.00000 0.2475
B 0.366200 0.11050 0.2258
B 0.363000 0.09653 0.2112
B 0.132600 0.10480 0.2250
B 0.000000 0.00000 0.1566
B 0.000000 0.00000 0.2871
fractal_dimension_worst Unnamed: 32
diagnosis
B 0.07259 NaN
B 0.08183 NaN
B 0.07773 NaN
B 0.06169 NaN
B 0.07409 NaN
B 0.08301 NaN
B 0.06917 NaN
B 0.06563 NaN
B 0.08025 NaN
B 0.07408 NaN
B 0.07036 NaN
B 0.06289 NaN
B 0.09026 NaN
B 0.08020 NaN
B 0.07712 NaN
B 0.08490 NaN
B 0.09211 NaN
B 0.06641 NaN
B 0.11750 NaN
B 0.06410 NaN
B 0.10840 NaN
B 0.07609 NaN
B 0.07191 NaN
B 0.07918 NaN
B 0.08851 NaN
B 0.10160 NaN
B 0.07924 NaN
B 0.09261 NaN
B 0.08473 NaN
B 0.07246 NaN
... ... ...
B 0.07757 NaN
B 0.08950 NaN
B 0.07782 NaN
B 0.08278 NaN
B 0.09970 NaN
B 0.09938 NaN
B 0.10660 NaN
B 0.08134 NaN
B 0.10230 NaN
B 0.06956 NaN
B 0.06443 NaN
B 0.08492 NaN
B 0.06953 NaN
B 0.07399 NaN
B 0.09479 NaN
B 0.07920 NaN
B 0.07626 NaN
B 0.06592 NaN
B 0.08032 NaN
B 0.06484 NaN
B 0.07393 NaN
B 0.07242 NaN
B 0.08283 NaN
B 0.06742 NaN
B 0.06969 NaN
B 0.08004 NaN
B 0.08732 NaN
B 0.08321 NaN
B 0.05905 NaN
B 0.07039 NaN
[357 rows x 32 columns]
### Working with notnull() and isnull() methods
import pandas as pd
import numpy as np

# Column name -> list of scores; np.nan marks a missing entry.
# (Named `scores` rather than `dict` so the builtin `dict` is not shadowed.)
scores = {
    'First Score': [100, 90, np.nan, 95],
    'Second Score': [30, 45, 56, np.nan],
    'Third Score': [np.nan, 40, 80, 98],
}

# Create a DataFrame from the dictionary of lists.
df = pd.DataFrame(scores)

# NOTE: df_knn is a second name for the same object, not a copy;
# any in-place change to df is visible through df_knn as well.
df_knn = df
print(df_knn)

# isnull() gives a boolean frame that is True where a value is missing.
print("output of isnull() method", df.isnull())

# notnull() is the complement: True where a value is present.
print("output of notnull() method, ",df.notnull())

# Handling missing values using fillna() and dropna().
# Alternatives: df.fillna("INF") or df.fillna(0) substitute a fixed value
# instead of removing rows.
# dropna() returns a new frame without the rows that contain NaN; the result
# is not assigned, so df itself keeps all four rows (the notebook only
# displays the returned frame).
df.dropna()
First Score Second Score Third Score 0 100.0 30.0 NaN 1 90.0 45.0 40.0 2 NaN 56.0 80.0 3 95.0 NaN 98.0 output of isnull() method First Score Second Score Third Score 0 False False True 1 False False False 2 True False False 3 False True False output of notnull() method, First Score Second Score Third Score 0 True True False 1 True True True 2 False True True 3 True False True
| | First Score | Second Score | Third Score |
|---|---|---|---|
| 1 | 90.0 | 45.0 | 40.0 |
# Report the dimensions of the frame.
print(len(df.columns))  # number of columns
print(len(df))  # number of rows

### Accessing rows and columns
# Iterate over rows: iterrows() yields (index label, row as a Series) pairs.
for i, j in df.iterrows():
    print(i, j)
    print()

# Iterate over columns: list(df) returns the column labels in order.
columns = list(df)
print("column details:", columns)
for i in columns:
    print(df[i])  # print every column in turn

# Print the 2nd column only.
# The earlier form `print(df[i], [1])` passed the list [1] as a second
# argument to print, so each column was printed again followed by "[1]"
# rather than selecting anything.
print(df[columns[1]])
3 4 0 First Score 100.0 Second Score 30.0 Third Score NaN Name: 0, dtype: float64 1 First Score 90.0 Second Score 45.0 Third Score 40.0 Name: 1, dtype: float64 2 First Score NaN Second Score 56.0 Third Score 80.0 Name: 2, dtype: float64 3 First Score 95.0 Second Score NaN Third Score 98.0 Name: 3, dtype: float64 column details: ['First Score', 'Second Score', 'Third Score'] 0 100.0 1 90.0 2 NaN 3 95.0 Name: First Score, dtype: float64 0 100.0 1 90.0 2 NaN 3 95.0 Name: First Score, dtype: float64 [1] 0 30.0 1 45.0 2 56.0 3 NaN Name: Second Score, dtype: float64 0 30.0 1 45.0 2 56.0 3 NaN Name: Second Score, dtype: float64 [1] 0 NaN 1 40.0 2 80.0 3 98.0 Name: Third Score, dtype: float64 0 NaN 1 40.0 2 80.0 3 98.0 Name: Third Score, dtype: float64 [1]
import sklearn
from sklearn.impute import SimpleImputer

# Mean imputation: every NaN is replaced by the mean of the non-missing
# values in its own column.
imputer = SimpleImputer(strategy='mean', missing_values=np.nan)

# Learn the per-column means from df_knn, then fill its gaps with them.
# The result is a NumPy array, not a DataFrame.
df_imputed = imputer.fit(df_knn).transform(df_knn)
df_imputed
array([[100. , 30. , 72.66666667],
[ 90. , 45. , 40. ],
[ 95. , 56. , 80. ],
[ 95. , 43.66666667, 98. ]])
import pandas as pd

# A flat list becomes a one-column frame; the column gets the default label 0.
x = pd.DataFrame(data=[1, 2, 3])

# Confirm that the constructor produced a DataFrame.
print(type(x))

# Last expression of the cell: the notebook displays the frame.
x
<class 'pandas.core.frame.DataFrame'>
| | 0 |
|---|---|
| 0 | 1 |
| 1 | 2 |
| 2 | 3 |