import numpy as np
from astropy.io import ascii
import matplotlib.pyplot as plt
dir = u'../'
dat = ascii.read(dir+"exoplanet.eu_catalog.csv")
dat.colnames
#creating logical statments are import for cutting data
#to use only the data that is useful or real
#There are two basic ways to cut data
#e.g. use a logical variable
#keep stars with time period errors less than 10 percent
keep = dat['tperi_error_max']/dat['tperi'] < .1
#or if we are only interested in stars cooler than the sun
cool = dat['star_teff'] < 5777. #K
#Now we can call the logical variable into the array two different ways
#First we can do it the old fashion way
print(cool)
print(np.max(dat['star_teff'][cool]),dat['star_teff'][cool].max())
#or we can use the index location of the values
cooler, = np.where(dat['star_teff']<5777.)
print(cooler)
print(np.max(dat['star_teff'][cooler]),dat['star_teff'][cooler].max())
#or if we feel more object like we could do
coolest = dat['star_teff'] >= 5777.
dat.mask = coolest
print(dat['star_teff'].max())
#As you can see we go the same maximum value for all
#Just different ways to attack the same problem
#And depending on the case you will perfer one over the other
#of course we can reset the mask with
dat.mask = False
print(dat['star_teff'].max())
#Now that we are looking at the Stellar Temperatures for
#this objects what else can we tell the Temperatures without looking
#directly at the data?
#Well we can get the min,max,standard deviation, median, and mean
print('min')
print(np.min(dat['star_teff']),dat['star_teff'].min())
print('max')
print(np.max(dat['star_teff']),dat['star_teff'].max())
print('Standard Dev')
print(np.std(dat['star_teff']),dat['star_teff'].std())
print('Mean')
print(np.mean(dat['star_teff']),dat['star_teff'].mean())
print('Median')
print(np.median(dat['star_teff']))
#Now lets go back to some data we know works
dat['years'] = dat['orbital_period']/365.25
#Now lets learn something about this data
def quickstat(X):
print('Min = '+str(np.min(X)))
print('Max = '+str(np.max(X)))
print('Standard Dev = '+str(np.std(X)))
print('Mean = '+str(np.mean(X)))
print('Median = '+str(np.median(X)))
print('Size = '+str(np.size(X)))
quickstat(dat['years'])
quickstat(dat['semi_major_axis'])
#Obviously there are some values here which
#are well outside the normal values so lets limit our
#data points
masked, = np.where((dat['years'] < 5.) & (dat['semi_major_axis'] < 5.**(2./3.)))
fig,ax = plt.subplots()
ax.scatter(dat['semi_major_axis'][masked]**3.,dat['years'][masked]**2.,color='black')
ax.set_ylabel(u'P$^2$ (years)',fontsize=24)
ax.set_xlabel(u'A$^3$ (AU)',fontsize=24)
#This looks linear so lets us try fitting a line
m,b = np.polyfit(dat['semi_major_axis'][masked]**3.,dat['years'][masked]**2.,1)#linear fit
print(m,b)
#What values did you expect?
#Are they close?
fig,ax = plt.subplots()
x = np.arange(25)
ax.scatter(dat['semi_major_axis'][masked]**3.,dat['years'][masked]**2.,color='black')
ax.set_ylabel(u'P$^2$ (years)',fontsize=24)
ax.set_xlabel(u'A$^3$ (AU)',fontsize=24)
ax.plot(x,x*m+b,color='red',linewidth=3)
#Well now that we know there is a correlation here why not try
#to see if we can find more about this data
stuff = ['mass','radius','log_g','star_metallicity']
import itertools as it
cstuff = it.combinations(stuff,2)
for i in cstuff:
print i
fig,ax = plt.subplots(nrows=2,ncols=3,figsize=(16,12))
cstuff = it.combinations(stuff,2)
j = 0
k = 0
for i in cstuff:
ax[k,j].scatter(dat[i[0]],dat[i[1]],color='black')
ax[k,j].set_xlabel(i[0])
ax[k,j].set_ylabel(i[1])
j += 1
if j == 3:
j = 0
k += 1
#Doing loops like this are what make object oriented programing like python
#Simple and easy
# Insteresting could be Planet density with distance AU
dat['rho'] = dat['mass']/(4.*np.pi*dat['radius']**3.)
fig,ax = plt.subplots()
ax.scatter(dat['semi_major_axis'],dat['rho'],color='black')
ax.set_ylabel(u'Densitiy ($\oplus$)',fontsize=24)
ax.set_xlabel(u'A',fontsize=24)
#Lets only look for Earth like plantets
keep, = np.where((dat['rho'] < 2.5) & (dat['rho'] > 0.5) & (dat['semi_major_axis'] < 2.5))
fig,ax = plt.subplots()
ax.scatter(dat['semi_major_axis'][keep],dat['rho'][keep],color='black')
ax.set_ylabel(u'Densitiy ($\oplus$)',fontsize=24)
ax.set_xlabel(u'A (AU)',fontsize=24)
# is there a trend?
binmax = 1.0
res = .1
bins = np.arange(0,binmax+2.*res,res)-res/2
avebin = np.zeros(bins.size-1)
for x in np.arange(bins.size-1):
calc, = np.where((dat['semi_major_axis'][keep] >= bins[x]) &(dat['semi_major_axis'][keep] < bins[x+1]) )
if calc.size == 0:
avebin[x] = np.nan
else:
avebin[x] = np.mean(dat['rho'][keep][calc])
fig,ax = plt.subplots()
ax.scatter(dat['semi_major_axis'][keep],dat['rho'][keep],color='black')
ax.plot(bins[0:-1],avebin,color='red',label='Average',linewidth=3)
ax.legend(loc='upper right')
ax.set_ylabel(u'Densitiy ($\oplus$)',fontsize=24)
ax.set_xlabel(u'A (AU)',fontsize=24)