import numpy as np
from astropy.io import ascii
import matplotlib.pyplot as plt

dir = u'../'
dat = ascii.read(dir+"exoplanet.eu_catalog.csv")
dat.colnames

['name',
 'mass',
 'mass_error_min',
 'mass_error_max',
 'radius',
 'radius_error_min',
 'radius_error_max',
 'orbital_period',
 'orbital_period_err_min',
 'orbital_period_err_max',
 'semi_major_axis',
 'semi_major_axis_error_min',
 'semi_major_axis_error_max',
 'eccentricity',
 'eccentricity_error_min',
 'eccentricity_error_max',
 'angular_distance',
 'inclination',
 'inclination_error_min',
 'inclination_error_max',
 'tzero_tr',
 'tzero_tr_error_min',
 'tzero_tr_error_max',
 'tzero_tr_sec',
 'tzero_tr_sec_error_min',
 'tzero_tr_sec_error_max',
 'lambda_angle',
 'lambda_angle_error_min',
 'lambda_angle_error_max',
 'impact_parameter',
 'impact_parameter_error_min',
 'impact_parameter_error_max',
 'tzero_vr',
 'tzero_vr_error_min',
 'tzero_vr_error_max',
 'K',
 'K_error_min',
 'K_error_max',
 'temp_calculated',
 'temp_measured',
 'hot_point_lon',
 'albedo',
 'albedo_error_min',
 'albedo_error_max',
 'log_g',
 'publication_status',
 'discovered',
 'updated',
 'omega',
 'omega_error_min',
 'omega_error_max',
 'tperi',
 'tperi_error_min',
 'tperi_error_max',
 'detection_type',
 'mass_detection_type',
 'radius_detection_type',
 'alternate_names',
 'molecules',
 'star_name',
 'ra',
 'dec',
 'mag_v',
 'mag_i',
 'mag_j',
 'mag_h',
 'mag_k',
 'star_distance',
 'star_metallicity',
 'star_mass',
 'star_radius',
 'star_sp_type',
 'star_age',
 'star_teff',
 'star_detected_disc',
 'star_magnetic_field']

#creating logical statments are import for cutting data
#to use only the data that is useful or real
#There are two basic ways to cut data 
#e.g. use a logical variable
#keep stars with time period errors less than 10 percent
keep = dat['tperi_error_max']/dat['tperi'] < .1
#or if we are only interested in stars cooler than the sun
cool = dat['star_teff'] < 5777. #K

#Now we can call the logical variable into the array two different ways
#First we can do it the old fashion way
print(cool)
print(np.max(dat['star_teff'][cool]),dat['star_teff'][cool].max())

[True True True ..., False False False]
(5776.0, 5776.0)

#or we can use the index location of the values
cooler, = np.where(dat['star_teff']<5777.)
print(cooler)
print(np.max(dat['star_teff'][cooler]),dat['star_teff'][cooler].max())

[   0    1    2 ..., 1868 1869 1871]
(5776.0, 5776.0)

#or if we feel more object like we could do
coolest = dat['star_teff'] >= 5777.
dat.mask = coolest
print(dat['star_teff'].max())

5776.0

#As you can see we go the same maximum value for all 
#Just different ways to attack the same problem
#And depending on the case you will perfer one over the other
#of course we can reset the mask with 
dat.mask = False
print(dat['star_teff'].max())

33000.0

#Now that we are looking at the Stellar Temperatures for
#this objects what else can we tell the Temperatures without looking
#directly at the data?
#Well we can get the min,max,standard deviation, median, and mean
print('min')
print(np.min(dat['star_teff']),dat['star_teff'].min())
print('max')
print(np.max(dat['star_teff']),dat['star_teff'].max())
print('Standard Dev')
print(np.std(dat['star_teff']),dat['star_teff'].std())
print('Mean')
print(np.mean(dat['star_teff']),dat['star_teff'].mean())
print('Median')
print(np.median(dat['star_teff']))

min
(0.0, 0.0)
max
(33000.0, 33000.0)
Standard Dev
(2273.8466579775518, 2273.8466579775518)
Mean
(5024.0150426439222, 5024.0150426439222)
Median
5520.0

#Now lets go back to some data we know works
dat['years'] = dat['orbital_period']/365.25

#Now lets learn something about this data 
def quickstat(X):
    print('Min = '+str(np.min(X)))
    print('Max = '+str(np.max(X)))
    print('Standard Dev = '+str(np.std(X)))
    print('Mean = '+str(np.mean(X)))
    print('Median = '+str(np.median(X)))
    print('Size = '+str(np.size(X)))

quickstat(dat['years'])

Min = 0.0
Max = 1998.63107461
Standard Dev = 52.5110964775
Mean = 3.26028109742
Median = 0.0429889979466
Size = 1876

quickstat(dat['semi_major_axis'])

Min = 0.0
Max = 3200.0
Standard Dev = 123.074071101
Mean = 11.2849427216
Median = 0.123
Size = 1876

#Obviously there are some values here which
#are well outside the normal values so lets limit our 
#data points
masked, = np.where((dat['years'] < 5.) & (dat['semi_major_axis'] < 5.**(2./3.)))

fig,ax = plt.subplots()
ax.scatter(dat['semi_major_axis'][masked]**3.,dat['years'][masked]**2.,color='black')
ax.set_ylabel(u'P$^2$ (years)',fontsize=24)
ax.set_xlabel(u'A$^3$ (AU)',fontsize=24)

<matplotlib.text.Text at 0x7f009ffb3610>

#This looks linear so lets us try fitting a line
m,b = np.polyfit(dat['semi_major_axis'][masked]**3.,dat['years'][masked]**2.,1)#linear fit

print(m,b)

(0.87550968801784246, 0.0057459257326819644)

#What values did you expect?
#Are they close?
fig,ax = plt.subplots()
x = np.arange(25)
ax.scatter(dat['semi_major_axis'][masked]**3.,dat['years'][masked]**2.,color='black')
ax.set_ylabel(u'P$^2$ (years)',fontsize=24)
ax.set_xlabel(u'A$^3$ (AU)',fontsize=24)
ax.plot(x,x*m+b,color='red',linewidth=3)

[<matplotlib.lines.Line2D at 0x7f00a405b8d0>]

#Well now that we know there is a correlation here why not try
#to see if we can find more about this data
stuff = ['mass','radius','log_g','star_metallicity']
import itertools as it
cstuff = it.combinations(stuff,2)

for i in cstuff:
    print i

('mass', 'radius')
('mass', 'log_g')
('mass', 'star_metallicity')
('radius', 'log_g')
('radius', 'star_metallicity')
('log_g', 'star_metallicity')

fig,ax = plt.subplots(nrows=2,ncols=3,figsize=(16,12))
cstuff = it.combinations(stuff,2)
j = 0
k = 0
for i in cstuff:
    ax[k,j].scatter(dat[i[0]],dat[i[1]],color='black')
    ax[k,j].set_xlabel(i[0])
    ax[k,j].set_ylabel(i[1])
    j += 1
    if j == 3:
        j = 0
        k += 1

#Doing loops like this are what make object oriented programing like python
#Simple and easy

# Insteresting could be Planet density with distance AU
dat['rho'] = dat['mass']/(4.*np.pi*dat['radius']**3.)
fig,ax = plt.subplots()
ax.scatter(dat['semi_major_axis'],dat['rho'],color='black')
ax.set_ylabel(u'Densitiy ($\oplus$)',fontsize=24)
ax.set_xlabel(u'A',fontsize=24)

<matplotlib.text.Text at 0x7f00a40a0bd0>

#Lets only look for Earth like plantets
keep, = np.where((dat['rho'] < 2.5) & (dat['rho'] > 0.5) & (dat['semi_major_axis'] < 2.5))
fig,ax = plt.subplots()
ax.scatter(dat['semi_major_axis'][keep],dat['rho'][keep],color='black')
ax.set_ylabel(u'Densitiy ($\oplus$)',fontsize=24)
ax.set_xlabel(u'A (AU)',fontsize=24)

<matplotlib.text.Text at 0x7f009f13c290>

# is there a trend?
binmax = 1.0
res = .1
bins = np.arange(0,binmax+2.*res,res)-res/2
avebin = np.zeros(bins.size-1)
for x in np.arange(bins.size-1):
    calc, = np.where((dat['semi_major_axis'][keep] >= bins[x]) &(dat['semi_major_axis'][keep] < bins[x+1]) )
    if calc.size == 0:
        avebin[x] = np.nan
    else:
        avebin[x] = np.mean(dat['rho'][keep][calc])
fig,ax = plt.subplots()
ax.scatter(dat['semi_major_axis'][keep],dat['rho'][keep],color='black')
ax.plot(bins[0:-1],avebin,color='red',label='Average',linewidth=3)
ax.legend(loc='upper right')
ax.set_ylabel(u'Densitiy ($\oplus$)',fontsize=24)
ax.set_xlabel(u'A (AU)',fontsize=24)

<matplotlib.text.Text at 0x7f009e8fb110>

Exoplanet EU data