import numpy as np
import matplotlib.pyplot as plt
from astropy.io import ascii
#read in white dwarf data from sloan
wd = ascii.read("WD_sspp.csv",header_start=0,data_start=1)
wd.colnames
#Note TEFFHA24, TEFFNGS1, TEFFWBG these are all estimates for the
#Effective temperature of a star (which is the blackbody which fits
#the spectral energy distribution of the star)
#All of these star are targeted as white dwarfs so they should be hot
#lets explore how well pipe lines can retrieve their temperature
def stat_teff(x,y):
#x and y will be the strings corresponding to the columns for analysis
#define the wd array as a global variable
global wd
#Print the mean difference in temperature
keep, = np.where((wd[x] > 0) & (wd[y] > 0))
diff = wd[x]-wd[y]
print("Comparing "+x+" to "+y)
print("Number of sources = "+str(keep.size))
print("mean = "+str(np.mean(diff[keep])))
print("max = "+str(np.max(np.abs(diff[keep]))))
print("median = "+str(np.median(diff[keep])))
print("Standard Dev. = "+str(np.std(diff[keep])))
#lets use itertools to make this easy
import itertools as it
#the variables we want to compare
var = ['TEFFHA24','TEFFNGS1','TEFFWBG']
comb = it.combinations(var,2)
#Now print all of the data
for i in comb:
stat_teff(i[0],i[1])
#plot the distributions on a histogram for the differences in
#temperature
fig,ax = plt.subplots(ncols=3,sharex=True,figsize=(18.5,8))
comb = it.combinations(var,2)
#resolution of bins
res = 500.
for j,i in enumerate(comb):
keep, = np.where((wd[i[0]] > 0) & (wd[i[1]] > 0))
diff = wd[i[0]]-wd[i[1]]
minr = int(diff[keep].min()/res)*res
maxr = int(diff[keep].max()/res)*res+res
#Change the bin range for every observation but presurve bin resolution
#Puts center of the bin on 0
bins = np.arange(minr,maxr+2.*res,res)-res/2.
ax[j].hist(diff[keep],bins=bins,color='black')
ax[0].set_ylabel(u'Number of Stars')
ax[j].set_xlabel(i[0]+'-'+i[1] )
#While these distributions are centrally peaked temperature difference as great as 3000K are seen!
#And the bins sizes are 500K when typically Teff temperature errors are quoted in the 100s for
#Enmasss spectral classification
#However, lets make sure signal-to-noise is not affecting the results
fig,ax = plt.subplots(ncols=3,sharex=True,figsize=(20.5,8))
comb = it.combinations(var,2)
#resolution of bins
res = 500.
#colors for stacked array
carray = ['red','blue','green','black']
#Break S/N up into 4 equal bins
snarray = np.linspace(0,80,5,endpoint=True)
#Now lets break up the binning in S/N to detect the cause for the outliers
for j,i in enumerate(comb):
keep, = np.where((wd[i[0]] > 0) & (wd[i[1]] > 0))
diff = wd[i[0]]-wd[i[1]]
minr = int(diff[keep].min()/res)*res
maxr = int(diff[keep].max()/res)*res+res
bins = np.arange(minr,maxr+2.*res,res)-res/2.
ax[0].set_ylabel(u'Number of Stars',fontsize=18)
#set the x label based on what you are subtracting
ax[j].set_xlabel(i[0]+'-'+i[1],fontsize=18 )
for m in np.arange(4):
snkeep, = np.where((wd['SNR'][keep] >= snarray[m]) &(wd['SNR'][keep] < snarray[m+1]))
#don't tray to plot if nothing is in the bin
if snkeep.size > 0:
ax[j].hist(diff[keep][snkeep],bins=bins,color=carray[m],stacked=True,label=str(snarray[m])+"< SNR <"+str(snarray[m+1]))
ax[j].legend(loc='upper right')
#You can see stars with S/N < 20 have more scatter in temperature than the higher S/N objects
#Also we must remember these are white dwarfs which are bizarre objects and therefore difficult
#For algorithms to handle