import numpy as np
import matplotlib.pyplot as plt
from astropy.io import ascii

#read in white dwarf data from sloan
wd = ascii.read("WD_sspp.csv",header_start=0,data_start=1)

wd.colnames

['objID',
 'plate',
 'mjd',
 'fiberID',
 'ra',
 'dec',
 'l',
 'b',
 'psfMag_u',
 'psfMag_g',
 'psfMag_r',
 'psfMag_i',
 'psfMag_z',
 'psfMagErr_u',
 'psfMagErr_g',
 'psfMagErr_r',
 'psfMagErr_i',
 'psfMagErr_z',
 'extinction_u',
 'extinction_g',
 'extinction_r',
 'extinction_i',
 'extinction_z',
 'TEFFHA24',
 'TEFFNGS1',
 'TEFFWBG',
 'SNR']

#Note TEFFHA24, TEFFNGS1, TEFFWBG these are all estimates for the 
#Effective temperature of a star (which is the blackbody which fits
#the spectral energy distribution of the star)
#All of these star are targeted as white dwarfs so they should be hot
#lets explore how well pipe lines can retrieve their temperature
def stat_teff(x,y):
#x and y will be the strings corresponding to the columns for analysis
#define the wd array as a global variable
    global wd
#Print the mean difference in temperature
    keep, = np.where((wd[x] > 0) & (wd[y] > 0))
    diff = wd[x]-wd[y]
    print("Comparing "+x+" to "+y)
    print("Number of sources = "+str(keep.size))
    print("mean = "+str(np.mean(diff[keep])))
    print("max = "+str(np.max(np.abs(diff[keep]))))
    print("median = "+str(np.median(diff[keep])))
    print("Standard Dev. = "+str(np.std(diff[keep])))

#lets use itertools to make this easy
import itertools as it
#the variables we want to compare
var = ['TEFFHA24','TEFFNGS1','TEFFWBG']
comb = it.combinations(var,2)

#Now print all of the data
for i in comb:
    stat_teff(i[0],i[1])

Comparing TEFFHA24 to TEFFNGS1
Number of sources = 507
mean = 134.315372781
max = 3259.195
median = 63.848
Standard Dev. = 591.447805887
Comparing TEFFHA24 to TEFFWBG
Number of sources = 43
mean = 73.519627907
max = 3017.436
median = 107.462
Standard Dev. = 829.881559322
Comparing TEFFNGS1 to TEFFWBG
Number of sources = 43
mean = -94.2851395349
max = 2915.022
median = 11.849
Standard Dev. = 610.350167883

#plot the distributions on a histogram for the differences in 
#temperature
fig,ax = plt.subplots(ncols=3,sharex=True,figsize=(18.5,8))
comb = it.combinations(var,2)
#resolution of bins
res = 500.
for j,i in enumerate(comb):
    keep, = np.where((wd[i[0]] > 0) & (wd[i[1]] > 0))
    diff = wd[i[0]]-wd[i[1]]
    minr = int(diff[keep].min()/res)*res
    maxr = int(diff[keep].max()/res)*res+res
    #Change the bin range for every observation but presurve bin resolution
    #Puts center of the bin on 0
    bins = np.arange(minr,maxr+2.*res,res)-res/2.
    ax[j].hist(diff[keep],bins=bins,color='black')
    ax[0].set_ylabel(u'Number of Stars')
    ax[j].set_xlabel(i[0]+'-'+i[1] )

#While these distributions are centrally peaked temperature difference as great as 3000K are seen!
#And the bins sizes are 500K when typically Teff temperature errors are quoted in the 100s for 
#Enmasss spectral classification
#However, lets make sure signal-to-noise is not affecting the results

fig,ax = plt.subplots(ncols=3,sharex=True,figsize=(20.5,8))
comb = it.combinations(var,2)
#resolution of bins
res = 500.
#colors for stacked array
carray = ['red','blue','green','black']
#Break S/N up into 4 equal bins
snarray = np.linspace(0,80,5,endpoint=True)
#Now lets break up the binning in S/N to detect the cause for the outliers
for j,i in enumerate(comb):
    keep, = np.where((wd[i[0]] > 0) & (wd[i[1]] > 0))
    diff = wd[i[0]]-wd[i[1]]
    minr = int(diff[keep].min()/res)*res
    maxr = int(diff[keep].max()/res)*res+res
    bins = np.arange(minr,maxr+2.*res,res)-res/2.
    ax[0].set_ylabel(u'Number of Stars',fontsize=18)
    #set the x label based on what you are subtracting
    ax[j].set_xlabel(i[0]+'-'+i[1],fontsize=18 )
    for m in np.arange(4):
        snkeep, = np.where((wd['SNR'][keep] >= snarray[m]) &(wd['SNR'][keep] < snarray[m+1]))
        #don't tray to plot if nothing is in the bin
        if snkeep.size > 0:
           ax[j].hist(diff[keep][snkeep],bins=bins,color=carray[m],stacked=True,label=str(snarray[m])+"< SNR <"+str(snarray[m+1]))
    ax[j].legend(loc='upper right')

#You can see stars with S/N < 20 have more scatter in temperature than the higher S/N objects
#Also we must remember these are white dwarfs which are bizarre objects and therefore difficult
#For algorithms to handle

White Dwarf Data