#import plotting module
import matplotlib.pyplot as plt
#Import file read in 
from astropy.io import ascii
#Import basic array man.
import numpy as np
#Import modules to calc. the time of things
import time

#Time how long ascii.read takes on the Galaxy Zoo file when it is
#given nothing but the file name
start = time.time()
dat = ascii.read('GalaxyZoo1_DR_table2.csv')
end = time.time()
elapsed = end - start
print(elapsed)#elapsed time for the read, in seconds

34.1528859138

#Repeat the timing, but this time describe the file layout to
#ascii.read explicitly and switch off format guessing
start = time.time()
dat = ascii.read(
    'GalaxyZoo1_DR_table2.csv',
    delimiter=',',
    header_start=0,
    data_start=1,
    guess=False,
)
end = time.time()
elapsed = end - start
print(elapsed)

12.6086659431

#WOW almost 3 times as fast
#The moral of the story is the more correct information you send in the faster a file will be read in

#So what did we tell the file?
#header_start=0 means the header for the file is on the first uncommented line
#data_start=1 means the data starts on the second uncommented line
#header_start can be less than 0 if the header is in the comment section

#Now let us manipulate some data
#(how the file is opened is not important yet, so skip over it for now)
from astropy.io import fits as pyfits
#Open the AGN catalogue; fitdat is left open here and is not closed
fitdat = pyfits.open('schawinski_GZ_2010_catalogue.fits')
#The table we want is the data of the HDU at index 1
agn_hdu = fitdat[1]
agndat = agn_hdu.data

#Since we cannot measure distances directly to these AGN
#we can use Hubble expansion and redshift to estimate them indirectly
#define the speed of light
#(plain Python floats: the np.float alias was removed from NumPy)
c = 3.0e5 #km/s always label units
#Define Hubble Constant
H0 = 70.0 #km/s/Mpc
#Calculate distance
#d = c*z/H0, so with c in km/s and H0 in km/s/Mpc the distance is in Mpc
#(this linear relation is only a good approximation at small redshift)

#Now that we have this agndat array, wouldn't it be nice if
#we could write the distance into this structure?
#NOTE: this line is expected to raise a KeyError, because agndat has no
#existing 'Dist' column to assign into
#The redshift lives in the 'REDSHIFT' column; 'Z' sits with U, G, R, I
#and appears to be the z-band magnitude, so it must not be used here
agndat['Dist'] = c*agndat['REDSHIFT']/H0 #REDSHIFT is the redshift

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-12-df1aebbb5cd2> in <module>()
      1 #Now we have this agndat array wouldn't it be nice if we could
      2 #we could read the distance into this stucture?
----> 3 agndat['Dist'] = c*agndat['Z']/H0 #Z is the redshift

/home/jakub/anaconda/lib/python2.7/site-packages/astropy/io/fits/fitsrec.pyc in __setitem__(self, key, value)
    523     def __setitem__(self, key, value):
    524         if isinstance(key, string_types):
--> 525             self[key][:] = value
    526             return
    527 

/home/jakub/anaconda/lib/python2.7/site-packages/astropy/io/fits/fitsrec.pyc in __getitem__(self, key)
    482     def __getitem__(self, key):
    483         if isinstance(key, string_types):
--> 484             return self.field(key)
    485         elif isinstance(key, (slice, np.ndarray, tuple, list)):
    486             # Have to view as a recarray then back as a FITS_rec, otherwise the

/home/jakub/anaconda/lib/python2.7/site-packages/astropy/io/fits/fitsrec.pyc in field(self, key)
    608         # NOTE: The *column* index may not be the same as the field index in
    609         # the recarray, if the column is a phantom column
--> 610         col_indx = _get_index(self.columns.names, key)
    611         if self.columns[col_indx]._phantom:
    612             warnings.warn(

/home/jakub/anaconda/lib/python2.7/site-packages/astropy/io/fits/column.pyc in _get_index(names, key)
   1543                 indx = names.index(_key)
   1544             elif count == 0:
-> 1545                 raise KeyError("Key '%s' does not exist." % key)
   1546             else:              # multiple match
   1547                 raise KeyError("Ambiguous key name '%s'." % key)

KeyError: "Key 'Dist' does not exist."

#Hmm, looks like we cannot do that
#this is why I like using ascii.read, because it puts the data in a table
#so you can keep all of your variables in the same object
#but if you don't want to do that you can just do
#(use 'REDSHIFT' for the redshift; 'Z' appears to be the z-band magnitude)
dist = c*agndat['REDSHIFT']/H0

#Or we can do it retroactively in this case
#by converting the FITS record array into an astropy Table
from astropy.table import Table
agntable = Table(agndat)

#Now let us try this again
#Use the 'REDSHIFT' column for the redshift: 'Z' sits alongside
#U, G, R and I and appears to be the z-band magnitude instead
agntable['Dist'] = c*agntable['REDSHIFT']/H0

#IT WORKS!!!

#But it is also useful to keep track of units for your new table
#entry
#First list the column names of the table
agntable.colnames

['OBJID',
 'RA',
 'DEC',
 'REDSHIFT',
 'GZ1_MORPHOLOGY',
 'BPT_CLASS',
 'U',
 'G',
 'R',
 'I',
 'Z',
 'SIGMA',
 'SIGMA_ERR',
 'LOG_MSTELLAR',
 'L_O3',
 'Dist']

#Units already exist for the stuff that was read in
#Display the new column to see which unit (if any) it carries
agntable['Dist']

<Column name='Dist' unit=None format=None description=None>
array([[ 64949.8203125 ,  70874.7890625 ,  67814.171875  , ...,
         67081.3515625 ,  65246.69921875,  57507.7265625 ]], dtype=float32)

#Label the new column with its unit
#(c in km/s divided by H0 in km/s/Mpc leaves Mpc)
agntable['Dist'].unit = 'Mpc'

#Display the column again to see the unit that was just set
agntable['Dist']

<Column name='Dist' unit=u'Mpc' format=None description=None>
array([[ 64949.8203125 ,  70874.7890625 ,  67814.171875  , ...,
         67081.3515625 ,  65246.69921875,  57507.7265625 ]], dtype=float32)

#So what is the range of this data?
#(largest value minus smallest value, in the column's current unit)
agntable['Dist'].max()-agntable['Dist'].min()

24951.359

#That range is quite large, so a bigger unit may be easier to read

#Convert the distances from Mpc to Gpc (1 Gpc = 1000 Mpc)
mpc_per_gpc = 1000.
agntable['Dist'] = agntable['Dist']/mpc_per_gpc #Gpc
#The unit label is set by hand so that it matches the rescaled values
agntable['Dist'].unit = 'Gpc'

#Check the unit label
agntable['Dist'].unit

#Recompute the range in the new unit
dist_col = agntable['Dist']
dist_col.max()-dist_col.min()

24.951359

#Much nicer range
#Now let us look at another column from this data set
#(displaying a column shows its name, unit and values)
agntable['LOG_MSTELLAR']

<Column name='LOG_MSTELLAR' unit=None format=None description=None>
array([[  9.90809536,   9.50520515,   9.83625889, ...,  10.6171217 ,
         10.15844727,  11.17951012]], dtype=float32)

#LOG_MSTELLAR is the log of the stellar mass in solar units; raise 10
#to that power to get the mass in regular solar masses
agntable['MSTELLAR'] = np.power(10., agntable['LOG_MSTELLAR'])

#Display the new linear-mass column
agntable['MSTELLAR']

<Column name='MSTELLAR' unit=None format=None description=None>
array([[  8.09273600e+09,   3.20040653e+09,   6.85896960e+09, ...,
          4.14115717e+10,   1.44028109e+10,   1.51185490e+11]], dtype=float32)

#Sanity check: taking log10 of MSTELLAR should give back LOG_MSTELLAR,
#so the difference between the two should be zero everywhere
recovered_log = np.log10(agntable['MSTELLAR']) #Note the log base
check = recovered_log-agntable['LOG_MSTELLAR']
print(check.min(),check.max())

(0.0, 0.0)

#Good, we recovered the data we sent in

Galaxy Zoo Data

AGN Data