Galaxy Zoo Data

AGN Data


In [1]:
#import plotting module
import matplotlib.pyplot as plt
#Import file read in 
from astropy.io import ascii
#Import basic array man.
import numpy as np
#Import modules to calc. the time of things
import time
In [2]:
#read in Galaxy Zoo File
start = time.time()
dat = ascii.read('GalaxyZoo1_DR_table2.csv')
end = time.time()
print(end-start)#time for file to read in seconds
34.1528859138

In [4]:
#Now what happens if we tell ascii read more useful information
#About our file
start = time.time()
dat = ascii.read('GalaxyZoo1_DR_table2.csv',delimiter=',',header_start=0,data_start=1,guess=False)
end = time.time()
print(end-start)
12.6086659431

In [5]:
#WOW almost 3 times as fast
#The moral of the story is the more correct information you send in the faster a file will be read in
In [7]:
#So what did we tell the file?
#header_start=0 means the header for the file is on the first uncommented line
#data_start=1 means the data starts on the second uncommented line
#header_start can be less than 0 if the header is in the comment section
In [9]:
#Now let us manipulate some data
#ignore how I do this for now it is not useful to you yet
import astropy.io.fits as pyfits
fitdat = pyfits.open('schawinski_GZ_2010_catalogue.fits')
agndat = fitdat[1].data
In [11]:
#Since we cannot measure distances directly to these AGN
#We can use Hubble Expansion and Redshift to do indirectly
#define the speed of light 
c = np.float(3.0*10.**5.) #km/s always label units
#Define Hubble Constant
H0 = np.float(70.)#km/s/Mpc
#Calculate distance
#d = cz/H
In [12]:
#Now we have this agndat array wouldn't it be nice if we could
#we could read the distance into this stucture?
agndat['Dist'] = c*agndat['Z']/H0 #Z is the redshift
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-12-df1aebbb5cd2> in <module>()
      1 #Now we have this agndat array wouldn't it be nice if we could
      2 #we could read the distance into this stucture?
----> 3 agndat['Dist'] = c*agndat['Z']/H0 #Z is the redshift

/home/jakub/anaconda/lib/python2.7/site-packages/astropy/io/fits/fitsrec.pyc in __setitem__(self, key, value)
    523     def __setitem__(self, key, value):
    524         if isinstance(key, string_types):
--> 525             self[key][:] = value
    526             return
    527 

/home/jakub/anaconda/lib/python2.7/site-packages/astropy/io/fits/fitsrec.pyc in __getitem__(self, key)
    482     def __getitem__(self, key):
    483         if isinstance(key, string_types):
--> 484             return self.field(key)
    485         elif isinstance(key, (slice, np.ndarray, tuple, list)):
    486             # Have to view as a recarray then back as a FITS_rec, otherwise the

/home/jakub/anaconda/lib/python2.7/site-packages/astropy/io/fits/fitsrec.pyc in field(self, key)
    608         # NOTE: The *column* index may not be the same as the field index in
    609         # the recarray, if the column is a phantom column
--> 610         col_indx = _get_index(self.columns.names, key)
    611         if self.columns[col_indx]._phantom:
    612             warnings.warn(

/home/jakub/anaconda/lib/python2.7/site-packages/astropy/io/fits/column.pyc in _get_index(names, key)
   1543                 indx = names.index(_key)
   1544             elif count == 0:
-> 1545                 raise KeyError("Key '%s' does not exist." % key)
   1546             else:              # multiple match
   1547                 raise KeyError("Ambiguous key name '%s'." % key)

KeyError: "Key 'Dist' does not exist."
In [13]:
#Hmm looks link we cannot do that
#this is why I like using ascii.read becaue it puts it is a table
#so you can keep all of your variables in the same object
#but if you don't want to do that you can just do 
dist = c*agndat['Z']/H0
In [14]:
#Or we can do it retroactively in this case
from astropy.table import Table
agntable = Table(agndat)
In [30]:
#Now let us try this again
agntable['Dist'] = c*agntable['Z']/H0
In [16]:
#IT WORKS!!!
In [17]:
#But it is also useful to keep track of units for your now table
#entry
agntable.colnames
Out[17]:
['OBJID',
 'RA',
 'DEC',
 'REDSHIFT',
 'GZ1_MORPHOLOGY',
 'BPT_CLASS',
 'U',
 'G',
 'R',
 'I',
 'Z',
 'SIGMA',
 'SIGMA_ERR',
 'LOG_MSTELLAR',
 'L_O3',
 'Dist']
In [19]:
#Units already'] exist for the stuff that was read in
agntable['Dist']
Out[19]:
<Column name='Dist' unit=None format=None description=None>
array([[ 64949.8203125 ,  70874.7890625 ,  67814.171875  , ...,
         67081.3515625 ,  65246.69921875,  57507.7265625 ]], dtype=float32)
In [20]:
agntable['Dist'].unit = 'Mpc'
In [21]:
agntable['Dist']
Out[21]:
<Column name='Dist' unit=u'Mpc' format=None description=None>
array([[ 64949.8203125 ,  70874.7890625 ,  67814.171875  , ...,
         67081.3515625 ,  65246.69921875,  57507.7265625 ]], dtype=float32)
In [31]:
#So what is the range of this data?
agntable['Dist'].max()-agntable['Dist'].min()
Out[31]:
24951.359
In [23]:
#That range is quite large perhaps we should switch units
In [32]:
#let us Change to Gpc
agntable['Dist'] = agntable['Dist']/1000. #Gpc
agntable['Dist'].unit = 'Gpc'
In [27]:
agntable['Dist'].unit
Out[27]:
$\mathrm{Gpc}$
In [33]:
agntable['Dist'].max()-agntable['Dist'].min()
Out[33]:
24.951359
In [35]:
#Much nicer range
#Now let us look at another column from this data set
agntable['LOG_MSTELLAR']
Out[35]:
<Column name='LOG_MSTELLAR' unit=None format=None description=None>
array([[  9.90809536,   9.50520515,   9.83625889, ...,  10.6171217 ,
         10.15844727,  11.17951012]], dtype=float32)
In [36]:
#This is in log solar units suppose we wanted it in regular solar masses
agntable['MSTELLAR'] = 10.**agntable['LOG_MSTELLAR']
In [37]:
agntable['MSTELLAR']
Out[37]:
<Column name='MSTELLAR' unit=None format=None description=None>
array([[  8.09273600e+09,   3.20040653e+09,   6.85896960e+09, ...,
          4.14115717e+10,   1.44028109e+10,   1.51185490e+11]], dtype=float32)
In [40]:
#Now let us check our sanity
check = np.log10(agntable['MSTELLAR'])-agntable['LOG_MSTELLAR'] #Note the log base
print(check.min(),check.max())
(0.0, 0.0)

In []:
#Good we reconvered the data we sent in