; READ_LISIRD_DATAFILE
;
; AUTHOR: A. Windnagel
; DATE: 08/10/06
; PURPOSE: To read a lisird data text file
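; INPUT: 
;   datafile - name of the data file you wish to read (may be gzip compressed)
; OUTPUT:
;   array of structures containing the data, one element per data line
;   (-1 is returned if there is a problem)
;   status - optional second argument; 0 if OK, non-zero if there is a problem
; KEYWORD:
;   header - returns the full header as a string array
; USAGE:
;   data = read_lisird_datafile('full_sorce_time_series.txt')
;   data = read_lisird_datafile('full_sorce_time_series.txt', status, header=header)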
;
; 06/01/07 Don Woodraska  Complete rewrite, returns an array of structures
;          instead of a single structure with long arrays.  Expanded to handle 
;          gzip compressed files. If you have trouble reading a compressed 
;          file, try uncompressing it. Using check of each line to separate
;          header from data lines based on first string length match and first
;          character matching either 1 or 2 (for 19xx or 2xxx years). Added
;          header keyword to return the full header as a string array.
;
function read_lisird_datafile, datafile, header=header, status
; Read a LISIRD text data file (optionally gzip compressed) into an array
; of structures, one element per data line.
;
;   datafile - (in)  string, name of the file to read (may include a path);
;                    a name ending in '.gz' is opened as gzip compressed
;   status   - (out) optional; 0 if OK, -1 if the arguments are wrong, the
;                    file is not found, or the file has no data lines
;   header   - (out) keyword; string array holding every line that was not
;                    parsed as data (a single empty string if there is none)
;
; Returns the array of records, or -1 when status is non-zero.

if n_params() lt 1 or n_params() gt 2 then begin
    print,''
    print, 'Usage:  data = read_lisird_datafile( datafile[, status][, header=header] )'
    print,'  INPUT: datafile is a string with the filename (can have a path)'
    print,'         datafile can be a gzip compressed file, but uncomrpessed is faster'
    print,'  OUTPUT: status=0 if OK, non-zero if there is a problem (optional)'
    print,'  Keyword: header=header'
    print,'          header will be a string array containing the full header'
    print,'  data: an array of LISIRD data structures based on the output format'
    print,''
    status=-1
    return,-1
endif

; verify that the file exists
finfo = file_info(datafile)
if (finfo.exists eq 0) then begin
    print, 'File not found, check name'
    status=-1
    return,-1
endif

status=0

; is it compressed with gzip? (decided from the file name extension only)
filetype=strlowcase((reverse(strsplit(datafile,'.',/extract)))[0])
compress=0
if filetype eq 'gz' then compress=1

; define the template for reading the file
rec={time_la:'', julian:0.d0, wave:0.d0, irradiance:0.0d, uncertainty:0.d0, $
     quality:0.d0, id:fix(0)}

; Size the output from the real number of text lines.  Passing COMPRESS to
; file_lines makes it count lines of the uncompressed content rather than
; newline bytes in the gzip stream.  replicate cannot create an empty
; array, so always allocate at least one record.
nlines = file_lines(datafile, compress=compress)
output = replicate(rec, nlines > 1L)

; open file for reading
openr,lun,datafile,/get_lun,compress=compress
;don't just assume first 17 lines are header, check each line
s=''
count=0L
header=s   ; placeholder first element, stripped after the loop
while not eof(lun) do begin
    readf,lun,s
    linestr=strsplit(s,' ',/extract)
    tmp=linestr[0]
    ; A line is header when the first item is not 17 characters long
    ; and its first character is not 1 or 2 (stregex returns -1 when
    ; there is no match).
    is_header = (strlen(tmp) ne 17) and $
      (stregex(strmid(tmp,0,1),'[1-2]') eq -1)
    ; A data record needs all 7 fields; a shorter line that merely starts
    ; with 1 or 2 is kept with the header instead of indexing past the
    ; end of linestr.
    if is_header or (n_elements(linestr) lt 7) then begin
        ; header
        header=[header,s]
    endif else begin
        ; actual data
        ; grow BEFORE writing so output[count] is always a valid subscript
        if count ge n_elements(output) then begin
            print,'Too many elements to store in output structure'
            print,'Consider uncompressing the data file and reading again.'
            print,'Attempting to recover, probable performance hit, but trying anyway'
            output=[output,replicate(rec,n_elements(output))]
        endif
        output[count].time_la     = linestr[0]         ;type string
        output[count].julian      = double(linestr[1]) ;double
        output[count].wave        = double(linestr[2]) ;double
        output[count].irradiance  = double(linestr[3]) ;double
        output[count].uncertainty = double(linestr[4]) ;double
        output[count].quality     = double(linestr[5]) ;double
        output[count].id          = fix(linestr[6]) ;short int
        count = count + 1L
    endelse
endwhile
; free_lun also closes the file
free_lun,lun

; drop the placeholder first element of header
if n_elements(header) gt 1 then header=header[1:*]

; no data lines: output[0:count-1] would be output[0:-1], which is not an
; empty selection, so report the problem instead
if count eq 0L then begin
    print,'No data records found in file'
    status=-1
    return,-1
endif

; trim the unused records left at the end of the preallocated array
output=output[0L : count-1L]

return, output
end