#!python

import sys,os,re
import datetime as dt

# Link block written in place of every "Data URL" line.  The placeholders
# {year} and {jd} (day of year, zero-padded to three digits) are filled in
# from the most recently seen "<name>YYYY_DDD" line.
urls = """   Metadata=<a href="http://www.auscover.org.au/purl/hyperspectral-surf-refl-hyperion">Persistent URL</a><br/>
   Surf.Refl.=<a href="http://remote-sensing.nci.org.au/u39/public/data/hyperion/surface-reflectance-curtin/{year:d}">HDF file</a><br/>
   L1R file=<a href="http://remote-sensing.nci.org.au/u39/public/data/hyperion/raw-australiapasses-usgs/L1R/{year:d}/{jd:03d}/hyp">TGZ package</a><br/>
   L1T file=<a href="http://remote-sensing.nci.org.au/u39/public/data/hyperion/raw-australiapasses-usgs/L1T/{year:d}/{jd:03d}/hyp">TGZ package</a>
"""

# Compiled once (raw strings, so the backslash escapes reach the regex engine
# untouched and do not trigger invalid-escape warnings).
# An indented "<name>" element whose text starts with <year>_<day-of-year>.
NAME_RE = re.compile(r'\s+<name>(\d{4}_\d{3})')
# An indented line starting with "Data URL"; such lines are replaced.
DATA_URL_RE = re.compile(r'\s+Data\sURL')


def rewrite_lines(lines):
    """Yield the output text for each line of *lines*.

    Trailing whitespace is stripped from every line.  A line matching
    ``NAME_RE`` updates the current date (parsed with ``'%Y_%j'``) and is
    still passed through.  A line matching ``DATA_URL_RE`` is replaced by
    the ``urls`` template filled in with the current date's year and day of
    year; every other line is emitted followed by a single newline.

    Raises:
        ValueError: if a "Data URL" line appears before any "<name>" line
            (there is no date to build the links from), or if the date in
            a "<name>" line cannot be parsed by ``strptime``.
    """
    d = None
    for lineno, line in enumerate(lines, 1):
        line = line.rstrip()
        m = NAME_RE.match(line)
        if m:
            d = dt.datetime.strptime(m.group(1), '%Y_%j')
        if DATA_URL_RE.match(line):
            if d is None:
                raise ValueError(
                    'line %d: "Data URL" found before any '
                    '<name>YYYY_DDD line' % lineno)
            # The template already ends with a newline.
            yield urls.format(year=d.year, jd=d.timetuple().tm_yday)
        else:
            yield line + '\n'


def main(argv):
    """Rewrite the file named by ``argv[1]`` into ``'new/' + argv[1]``.

    Returns the process exit status: 0 on success, 2 when the input file
    argument is missing.
    """
    if len(argv) < 2:
        sys.stderr.write('usage: %s INPUT_FILE\n' % argv[0])
        return 2

    finput = argv[1]
    output = 'new/' + finput

    # Make sure the destination directory exists so open() does not fail.
    outdir = os.path.dirname(output)
    if outdir and not os.path.isdir(outdir):
        os.makedirs(outdir)

    with open(finput, 'r') as f:
        with open(output, 'w') as g:
            for chunk in rewrite_lines(f):
                g.write(chunk)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))


