## Alternative to max(dt): read the newest non-empty dt partition from `hdfs dfs -du -h`.
def _latest_dt_in_du_output(output, level):
    """Return the greatest ``dt=`` value among the sized lines of *output*.

    *output* is the text printed by ``hdfs dfs -du -h``. A line counts only
    when it contains a number, one whitespace character, a unit character
    taken from *level*, another whitespace character and, later on the same
    line, ``dt=`` followed by ten characters out of ``0-9`` and ``-``; the
    number must also be greater than zero. Returns None when no line
    qualifies.
    """
    # A single pattern captures the size (group 1) and the date (group 2).
    # ``level`` is spliced into a character class, so every character in it
    # (including the ``|`` separators of the default) is accepted as a unit.
    entry_pat = re.compile(
        r'(\d+(?:\.\d+)?)\s[' + level + r']\s.*dt=([0-9-]{10})'
    )
    dates = []
    for line in output.split('\n'):
        for match in entry_pat.finditer(line):
            if float(match.group(1)) > 0:
                dates.append(match.group(2))
    # Dates are compared as plain strings; this assumes zero-padded
    # YYYY-MM-DD values -- TODO confirm against the real partition naming.
    return max(dates) if dates else None


def check_dt(path, level='G|M|K'):
    """Return the largest ``dt=`` partition value under *path* that has data.

    Runs ``hdfs dfs -du -h <path>`` and scans its output for entries whose
    size carries one of the unit characters in *level* and is greater than 0.

    Args:
        path: HDFS location handed to ``hdfs dfs -du -h``. It is split into
            words with shell-like quoting rules, but no shell is involved,
            so shell metacharacters in it are never executed.
        level: characters accepted as the size unit (default ``'G|M|K'``).
            Sizes printed without a unit therefore never match.

    Returns:
        The greatest matching date string, or None when the command cannot
        be started, prints nothing usable, or every matching size is 0.
    """
    import shlex
    import subprocess

    try:
        cmd = ['hdfs', 'dfs', '-du', '-h'] + shlex.split(path)
        process = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, universal_newlines=True
        )
    except (OSError, ValueError):
        # Missing ``hdfs`` binary or an unparsable path: behave like an
        # empty listing, as the old shell pipeline did.
        return None
    # communicate() drains and closes the pipe; the exit status is ignored,
    # exactly as before.
    output, _ = process.communicate()
    return _latest_dt_in_du_output(output, level)
def get_customized_items():
    """
    Define and compute any special values your SQL needs here, then refer to
    each of them as {YOUR_VAR} in the SQL.

    Returns:
        The function's local variables as a dict (via ``locals()``), so every
        name assigned below -- including ``today`` -- becomes a key.
    """
    today = Time.today()
    # Look-back dates relative to today (itv is presumably a day count --
    # confirm against the Time helper).
    TX_PRE_60_DATE = Time.date_sub(date=today, itv=60)
    TX_PRE_365_DATE = Time.date_sub(date=today, itv=365)
    # Largest dt partition with data under the warehouse path; None if none found.
    TX_DATE_MAX01 = check_dt('hdfs://ns***/hive/databse.db/')
    return locals()
根据身份证判断男女的规则:
-- Derive a gender code from the ID number in column dd.
-- 18-character IDs are judged by their 17th character, 15-character IDs by
-- their 15th (last) character: odd -> '1', even -> '2'.
-- (Odd digits denote male in the resident ID standard, so presumably
-- '1' = male and '2' = female -- confirm against the consuming code.)
-- Any other length matches no branch, so the result is NULL.
case
    when length(dd) = 18 and substr(dd, 17, 1) % 2 = 1 then '1'
    when length(dd) = 18 and substr(dd, 17, 1) % 2 = 0 then '2'
    when length(dd) = 15 and substr(dd, 15, 1) % 2 = 1 then '1'
    when length(dd) = 15 and substr(dd, 15, 1) % 2 = 0 then '2'
end as id_gender