导出hive的建表语句,支持单个或多个指定表库下的所有表--688IT编程网

导出hive的建表语句，⽀持单个或多个指定表库下的所有表

1.导出指定表

#!/usr/bin/env python

#-*- coding:utf8 -*-

# 从mysql中提取hive建表语句-指定的表

import os,sys

import fileinput

import datetime

tor

reload(sys)

sys.setdefaultencoding("utf8")

# 获取参数

if (len(sys.argv) == 3):

Db_name = sys.argv[1].lower()

Tab_Name = sys.argv[2].lower()

else:

print"Parameters is illegal."

print"python export_tabname.py db tablename1,tablename2"

#IP

host_valuse=""

#⽤户

user_valuse="hive"

#密码

passwd_valuse="xxxxxxxxxxx"

#数据库名称

database_valuse="hive"

#表名称

TabName = "'" + place(",","','") + "'"

def hive_create_table(host_valuse,user_valuse,passwd_valuse,database_valuse):

conn = t(host=host_valuse,user=user_valuse,passwd=passwd_valuse,database=database_valuse,charset='utf8')

mycursor = conn.cursor()

# 获取DB_ID

select_DB_ID = "select DB_ID from DBS where NAME='{0}';".format(Db_name)

result_DB_ID = mycursor.fetchall()

fo = open("create_tab.sql", "w")

for dir_DB_ID in result_DB_ID :

# 获取数据库名

DB_ID = str(dir_DB_ID)[1:].split(',')[0]

print(DB_ID)

select_DB_NAME = "select NAME from DBS where DB_ID="+DB_ID+";"

print(select_DB_NAME )

result_DB_NAME = mycursor.fetchone()

fo.write("\n===========数据库:"+str(result_DB_NAME).split('\'')[1]+"===========\n")

DBname=str(result_DB_NAME).split('\'')[1]

print'数据库名字：' + DBname

print(result_DB_NAME)

# 获取表名

select_table_name_sql = "select TBL_NAME from TBLS where DB_ID='"+ DB_ID+"' and TBL_NAME in ({0})".format(TabName)

result_table_names = mycursor.fetchall()

for table_name in result_table_names :

fo.write("\nDROP TABLE IF EXISTS "+DBname +'.`'+str(table_name).split('\'')[1]+"`;")

fo.write("\nCREATE TABLE "+DBname +'.`'+str(table_name).split('\'')[1]+"(\n")

# 根据表名获取SD_ID

select_table_SD_ID = "select SD_ID from TBLS where tbl_name='"+str(table_name).split('\'')[1]+"' and DB_ID="+DB_ID+";"

print(select_table_SD_ID)

result_SD_ID = mycursor.fetchone()

print(result_SD_ID )

# 根据SD_ID获取CD_ID

SD_ID=str(result_SD_ID)[1:].split(',')[0]

select_table_CD_ID = "select CD_ID from SDS where SD_ID="+str(result_SD_ID)[1:].split(',')[0]+";"

print(select_table_CD_ID)

result_CD_ID = mycursor.fetchone()

print(result_CD_ID)

# 根据CD_ID获取表的列

CD_ID=str(result_CD_ID)[1:].split(',')[0]

select_table_COLUMN_NAME = "select COLUMN_NAME,TYPE_NAME,COMMENT from COLUMNS_V2 where CD_ID="+str(result_CD_ID)[1:].split(',')[0]+" order by INTEGER_IDX;"

print(select_table_COLUMN_NAME)

result_COLUMN_NAME = mycursor.fetchall()

print(result_COLUMN_NAME)

index=0

for col,col_type,col_name in result_COLUMN_NAME:

import语句print(col)

print(col_type)

print(col_name)

print(len(result_COLUMN_NAME) )

# 写⼊表的列和列的类型到⽂件

if col_name is None:

fo.write(" `"+str(col)+"` "+str(col_type))

else:

fo.write(" `"+str(col)+"` "+str(col_type) + " COMMENT '" + str(col_name) + "'")

if index < len(result_COLUMN_NAME)-1:

index = index + 1

fo.write(",\n")

elif index == len(result_COLUMN_NAME)-1:

fo.write("\n)")

# 根据表名获取TBL_ID

select_table_SD_ID = "select TBL_ID from TBLS where tbl_name='"+str(table_name).split('\'')[1]+"' and DB_ID="+DB_ID+";"

print(select_table_SD_ID)

result_TBL_ID = mycursor.fetchone()

print(result_TBL_ID)

# 根据TBL_ID获取分区信息

select_table_PKEY_NAME_TYPE = "select PKEY_NAME,PKEY_TYPE,PKEY_COMMENT from PARTITION_KEYS where TBL_ID="+str(result_TBL_ID)[1:].split(',')[0]+" order by INTEGER_IDX;"

print(select_table_PKEY_NAME_TYPE)

result_PKEY_NAME_TYPE = mycursor.fetchall()

print(result_PKEY_NAME_TYPE)

if len(result_PKEY_NAME_TYPE) > 0:

#if result_PKEY_NAME_TYPE is not None:

fo.write("\nPARTITIONED BY (\n")

#elif len(result_PKEY_NAME_TYPE) == 0:

else :

fo.write("\n")

i=0

for pkey_name,pkey_type,PKEY_COMMENT in result_PKEY_NAME_TYPE:

if str(PKEY_COMMENT) is None:

fo.write(" `"+str(pkey_name)+"` "+str(pkey_type))

else:

fo.write(" `"+str(pkey_name)+"` "+str(pkey_type) + " COMMENT '" + str(PKEY_COMMENT) + "'\n")

if i < len(result_PKEY_NAME_TYPE)- 1:

i = i + 1

fo.write(",")

elif i == len(result_PKEY_NAME_TYPE) - 1:

fo.write(")\n")

# 根据表TBL_ID 获得中⽂名称

select_PARAM_VALUE01 = "select PARAM_VALUE from TABLE_PARAMS WHERE TBL_ID=( select TBL_ID from TBLS where tbl_name='"+str(table_name).split('\'')[1]+"' and DB_ID="+DB_ID+") and PARAM_KEY='comment';

print(select_PARAM_VALUE01)

result_PARAM_VALUE01 = mycursor.fetchone()

print result_PARAM_VALUE01

if result_PARAM_VALUE01 is None:

print'未设置表名'

elif not result_PARAM_VALUE01[0]:

print'表名为空'

else:

fo.write("COMMENT '" + str(result_PARAM_VALUE01[0]) +"' \n" )

# 根据SD_ID和CD_ID获取SERDE_ID

select_SERDE_ID = "select SERDE_ID from SDS where SD_ID="+SD_ID+" and CD_ID="+CD_ID+";"

print(select_SERDE_ID)

result_SERDE_ID = mycursor.fetchone()

print(result_SERDE_ID)

# 根据SERDE_ID获取PARAM_VALUE(列分隔符)

select_PARAM_VALUE = "select PARAM_VALUE from SERDE_PARAMS where SERDE_ID="+str(result_SERDE_ID)[1:].split(",")[0]+" and PARAM_KEY='field.delim';"

print(select_PARAM_VALUE)

result_PARAM_VALUE = mycursor.fetchone()

print(result_PARAM_VALUE)

if result_PARAM_VALUE is not None:

fo.write("ROW FORMAT DELIMITED\n")

fo.write("FIELDS TERMINATED BY '"+str(result_PARAM_VALUE).split('\'')[1]+"'\n")

# 根据SERDE_ID获取PARAM_VALUE(⾏分隔符)

select_PARAM_HNAG = "select PARAM_VALUE from SERDE_PARAMS where SERDE_ID="+str(result_SERDE_ID)[1:].split(",")[0]+" and PARAM_KEY='line.delim';"

print(select_PARAM_HNAG)

RESULT_PARAM_HNAG = mycursor.fetchone()

print(RESULT_PARAM_HNAG)

if RESULT_PARAM_HNAG is not None:

fo.write("LINES TERMINATED BY '"+str(RESULT_PARAM_HNAG).split('\'')[1]+"'\n")

# 根据SD_ID和CD_ID获取输⼊输出格式

select_table_STORE_FORMAT = "select INPUT_FORMAT from SDS where SD_ID="+SD_ID+" and CD_ID="+CD_ID+";"

print(select_table_STORE_FORMAT)

result_table_STORE_FORMAT= mycursor.fetchall()

print(result_table_STORE_FORMAT)

for store_format in result_table_STORE_FORMAT:

if"org.apache.hadoop.hive.OrcInputFormat"in str(store_format):

fo.write("STORED AS ORC;\n")

elif"org.apache.hadoop.mapred.TextInputFormat"in str(store_format):

fo.write("STORED AS TEXTFILE;\n")

elif"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"in str(store_format):

fo.write("STORED AS PARQUET;\n")

elif"org.apache.kudu.mapreduce.KuduTableInputFormat"in str(store_format):

fo.write("STORED AS KuduTable;\n")

else :

fo.write("STORED AS null;\n")

fo.close()

if__name__ == "__main__":

hive_create_table(host_valuse,user_valuse,passwd_valuse,database_valuse)

2.导出⼀个库下的所有表

#!/usr/bin/env python

#-*- coding:utf8 -*-

# 从mysql中提取hive建表语句-指定数据库的所有表

import os,sys

import fileinput

import datetime

tor

reload(sys)

sys.setdefaultencoding("utf8")

# 获取参数

if (len(sys.argv) == 2):

Db_name = sys.argv[1].lower()

else:

print"Parameters is illegal."

print"python export_tabname.py db_name"

#IP

host_valuse=""

#⽤户

user_valuse="hive"

#密码

passwd_valuse="xxxxxxxxx"

#数据库名称

database_valuse="hive"

def hive_create_table(host_valuse,user_valuse,passwd_valuse,database_valuse):

conn = t(host=host_valuse,user=user_valuse,passwd=passwd_valuse,database=database_valuse,charset='utf8')

mycursor = conn.cursor()

# 获取DB_ID

select_DB_ID = "select DB_ID from DBS where NAME='{0}';".format(Db_name)

result_DB_ID = mycursor.fetchall()

fo = open("create_tab.sql", "w")

for dir_DB_ID in result_DB_ID :

# 获取数据库名

DB_ID = str(dir_DB_ID)[1:].split(',')[0]

print(DB_ID)

select_DB_NAME = "select NAME from DBS where DB_ID="+DB_ID+";"

print(select_DB_NAME )

result_DB_NAME = mycursor.fetchone()

fo.write("\n===========数据库:"+str(result_DB_NAME).split('\'')[1]+"===========\n")

DBname=str(result_DB_NAME).split('\'')[1]

print'数据库名字：' + DBname

print(result_DB_NAME)

# 获取表名

select_table_name_sql = "select TBL_NAME from TBLS where DB_ID='"+ DB_ID+"';"

result_table_names = mycursor.fetchall()

for table_name in result_table_names :

fo.write("\nDROP TABLE IF EXISTS "+DBname +'.`'+str(table_name).split('\'')[1]+"`;")

fo.write("\nCREATE TABLE IF NOT EXISTS "+DBname +'.`'+str(table_name).split('\'')[1]+"`(\n")

# 根据表名获取SD_ID

select_table_SD_ID = "select SD_ID from TBLS where tbl_name='"+str(table_name).split('\'')[1]+"' and DB_ID="+DB_ID+";"

print(select_table_SD_ID)

result_SD_ID = mycursor.fetchone()

print(result_SD_ID )

# 根据SD_ID获取CD_ID

SD_ID=str(result_SD_ID)[1:].split(',')[0]

select_table_CD_ID = "select CD_ID from SDS where SD_ID="+str(result_SD_ID)[1:].split(',')[0]+";"

print(select_table_CD_ID)

result_CD_ID = mycursor.fetchone()

print(result_CD_ID)

# 根据CD_ID获取表的列

CD_ID=str(result_CD_ID)[1:].split(',')[0]

select_table_COLUMN_NAME = "select COLUMN_NAME,TYPE_NAME,COMMENT from COLUMNS_V2 where CD_ID="+str(result_CD_ID)[1:].split(',')[0]+" order by INTEGER_IDX;"

print(select_table_COLUMN_NAME)

result_COLUMN_NAME = mycursor.fetchall()

print(result_COLUMN_NAME)

index=0

for col,col_type,col_name in result_COLUMN_NAME:

print(col)

print(col_type)

print(col_name)

print(len(result_COLUMN_NAME) )

# 写⼊表的列和列的类型到⽂件

if col_name is None:

fo.write(" `"+str(col)+"` "+str(col_type))

else:

fo.write(" `"+str(col)+"` "+str(col_type) + " COMMENT '" + str(col_name) + "'")

if index < len(result_COLUMN_NAME)-1:

index = index + 1

fo.write(",\n")

elif index == len(result_COLUMN_NAME)-1:

fo.write("\n)")

# 根据表名获取TBL_ID

select_table_SD_ID = "select TBL_ID from TBLS where tbl_name='"+str(table_name).split('\'')[1]+"' and DB_ID="+DB_ID+";"

print(select_table_SD_ID)

result_TBL_ID = mycursor.fetchone()

print(result_TBL_ID)

# 根据TBL_ID获取分区信息

select_table_PKEY_NAME_TYPE = "select PKEY_NAME,PKEY_TYPE,PKEY_COMMENT from PARTITION_KEYS where TBL_ID="+str(result_TBL_ID)[1:].split(',')[0]+" order by INTEGER_IDX;"

print(select_table_PKEY_NAME_TYPE)

result_PKEY_NAME_TYPE = mycursor.fetchall()

print(result_PKEY_NAME_TYPE)

if len(result_PKEY_NAME_TYPE) > 0:

#if result_PKEY_NAME_TYPE is not None:

fo.write("\nPARTITIONED BY (\n")

#elif len(result_PKEY_NAME_TYPE) == 0:

else :

fo.write("\n")

i=0

for pkey_name,pkey_type,PKEY_COMMENT in result_PKEY_NAME_TYPE:

if str(PKEY_COMMENT) is None:

fo.write(" `"+str(pkey_name)+"` "+str(pkey_type))

else:

fo.write(" `"+str(pkey_name)+"` "+str(pkey_type) + " COMMENT '" + str(PKEY_COMMENT) + "'\n")

if i < len(result_PKEY_NAME_TYPE)- 1:

i = i + 1

fo.write(",")

elif i == len(result_PKEY_NAME_TYPE) - 1:

fo.write(")\n")

# 根据表TBL_ID 获得中⽂名称

select_PARAM_VALUE01 = "select PARAM_VALUE from TABLE_PARAMS WHERE TBL_ID=( select TBL_ID from TBLS where tbl_name='"+str(table_name).split('\'')[1]+"' and DB_ID="+DB_ID+") and PARAM_KEY='comment';

print(select_PARAM_VALUE01)

result_PARAM_VALUE01 = mycursor.fetchone()

print result_PARAM_VALUE01

if result_PARAM_VALUE01 is None:

print'未设置表名'

elif not result_PARAM_VALUE01[0]:

print'表名为空'

else:

fo.write("COMMENT '" + str(result_PARAM_VALUE01[0]) +"' \n" )

# 根据SD_ID和CD_ID获取SERDE_ID

select_SERDE_ID = "select SERDE_ID from SDS where SD_ID="+SD_ID+" and CD_ID="+CD_ID+";"

print(select_SERDE_ID)

result_SERDE_ID = mycursor.fetchone()

print(result_SERDE_ID)

# 根据SERDE_ID获取PARAM_VALUE(列分隔符)

select_PARAM_VALUE = "select PARAM_VALUE from SERDE_PARAMS where SERDE_ID="+str(result_SERDE_ID)[1:].split(",")[0]+" and PARAM_KEY='field.delim';"

print(select_PARAM_VALUE)

result_PARAM_VALUE = mycursor.fetchone()

print(result_PARAM_VALUE)

if result_PARAM_VALUE is not None:

fo.write("ROW FORMAT DELIMITED\n")

fo.write("FIELDS TERMINATED BY '"+str(result_PARAM_VALUE).split('\'')[1]+"'\n")

# 根据SERDE_ID获取PARAM_VALUE(⾏分隔符)

select_PARAM_HNAG = "select PARAM_VALUE from SERDE_PARAMS where SERDE_ID="+str(result_SERDE_ID)[1:].split(",")[0]+" and PARAM_KEY='line.delim';"

print(select_PARAM_HNAG)

RESULT_PARAM_HNAG = mycursor.fetchone()

print(RESULT_PARAM_HNAG)

if RESULT_PARAM_HNAG is not None:

fo.write("LINES TERMINATED BY '"+str(RESULT_PARAM_HNAG).split('\'')[1]+"'\n")

# 根据SD_ID和CD_ID获取输⼊输出格式

select_table_STORE_FORMAT = "select INPUT_FORMAT from SDS where SD_ID="+SD_ID+" and CD_ID="+CD_ID+";"

print(select_table_STORE_FORMAT)

result_table_STORE_FORMAT= mycursor.fetchall()

print(result_table_STORE_FORMAT)

for store_format in result_table_STORE_FORMAT:

if"org.apache.hadoop.hive.OrcInputFormat"in str(store_format):

fo.write("STORED AS ORC;\n")

elif"org.apache.hadoop.mapred.TextInputFormat"in str(store_format):

fo.write("STORED AS TEXTFILE;\n")

elif"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"in str(store_format): fo.write("STORED AS PARQUET;\n")

elif"org.apache.kudu.mapreduce.KuduTableInputFormat"in str(store_format):

fo.write("STORED AS KuduTable;\n")

else :

fo.write("STORED AS null;\n")

fo.close()

if__name__ == "__main__":

hive_create_table(host_valuse,user_valuse,passwd_valuse,database_valuse)

需要注意：

视图也会同时导出，STORED AS null的存储格式。

分隔符需要判断是否正确。

688IT编程网

导出hive的建表语句,支持单个或多个指定表库下的所有表

发表评论

推荐文章

应用程序的安全检测方法、装置、电子设备和存储介质

nginx map用法正则

VBA之正则表达式(1)--基础篇

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

热门文章

一种任意人头与任意人体的3D结合方法

正则匹配c语言中8进制

fortran数据格式

python中文本转数字用的公式

gh 文本变数值

js判断输入是否为正整数、浮点数等数字的函数代码

qt浮点数正则表达式

QT正则表达式限制输入值

手机号码和电话号码的正则表达式

str转浮点-概述说明以及解释

英豪结尾的诗句

Java正则表达式:符合以特定字符串开头,以特定字符串结尾的所有结果

machinebuilder使用手册

ASP.NET网站建设基本常用代码

LCD显示实时时钟

经纬度正则表达式解析

前端科学计数法转数字

python正则表达式re之compile函数解析

pythonunittest之断言及示例

[lua]lua中匹配字符串小数

最新文章

nginx map用法正则

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

python中re.findall函数实例用法

nginx url表达式

nginx 正则匹配参数

标签列表

688IT编程网

导出hive的建表语句,支持单个或多个指定表库下的所有表

发表评论

推荐文章

应用程序的安全检测方法、装置、电子设备和存储介质

nginx map用法 正则

VBA之正则表达式(1)--基础篇

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

热门文章

一种任意人头与任意人体的3D结合方法

正则匹配c语言中8进制

fortran数据格式

python中文本转数字用的公式

gh 文本变数值

js判断输入是否为正整数、浮点数等数字的函数代码

qt浮点数正则表达式

QT正则表达式限制输入值

手机号码和电话号码的正则表达式

str转浮点-概述说明以及解释

英豪结尾的诗句

Java正则表达式:符合以特定字符串开头,以特定字符串结尾的所有结果

machinebuilder使用手册

ASP.NET网站建设基本常用代码

LCD显示实时时钟

经纬度正则表达式解析

前端科学计数法转数字

python正则表达式re之compile函数解析

pythonunittest之断言及示例

[lua]lua中匹配字符串小数

最新文章

nginx map用法 正则

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

python中re.findall函数实例用法

nginx url表达式

nginx 正则匹配参数

标签列表

nginx map用法正则

nginx map用法正则