python连接hive

参考:

https://blog.csdn.net/a6822342/article/details/80713652   #简单

https://www.zhihu.com/question/269333988   #复杂点的

方法一: 

ali-bigdata-gateway-guard-1

deactivate

sudo -i

. /mnt/disk1/data/venv_bi/bin/activate


pip install pyhive
pip install thrift
pip install sasl
pip install thrift_sasl


vim h.py
from pyhive import hive

# Connect to Hive via PyHive and list the available databases.
conn = hive.Connection(host='127.0.0.1', port=10000, username='feng.hong', database='test_db')
try:
    cursor = conn.cursor()
    try:
        cursor.execute('show databases')
        print(cursor.fetchall())
    finally:
        # Release the cursor even when the statement fails.
        cursor.close()
finally:
    # Always close the connection so the server-side session is not leaked.
    conn.close()

或使用select查询
# Same connection settings as above, this time running a SELECT query.
conn = hive.Connection(host='127.0.0.1', port=10000, username='feng.hong', database='test_db')
try:
    cursor = conn.cursor()
    try:
        cursor.execute('select * from users limit 10')
        print(cursor.fetchall())
    finally:
        # Release the cursor even when the query fails.
        cursor.close()
finally:
    # Always close the connection so the server-side session is not leaked.
    conn.close()

#长的查询语句可用三引号字符串跨多行书写(换行会保留在字符串中,SQL解析时按空白处理)

#设置队列可以单独用一次execute执行:SET在同一个连接(会话)内持续有效,后续的查询仍会使用该队列.

from pyhive import hive

# Connect to Hive, select the job queue, then run a multi-line query on the
# same cursor and print every fetched row.
conn = hive.Connection(host='10.52.5.190', port=10000, username='feng.hong', database='default')
try:
    cursor = conn.cursor()
    try:
        # The queue is set in its own execute() call before the query is issued
        # on the same connection.
        cursor.execute('SET mapreduce.job.queuename=data_bi')
        # A triple-quoted string lets the SQL span several lines.
        cursor.execute("""SELECT
a.dt,
a.city_id, 
a.city_name, 
a.product_id,
a.driver_id,
a.phone_number 
FROM oride_dw.dim_oride_driver_base a 
where a.dt=DATE_SUB(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),6) limit 10""")
        print(cursor.fetchall())
    finally:
        # Release the cursor even when a statement fails.
        cursor.close()
finally:
    # Always close the connection so the server-side session is not leaked.
    conn.close()

方法二:通过os.system,具体见博客python发邮件

import os
import smtplib
import csv
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
import numpy as np
import pandas as pd
import pymysql
import sqlalchemy

# Run the query through the `hive` command-line client and let the shell
# redirect its output to /tmp/1.txt.
# The Python string is single-quoted so that the double quotes wrapping the
# HiveQL passed to `hive -e` reach the shell intact; nesting them inside a
# double-quoted Python literal is a syntax error.
hive_cmd = (
    'hive -e "'
    'SET mapreduce.job.queuename=data_bi;'
    'SELECT a.dt,a.city_id,a.city_name,a.product_id,a.driver_id,a.phone_number '
    'FROM oride_dw.dim_oride_driver_base a '
    'where a.dt >= DATE_SUB(current_date(),2) and a.dt<= DATE_SUB(current_date(),1) '
    'limit 10'
    '" >/tmp/1.txt'
)
os.system(hive_cmd)

方法三: 没跑出来,待研究

"""表示多行字符串(三引号字符串),不是注释
import pandas as pd
import sqlalchemy as sa

# Query Hive through a SQLAlchemy engine and load the rows into a DataFrame.
# The triple-quoted string keeps the multi-line SQL in a single literal.
sql = """SELECT a.dt, a.city_id, a.city_name, a.product_id, a.driver_id, a.phone_number
FROM oride_dw.dim_oride_driver_base a
where a.dt=DATE_SUB(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),6) limit 10"""
# NOTE(review): the `hive://` URL presumably relies on the SQLAlchemy dialect
# shipped with PyHive being installed in this environment -- confirm.
engine = sa.create_engine('hive://10.52.5.190:10000/opay_dw')
# Keep and print the result: as a bare expression the DataFrame was discarded,
# so running this as a script showed no output.
df = pd.read_sql(sql, engine)
print(df)

原文地址:https://www.cnblogs.com/hongfeng2019/p/12307496.html