Saturday, 9 January 2016

Hue Setup


Hue

Hue is a web interface for analyzing data with Hadoop.

Installation Steps-

Download the Hue release package (tarball) from the official Hue site.

Untar the package.

Install -
cd /usr/local/hue
make install

Change ownership so the hue user owns the install directory-
sudo chown -R hue:hue /usr/local/hue

Change the settings in the hue.ini file:

###########################################################################
# Settings to configure your Hadoop cluster.
###########################################################################

[hadoop]

# Configuration for HDFS NameNode
# ------------------------------------------------------------------------
[[hdfs_clusters]]
# HA support by using HttpFs

[[[default]]]
# Enter the filesystem uri
fs_defaultfs=hdfs://localhost:8020
# NameNode logical name.
## logical_name=

# Use WebHdfs/HttpFs as the communication mechanism.
# Domain should be the NameNode or HttpFs host.
# Default port is 14000 for HttpFs.
webhdfs_url=http://localhost:50070/webhdfs/v1

# Change this if your HDFS cluster is Kerberos-secured
## security_enabled=false

# In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
# have to be verified against certificate authority
## ssl_cert_ca_verify=True

# Directory of the Hadoop configuration
## hadoop_conf_dir=$HADOOP_CONF_DIR when set or '/etc/hadoop/conf'

# Configuration for YARN (MR2)
# ------------------------------------------------------------------------
[[yarn_clusters]]

[[[default]]]
# Enter the host on which you are running the ResourceManager
resourcemanager_host=localhost

# The port where the ResourceManager IPC listens on
## resourcemanager_port=8032

# Whether to submit jobs to this cluster
submit_to=True
# Resource Manager logical name (required for HA)
## logical_name=

# Change this if your YARN cluster is Kerberos-secured
## security_enabled=false

# URL of the ResourceManager API
resourcemanager_api_url=http://localhost:8088

# URL of the ProxyServer API
proxy_api_url=http://localhost:8088

# URL of the HistoryServer API
history_server_api_url=http://localhost:19888

# In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
# have to be verified against certificate authority
## ssl_cert_ca_verify=True

# HA support by specifying multiple clusters
# e.g.

# [[[ha]]]
# Resource Manager logical name (required for HA)
## logical_name=my-rm-name

# Configuration for MapReduce (MR1)
# ------------------------------------------------------------------------
[[mapred_clusters]]

[[[default]]]
# Enter the host on which you are running the Hadoop JobTracker
## jobtracker_host=localhost

# The port where the JobTracker IPC listens on
## jobtracker_port=8021

# JobTracker logical name for HA
## logical_name=

# Thrift plug-in port for the JobTracker
## thrift_port=9290

# Whether to submit jobs to this cluster
submit_to=False

# Change this if your MapReduce cluster is Kerberos-secured
## security_enabled=false

# HA support by specifying multiple clusters
# e.g.

# [[[ha]]]
# Enter the logical name of the JobTrackers
## logical_name=my-jt-name


###########################################################################
# Settings to configure the Filebrowser app
###########################################################################

[filebrowser]
# Location on local filesystem where the uploaded archives are temporarily stored.
## archive_upload_tempdir=/tmp
###########################################################################
# Settings to configure liboozie
###########################################################################

[liboozie]
# The URL where the Oozie service runs on. This is required in order for
# users to submit jobs. Empty value disables the config check.
oozie_url=http://localhost:11000/oozie

# Requires FQDN in oozie_url if enabled
## security_enabled=false

# Location on HDFS where the workflows/coordinator are deployed when submitted.
## remote_deployement_dir=/user/hue/oozie/deployments


###########################################################################
# Settings to configure the Oozie app
###########################################################################

[oozie]
# Location on local FS where the examples are stored.
## local_data_dir=..../examples

# Location on local FS where the data for the examples is stored.
## sample_data_dir=...thirdparty/sample_data

# Location on HDFS where the oozie examples and workflows are stored.
## remote_data_dir=/user/hue/oozie/workspaces

# Maximum number of Oozie workflows or coordinators to retrieve in one API call.
## oozie_jobs_count=100

# Use Cron format for defining the frequency of a Coordinator instead of the old frequency number/unit.
## enable_cron_scheduling=true


###########################################################################
# Settings to configure Beeswax with Hive
###########################################################################

[beeswax]

# Host where HiveServer2 is running.
# If Kerberos security is enabled, use fully-qualified domain name (FQDN).
hive_server_host=localhost

server_interface=hiveserver2

# Port where HiveServer2 Thrift server runs on.
## hive_server_port=10000

# Hive configuration directory, where hive-site.xml is located
## hive_conf_dir=/etc/hive/conf

# Timeout in seconds for thrift calls to Hive service
## server_conn_timeout=120
# Choose whether to use the old GetLog() thrift call from before Hive 0.14 to retrieve the logs.
# If false, use the FetchResults() thrift call from Hive 1.0 or more instead.
## use_get_log_api=false

# Set a LIMIT clause when browsing a partitioned table.
# A positive value will be set as the LIMIT. If 0 or negative, do not set any limit.
## browse_partitioned_table_limit=250

# A limit to the number of rows that can be downloaded from a query.
# A value of -1 means there will be no limit.
# A maximum of 65,000 is applied to XLS downloads.
## download_row_limit=1000000

# Hue will try to close the Hive query when the user leaves the editor page.
# This will free all the query resources in HiveServer2, but also make its results inaccessible.
## close_queries=false

# Thrift version to use when communicating with HiveServer2.
# New column format is from version 7.
## thrift_version=5

[[ssl]]
# Path to Certificate Authority certificates.
## cacerts=/etc/hue/cacerts.pem

# Choose whether Hue should validate certificates received from the server.
## validate=true

###########################################################################
# Settings to configure Impala
###########################################################################

[impala]
# Host of the Impala Server (one of the Impalad)
server_host=localhost

# Port of the Impala Server
## server_port=21050

# Kerberos principal
## impala_principal=impala/hostname.foo.com

# Turn on/off impersonation mechanism when talking to Impala
## impersonation_enabled=False

# Number of initial rows of a result set to ask Impala to cache in order
# to support re-fetching them for downloading them.
# Set to 0 for disabling the option and backward compatibility.
## querycache_rows=50000

# Timeout in seconds for thrift calls
## server_conn_timeout=120

# Hue will try to close the Impala query when the user leaves the editor page.
# This will free all the query resources in Impala, but also make its results inaccessible.
## close_queries=true
# If QUERY_TIMEOUT_S > 0, the query will be timed out (i.e. cancelled) if Impala does not do any work
# (compute or send back results) for that query within QUERY_TIMEOUT_S seconds.
## query_timeout_s=600

[[ssl]]
# SSL communication enabled for this server.
## enabled=false

# Path to Certificate Authority certificates.
## cacerts=/etc/hue/cacerts.pem

# Choose whether Hue should validate certificates received from the server.
## validate=true


###########################################################################
# Settings to configure Pig
###########################################################################

[pig]
# Location of piggybank.jar on local filesystem.
## local_sample_dir=/usr/share/hue/apps/pig/examples

# Location piggybank.jar will be copied to in HDFS.
## remote_data_dir=/user/hue/pig/examples


###########################################################################
# Settings to configure Sqoop
###########################################################################

[sqoop]
# For autocompletion, fill out the librdbms section.

# Sqoop server URL
server_url=http://localhost:12000/sqoop


###########################################################################
# Settings to configure Proxy
###########################################################################

[proxy]
# Comma-separated list of regular expressions,
# which match 'host:port' of requested proxy target.
## whitelist=(localhost|127\.0\.0\.1):(50030|50070|50060|50075)

# Comma-separated list of regular expressions,
# which match any prefix of 'host:port/path' of requested proxy target.
# This does not support matching GET parameters.
## blacklist=


###########################################################################
# Settings to configure HBase Browser
###########################################################################

[hbase]
# Comma-separated list of HBase Thrift servers for clusters in the format of '(name|host:port)'.
# Use full hostname with security.
# If using Kerberos we assume GSSAPI SASL, not PLAIN.
hbase_clusters=(Cluster|localhost:9090)

# HBase configuration directory, where hbase-site.xml is located.
## hbase_conf_dir=/etc/hbase/conf

# Hard limit of rows or columns per row fetched before truncating.
## truncate_limit = 500

# 'buffered' is the default of the HBase Thrift Server and supports security.
# 'framed' can be used to chunk up responses,
# which is useful when used in conjunction with the nonblocking server in Thrift.
## thrift_transport=buffered


###########################################################################
# Settings to configure Solr Search
###########################################################################

[search]

# URL of the Solr Server
solr_url=http://localhost:8983/solr/

# Requires FQDN in solr_url if enabled
## security_enabled=false
## Query sent when no term is entered
## empty_query=*:*


###########################################################################
# Settings to configure Solr Indexer
###########################################################################

[indexer]

# Location of the solrctl binary.
## solrctl_path=/usr/bin/solrctl

# Zookeeper ensemble.
## solr_zk_ensemble=localhost:2181/solr


###########################################################################
# Settings to configure Job Designer
###########################################################################

[jobsub]

# Location on local FS where examples and template are stored.
## local_data_dir=..../data

# Location on local FS where sample data is stored
## sample_data_dir=...thirdparty/sample_data


###########################################################################
# Settings to configure Job Browser.
###########################################################################

[jobbrowser]
# Share submitted jobs information with all users. If set to false,
# submitted jobs are visible only to the owner and administrators.
## share_jobs=true


###########################################################################
# Settings to configure the Zookeeper application.
###########################################################################

[zookeeper]

[[clusters]]

[[[default]]]
# Zookeeper ensemble. Comma separated list of Host/Port.
# e.g. localhost:2181,localhost:2182,localhost:2183
## host_ports=localhost:2181

# The URL of the REST contrib service (required for znode browsing).
## rest_url=http://localhost:9998

# Name of Kerberos principal when using security.
## principal_name=zookeeper


###########################################################################
# Settings to configure the Spark application.
###########################################################################

[spark]
# URL of the REST Spark Job Server.
server_url=http://localhost:8090/


# List of available types of snippets
## languages='[{"name": "Scala", "type": "scala"},{"name": "Python", "type": "python"},{"name": "Impala SQL", "type": "impala"},{"name": "Hive SQL", "type": "hive"},{"name": "Text", "type": "text"}]'


###########################################################################
# Settings for the User Admin application
###########################################################################

[useradmin]
# The name of the default user group that users will be a member of
## default_user_group=default

[[password_policy]]
# Set password policy to all users. The default policy requires password to be at least 8 characters long,
# and contain both uppercase and lowercase letters, numbers, and special characters.

## is_enabled=false
## pwd_regex="^(?=.*?[A-Z])(?=(.*[a-z]){1,})(?=(.*[\d]){1,})(?=(.*[\W_]){1,}).{8,}$"
## pwd_hint="The password must be at least 8 characters long, and must contain both uppercase and lowercase letters, at least one number, and at least one special character."
## pwd_error_message="The password must be at least 8 characters long, and must contain both uppercase and lowercase letters, at least one number, and at least one special character."

###########################################################################
# Settings for the Sentry lib
###########################################################################

[libsentry]
# Hostname or IP of server.
## hostname=localhost

# Port the sentry service is running on.
## port=8038

# Sentry configuration directory, where sentry-site.xml is located.
## sentry_conf_dir=/etc/sentry/conf


Set properties in hdfs-site.xml-

<property>
  <name>dfs.webhdfs.enabled</name>
  <value>true</value>
</property>

Set properties in core-site.xml-
<property>
  <name>hadoop.proxyuser.hue.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.hue.groups</name>
  <value>*</value>
</property>

Start Hue:
build/env/bin/supervisor
Open this link in your browser:
http://localhost:8888/
Start Hadoop, Hive, Pig, etc.,
and use the web interface for running queries, scripts, etc.
After login you can check the status of everything in your browser.


No comments:

Post a Comment