Saturday, 9 January 2016

Hue Setup


Hue

Hue is a web interface for analyzing data with Hadoop.

Installation Steps-

Download the Hue release package (tarball) from the official Hue site.

Untar the package.

Install -
cd /usr/local/hue
make install

Change ownership so the hue user owns the install directory-
sudo chown -R hue:hue /usr/local/hue

Change the settings in the hue.ini file:

###########################################################################
# Settings to configure your Hadoop cluster.
###########################################################################

[hadoop]

# Configuration for HDFS NameNode
# ------------------------------------------------------------------------
[[hdfs_clusters]]
# HA support by using HttpFs

[[[default]]]
# Enter the filesystem uri
fs_defaultfs=hdfs://localhost:8020
# NameNode logical name.
## logical_name=

# Use WebHdfs/HttpFs as the communication mechanism.
# Domain should be the NameNode or HttpFs host.
# Default port is 14000 for HttpFs.
webhdfs_url=http://localhost:50070/webhdfs/v1

# Change this if your HDFS cluster is Kerberos-secured
## security_enabled=false

# In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
# have to be verified against certificate authority
## ssl_cert_ca_verify=True

# Directory of the Hadoop configuration
## hadoop_conf_dir=$HADOOP_CONF_DIR when set or '/etc/hadoop/conf'

# Configuration for YARN (MR2)
# ------------------------------------------------------------------------
[[yarn_clusters]]

[[[default]]]
# Enter the host on which you are running the ResourceManager
resourcemanager_host=localhost

# The port where the ResourceManager IPC listens on
## resourcemanager_port=8032

# Whether to submit jobs to this cluster
submit_to=True
# Resource Manager logical name (required for HA)
## logical_name=

# Change this if your YARN cluster is Kerberos-secured
## security_enabled=false

# URL of the ResourceManager API
resourcemanager_api_url=http://localhost:8088

# URL of the ProxyServer API
proxy_api_url=http://localhost:8088

# URL of the HistoryServer API
history_server_api_url=http://localhost:19888

# In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
# have to be verified against certificate authority
## ssl_cert_ca_verify=True

# HA support by specifying multiple clusters
# e.g.

# [[[ha]]]
# Resource Manager logical name (required for HA)
## logical_name=my-rm-name

# Configuration for MapReduce (MR1)
# ------------------------------------------------------------------------
[[mapred_clusters]]

[[[default]]]
# Enter the host on which you are running the Hadoop JobTracker
## jobtracker_host=localhost

# The port where the JobTracker IPC listens on
## jobtracker_port=8021

# JobTracker logical name for HA
## logical_name=

# Thrift plug-in port for the JobTracker
## thrift_port=9290

# Whether to submit jobs to this cluster
submit_to=False

# Change this if your MapReduce cluster is Kerberos-secured
## security_enabled=false

# HA support by specifying multiple clusters
# e.g.

# [[[ha]]]
# Enter the logical name of the JobTrackers
## logical_name=my-jt-name


###########################################################################
# Settings to configure the Filebrowser app
###########################################################################

[filebrowser]
# Location on local filesystem where the uploaded archives are temporarily stored.
## archive_upload_tempdir=/tmp
###########################################################################
# Settings to configure liboozie
###########################################################################

[liboozie]
# The URL where the Oozie service runs on. This is required in order for
# users to submit jobs. Empty value disables the config check.
oozie_url=http://localhost:11000/oozie

# Requires FQDN in oozie_url if enabled
## security_enabled=false

# Location on HDFS where the workflows/coordinator are deployed when submitted.
## remote_deployement_dir=/user/hue/oozie/deployments


###########################################################################
# Settings to configure the Oozie app
###########################################################################

[oozie]
# Location on local FS where the examples are stored.
## local_data_dir=..../examples

# Location on local FS where the data for the examples is stored.
## sample_data_dir=...thirdparty/sample_data

# Location on HDFS where the oozie examples and workflows are stored.
## remote_data_dir=/user/hue/oozie/workspaces

# Maximum number of Oozie workflows or coordinators to retrieve in one API call.
## oozie_jobs_count=100

# Use Cron format for defining the frequency of a Coordinator instead of the old frequency number/unit.
## enable_cron_scheduling=true


###########################################################################
# Settings to configure Beeswax with Hive
###########################################################################

[beeswax]

# Host where HiveServer2 is running.
# If Kerberos security is enabled, use fully-qualified domain name (FQDN).
hive_server_host=localhost

server_interface=hiveserver2

# Port where HiveServer2 Thrift server runs on.
## hive_server_port=10000

# Hive configuration directory, where hive-site.xml is located
## hive_conf_dir=/etc/hive/conf

# Timeout in seconds for thrift calls to Hive service
## server_conn_timeout=120
# Choose whether to use the old GetLog() thrift call from before Hive 0.14 to retrieve the logs.
# If false, use the FetchResults() thrift call from Hive 1.0 or more instead.
## use_get_log_api=false

# Set a LIMIT clause when browsing a partitioned table.
# A positive value will be set as the LIMIT. If 0 or negative, do not set any limit.
## browse_partitioned_table_limit=250

# A limit to the number of rows that can be downloaded from a query.
# A value of -1 means there will be no limit.
# A maximum of 65,000 is applied to XLS downloads.
## download_row_limit=1000000

# Hue will try to close the Hive query when the user leaves the editor page.
# This will free all the query resources in HiveServer2, but also make its results inaccessible.
## close_queries=false

# Thrift version to use when communicating with HiveServer2.
# New column format is from version 7.
## thrift_version=5

[[ssl]]
# Path to Certificate Authority certificates.
## cacerts=/etc/hue/cacerts.pem

# Choose whether Hue should validate certificates received from the server.
## validate=true

###########################################################################
# Settings to configure Impala
###########################################################################

[impala]
# Host of the Impala Server (one of the Impalad)
server_host=localhost

# Port of the Impala Server
## server_port=21050

# Kerberos principal
## impala_principal=impala/hostname.foo.com

# Turn on/off impersonation mechanism when talking to Impala
## impersonation_enabled=False

# Number of initial rows of a result set to ask Impala to cache in order
# to support re-fetching them for downloading them.
# Set to 0 for disabling the option and backward compatibility.
## querycache_rows=50000

# Timeout in seconds for thrift calls
## server_conn_timeout=120

# Hue will try to close the Impala query when the user leaves the editor page.
# This will free all the query resources in Impala, but also make its results inaccessible.
## close_queries=true
# If QUERY_TIMEOUT_S > 0, the query will be timed out (i.e. cancelled) if Impala does not do any work
# (compute or send back results) for that query within QUERY_TIMEOUT_S seconds.
## query_timeout_s=600

[[ssl]]
# SSL communication enabled for this server.
## enabled=false

# Path to Certificate Authority certificates.
## cacerts=/etc/hue/cacerts.pem

# Choose whether Hue should validate certificates received from the server.
## validate=true


###########################################################################
# Settings to configure Pig
###########################################################################

[pig]
# Location of piggybank.jar on local filesystem.
## local_sample_dir=/usr/share/hue/apps/pig/examples

# Location piggybank.jar will be copied to in HDFS.
## remote_data_dir=/user/hue/pig/examples


###########################################################################
# Settings to configure Sqoop
###########################################################################

[sqoop]
# For autocompletion, fill out the librdbms section.

# Sqoop server URL
server_url=http://localhost:12000/sqoop


###########################################################################
# Settings to configure Proxy
###########################################################################

[proxy]
# Comma-separated list of regular expressions,
# which match 'host:port' of requested proxy target.
## whitelist=(localhost|127\.0\.0\.1):(50030|50070|50060|50075)

# Comma-separated list of regular expressions,
# which match any prefix of 'host:port/path' of requested proxy target.
# This does not support matching GET parameters.
## blacklist=


###########################################################################
# Settings to configure HBase Browser
###########################################################################

[hbase]
# Comma-separated list of HBase Thrift servers for clusters in the format of '(name|host:port)'.
# Use full hostname with security.
# If using Kerberos we assume GSSAPI SASL, not PLAIN.
hbase_clusters=(Cluster|localhost:9090)

# HBase configuration directory, where hbase-site.xml is located.
## hbase_conf_dir=/etc/hbase/conf

# Hard limit of rows or columns per row fetched before truncating.
## truncate_limit = 500

# 'buffered' is the default of the HBase Thrift Server and supports security.
# 'framed' can be used to chunk up responses,
# which is useful when used in conjunction with the nonblocking server in Thrift.
## thrift_transport=buffered


###########################################################################
# Settings to configure Solr Search
###########################################################################

[search]

# URL of the Solr Server
solr_url=http://localhost:8983/solr/

# Requires FQDN in solr_url if enabled
## security_enabled=false
## Query sent when no term is entered
## empty_query=*:*


###########################################################################
# Settings to configure Solr Indexer
###########################################################################

[indexer]

# Location of the solrctl binary.
## solrctl_path=/usr/bin/solrctl

# Zookeeper ensemble.
## solr_zk_ensemble=localhost:2181/solr


###########################################################################
# Settings to configure Job Designer
###########################################################################

[jobsub]

# Location on local FS where examples and template are stored.
## local_data_dir=..../data

# Location on local FS where sample data is stored
## sample_data_dir=...thirdparty/sample_data


###########################################################################
# Settings to configure Job Browser.
###########################################################################

[jobbrowser]
# Share submitted jobs information with all users. If set to false,
# submitted jobs are visible only to the owner and administrators.
## share_jobs=true


###########################################################################
# Settings to configure the Zookeeper application.
###########################################################################

[zookeeper]

[[clusters]]

[[[default]]]
# Zookeeper ensemble. Comma separated list of Host/Port.
# e.g. localhost:2181,localhost:2182,localhost:2183
## host_ports=localhost:2181

# The URL of the REST contrib service (required for znode browsing).
## rest_url=http://localhost:9998

# Name of Kerberos principal when using security.
## principal_name=zookeeper


###########################################################################
# Settings to configure the Spark application.
###########################################################################

[spark]
# URL of the REST Spark Job Server.
server_url=http://localhost:8090/


# List of available types of snippets
## languages='[{"name": "Scala", "type": "scala"},{"name": "Python", "type": "python"},{"name": "Impala SQL", "type": "impala"},{"name": "Hive SQL", "type": "hive"},{"name": "Text", "type": "text"}]'


###########################################################################
# Settings for the User Admin application
###########################################################################

[useradmin]
# The name of the default user group that users will be a member of
## default_user_group=default

[[password_policy]]
# Set password policy to all users. The default policy requires password to be at least 8 characters long,
# and contain both uppercase and lowercase letters, numbers, and special characters.

## is_enabled=false
## pwd_regex="^(?=.*?[A-Z])(?=(.*[a-z]){1,})(?=(.*[\d]){1,})(?=(.*[\W_]){1,}).{8,}$"
## pwd_hint="The password must be at least 8 characters long, and must contain both uppercase and lowercase letters, at least one number, and at least one special character."
## pwd_error_message="The password must be at least 8 characters long, and must contain both uppercase and lowercase letters, at least one number, and at least one special character."

###########################################################################
# Settings for the Sentry lib
###########################################################################

[libsentry]
# Hostname or IP of server.
## hostname=localhost

# Port the sentry service is running on.
## port=8038

# Sentry configuration directory, where sentry-site.xml is located.
## sentry_conf_dir=/etc/sentry/conf


Set properties in hdfs-site.xml-

<property>
  <name>dfs.webhdfs.enabled</name>
  <value>true</value>
</property>

Set properties in core-site.xml-
<property>
  <name>hadoop.proxyuser.hue.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.hue.groups</name>
  <value>*</value>
</property>

Start Hue:
build/env/bin/supervisor
Open this link in your browser:
http://localhost:8888/
Start Hadoop, Hive, Pig, etc.,
and use the web interface for running queries, scripts, etc.
After login you can check the status of everything in your browser.


No comments:

Post a Comment