Hue
Hue is a web interface for analyzing data with Hadoop.
Installation Steps -
Download the Hue release package and untar it.
Install (run from the untarred source directory) -
PREFIX=/usr/local make install
Change permissions -
sudo chown -R hue:hue /usr/local/hue
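The chown above assumes a hue user and group already exist on the machine; if they do not, a minimal sketch of creating them first (the user/group names simply match the chown step):
sudo groupadd hue
sudo useradd -g hue hue
sudo chown -R hue:hue /usr/local/hue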
Change settings in the hue.ini file (typically /usr/local/hue/desktop/conf/hue.ini after the install above):
###########################################################################
# Settings to configure your Hadoop cluster.
###########################################################################

[hadoop]

# Configuration for HDFS NameNode
# ------------------------------------------------------------------------
[[hdfs_clusters]]
# HA support by using HttpFs

[[[default]]]
# Enter the filesystem uri
fs_defaultfs=hdfs://localhost:8020

# NameNode logical name.
## logical_name=

# Use WebHdfs/HttpFs as the communication mechanism.
# Domain should be the NameNode or HttpFs host.
# Default port is 14000 for HttpFs.
webhdfs_url=http://localhost:50070/webhdfs/v1

# Change this if your HDFS cluster is Kerberos-secured
## security_enabled=false

# In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
# have to be verified against certificate authority
## ssl_cert_ca_verify=True

# Directory of the Hadoop configuration
## hadoop_conf_dir=$HADOOP_CONF_DIR when set or '/etc/hadoop/conf'
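To confirm WebHDFS is reachable on the URL configured above, a quick curl check helps (a sketch; the /tmp path and port 50070 are assumptions taken from the defaults above):
curl -i "http://localhost:50070/webhdfs/v1/tmp?op=LISTSTATUS"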
# Configuration for YARN (MR2)
# ------------------------------------------------------------------------
[[yarn_clusters]]

[[[default]]]
# Enter the host on which you are running the ResourceManager
resourcemanager_host=localhost

# The port where the ResourceManager IPC listens on
## resourcemanager_port=8032

# Whether to submit jobs to this cluster
submit_to=True

# Resource Manager logical name (required for HA)
## logical_name=

# Change this if your YARN cluster is Kerberos-secured
## security_enabled=false

# URL of the ResourceManager API
resourcemanager_api_url=http://localhost:8088

# URL of the ProxyServer API
proxy_api_url=http://localhost:8088

# URL of the HistoryServer API
history_server_api_url=http://localhost:19888

# In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
# have to be verified against certificate authority
## ssl_cert_ca_verify=True

# HA support by specifying multiple clusters
# e.g.
# [[[ha]]]
# Resource Manager logical name (required for HA)
## logical_name=my-rm-name
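A quick way to confirm the ResourceManager REST API configured above is reachable (a sketch; assumes the default port 8088):
curl http://localhost:8088/ws/v1/cluster/info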
# Configuration for MapReduce (MR1)
# ------------------------------------------------------------------------
[[mapred_clusters]]

[[[default]]]
# Enter the host on which you are running the Hadoop JobTracker
## jobtracker_host=localhost

# The port where the JobTracker IPC listens on
## jobtracker_port=8021

# JobTracker logical name for HA
## logical_name=

# Thrift plug-in port for the JobTracker
## thrift_port=9290

# Whether to submit jobs to this cluster
submit_to=False

# Change this if your MapReduce cluster is Kerberos-secured
## security_enabled=false

# HA support by specifying multiple clusters
# e.g.
# [[[ha]]]
# Enter the logical name of the JobTrackers
## logical_name=my-jt-name
###########################################################################
# Settings to configure the Filebrowser app
###########################################################################

[filebrowser]
# Location on local filesystem where the uploaded archives are temporarily stored.
## archive_upload_tempdir=/tmp
###########################################################################
# Settings to configure liboozie
###########################################################################

[liboozie]
# The URL where the Oozie service runs on. This is required in order for
# users to submit jobs. Empty value disables the config check.
oozie_url=http://localhost:11000/oozie

# Requires FQDN in oozie_url if enabled
## security_enabled=false

# Location on HDFS where the workflows/coordinator are deployed when submitted.
## remote_deployement_dir=/user/hue/oozie/deployments
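To check that the Oozie server configured above is up, its admin status endpoint can be queried (a sketch; assumes the default URL above):
curl http://localhost:11000/oozie/v1/admin/status
# a healthy server reports systemMode NORMAL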
###########################################################################
# Settings to configure the Oozie app
###########################################################################

[oozie]
# Location on local FS where the examples are stored.
## local_data_dir=..../examples

# Location on local FS where the data for the examples is stored.
## sample_data_dir=...thirdparty/sample_data

# Location on HDFS where the oozie examples and workflows are stored.
## remote_data_dir=/user/hue/oozie/workspaces

# Maximum of Oozie workflows or coordinators to retrieve in one API call.
## oozie_jobs_count=100

# Use Cron format for defining the frequency of a Coordinator instead of the old frequency number/unit.
## enable_cron_scheduling=true
###########################################################################
# Settings to configure Beeswax with Hive
###########################################################################

[beeswax]
# Host where HiveServer2 is running.
# If Kerberos security is enabled, use fully-qualified domain name (FQDN).
hive_server_host=localhost
server_interface=hiveserver2

# Port where HiveServer2 Thrift server runs on.
## hive_server_port=10000

# Hive configuration directory, where hive-site.xml is located
## hive_conf_dir=/etc/hive/conf

# Timeout in seconds for thrift calls to Hive service
## server_conn_timeout=120

# Choose whether to use the old GetLog() thrift call from before Hive 0.14 to retrieve the logs.
# If false, use the FetchResults() thrift call from Hive 1.0 or more instead.
## use_get_log_api=false

# Set a LIMIT clause when browsing a partitioned table.
# A positive value will be set as the LIMIT. If 0 or negative, do not set any limit.
## browse_partitioned_table_limit=250

# A limit to the number of rows that can be downloaded from a query.
# A value of -1 means there will be no limit.
# A maximum of 65,000 is applied to XLS downloads.
## download_row_limit=1000000

# Hue will try to close the Hive query when the user leaves the editor page.
# This will free all the query resources in HiveServer2, but also make its results inaccessible.
## close_queries=false

# Thrift version to use when communicating with HiveServer2.
# New column format is from version 7.
## thrift_version=5

[[ssl]]
# Path to Certificate Authority certificates.
## cacerts=/etc/hue/cacerts.pem

# Choose whether Hue should validate certificates received from the server.
## validate=true
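As a sanity check for the HiveServer2 host and port configured above, a beeline connection can be attempted (a sketch; the username is an assumption):
beeline -u jdbc:hive2://localhost:10000 -n hive -e "show databases;"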
###########################################################################
# Settings to configure Impala
###########################################################################

[impala]
# Host of the Impala Server (one of the Impalad)
server_host=localhost

# Port of the Impala Server
## server_port=21050

# Kerberos principal
## impala_principal=impala/hostname.foo.com

# Turn on/off impersonation mechanism when talking to Impala
## impersonation_enabled=False

# Number of initial rows of a result set to ask Impala to cache in order
# to support re-fetching them for downloading them.
# Set to 0 for disabling the option and backward compatibility.
## querycache_rows=50000

# Timeout in seconds for thrift calls
## server_conn_timeout=120

# Hue will try to close the Impala query when the user leaves the editor page.
# This will free all the query resources in Impala, but also make its results inaccessible.
## close_queries=true

# If QUERY_TIMEOUT_S > 0, the query will be timed out (i.e. cancelled) if Impala does not do any work
# (compute or send back results) for that query within QUERY_TIMEOUT_S seconds.
## query_timeout_s=600

[[ssl]]
# SSL communication enabled for this server.
## enabled=false

# Path to Certificate Authority certificates.
## cacerts=/etc/hue/cacerts.pem

# Choose whether Hue should validate certificates received from the server.
## validate=true
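To verify the impalad configured above is responding, impala-shell can be pointed at it (a sketch; note that impala-shell's default port is 21000, while Hue talks to the HiveServer2-compatible port 21050 configured above):
impala-shell -i localhost:21000 -q "show databases;"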
###########################################################################
# Settings to configure Pig
###########################################################################

[pig]
# Location of piggybank.jar on local filesystem.
## local_sample_dir=/usr/share/hue/apps/pig/examples

# Location piggybank.jar will be copied to in HDFS.
## remote_data_dir=/user/hue/pig/examples
###########################################################################
# Settings to configure Sqoop
###########################################################################

[sqoop]
# For autocompletion, fill out the librdbms section.

# Sqoop server URL
server_url=http://localhost:12000/sqoop
###########################################################################
# Settings to configure Proxy
###########################################################################

[proxy]
# Comma-separated list of regular expressions,
# which match 'host:port' of requested proxy target.
## whitelist=(localhost|127\.0\.0\.1):(50030|50070|50060|50075)

# Comma-separated list of regular expressions,
# which match any prefix of 'host:port/path' of requested proxy target.
# This does not support matching GET parameters.
## blacklist=
###########################################################################
# Settings to configure HBase Browser
###########################################################################

[hbase]
# Comma-separated list of HBase Thrift servers for clusters in the format of '(name|host:port)'.
# Use full hostname with security.
# If using Kerberos we assume GSSAPI SASL, not PLAIN.
hbase_clusters=(Cluster|localhost:9090)

# HBase configuration directory, where hbase-site.xml is located.
## hbase_conf_dir=/etc/hbase/conf

# Hard limit of rows or columns per row fetched before truncating.
## truncate_limit = 500

# 'buffered' is the default of the HBase Thrift Server and supports security.
# 'framed' can be used to chunk up responses,
# which is useful when used in conjunction with the nonblocking server in Thrift.
## thrift_transport=buffered
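HBase Browser talks to the HBase Thrift server on port 9090 as configured above; if it is not already running, it can be started on the HBase host (a sketch):
hbase-daemon.sh start thrift
# or, in the foreground:
hbase thrift start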
###########################################################################
# Settings to configure Solr Search
###########################################################################

[search]
# URL of the Solr Server
solr_url=http://localhost:8983/solr/

# Requires FQDN in solr_url if enabled
## security_enabled=false

# Query sent when no term is entered
## empty_query=*:*
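To confirm the Solr server configured above is up, its cores status API can be queried (a sketch; assumes the default URL above):
curl "http://localhost:8983/solr/admin/cores?action=STATUS"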
###########################################################################
# Settings to configure Solr Indexer
###########################################################################

[indexer]
# Location of the solrctl binary.
## solrctl_path=/usr/bin/solrctl

# Zookeeper ensemble.
## solr_zk_ensemble=localhost:2181/solr
###########################################################################
# Settings to configure Job Designer
###########################################################################

[jobsub]
# Location on local FS where examples and template are stored.
## local_data_dir=..../data

# Location on local FS where sample data is stored
## sample_data_dir=...thirdparty/sample_data
###########################################################################
# Settings to configure Job Browser.
###########################################################################

[jobbrowser]
# Share submitted jobs information with all users. If set to false,
# submitted jobs are visible only to the owner and administrators.
## share_jobs=true
###########################################################################
# Settings to configure the Zookeeper application.
###########################################################################

[zookeeper]

[[clusters]]

[[[default]]]
# Zookeeper ensemble. Comma separated list of Host/Port.
# e.g. localhost:2181,localhost:2182,localhost:2183
## host_ports=localhost:2181

# The URL of the REST contrib service (required for znode browsing).
## rest_url=http://localhost:9998

# Name of Kerberos principal when using security.
## principal_name=zookeeper
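A quick check that the ZooKeeper ensemble configured above is answering, using the standard 'ruok' four-letter command (a sketch; assumes port 2181):
echo ruok | nc localhost 2181
# a healthy server replies: imok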
###########################################################################
# Settings to configure the Spark application.
###########################################################################

[spark]
# URL of the REST Spark Job Server.
server_url=http://localhost:8090/

# List of available types of snippets
## languages='[{"name": "Scala", "type": "scala"},{"name": "Python", "type": "python"},{"name": "Impala SQL", "type": "impala"},{"name": "Hive SQL", "type": "hive"},{"name": "Text", "type": "text"}]'
###########################################################################
# Settings for the User Admin application
###########################################################################

[useradmin]
# The name of the default user group that users will be a member of
## default_user_group=default

[[password_policy]]
# Set password policy to all users. The default policy requires password to be at least 8 characters long,
# and contain both uppercase and lowercase letters, numbers, and special characters.
## is_enabled=false
## pwd_regex="^(?=.*?[A-Z])(?=(.*[a-z]){1,})(?=(.*[\d]){1,})(?=(.*[\W_]){1,}).{8,}$"
## pwd_hint="The password must be at least 8 characters long, and must contain both uppercase and lowercase letters, at least one number, and at least one special character."
## pwd_error_message="The password must be at least 8 characters long, and must contain both uppercase and lowercase letters, at least one number, and at least one special character."
###########################################################################
# Settings for the Sentry lib
###########################################################################

[libsentry]
# Hostname or IP of server.
## hostname=localhost

# Port the sentry service is running on.
## port=8038

# Sentry configuration directory, where sentry-site.xml is located.
## sentry_conf_dir=/etc/sentry/conf
Set properties in hdfs-site.xml -

<property>
  <name>dfs.webhdfs.enabled</name>
  <value>true</value>
</property>
Set properties in core-site.xml -

<property>
  <name>hadoop.proxyuser.hue.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.hue.groups</name>
  <value>*</value>
</property>
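These proxy-user changes take effect once the NameNode re-reads its configuration; restarting HDFS works, or the settings can be refreshed in place (a sketch; run as an HDFS superuser):
hdfs dfsadmin -refreshSuperUserGroupsConfiguration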
Start Hue (run from the Hue installation directory, e.g. /usr/local/hue) -
build/env/bin/supervisor
Open this link in your browser -
http://localhost:8888/
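If the browser cannot reach it, a quick check from the Hue host itself can confirm the server is listening (a sketch):
curl -I http://localhost:8888/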
Start Hadoop, Hive, Pig, etc., and use the web interface for running queries, scripts, and more.
After logging in, you can check the status of everything in your browser.