[Openstack] [Sahara] Cluster launching times out with Oozie

Jeremy Freudberg jfreud at bu.edu
Fri Jun 3 19:41:30 UTC 2016


Hi, I'm running DevStack on Ubuntu 16.04 and having some issues with
Sahara. Namely, I cannot launch a cluster if one of the nodes is
running Oozie. I end up getting the error "Creating cluster failed for
the following reason(s): 'Operation' timed out after 300 second(s)".
Without Oozie, everything goes great. I have tried the Ubuntu and
Fedora vanilla images from
http://sahara-files.mirantis.com/images/upstream/mitaka/ for my nodes.
Any ideas or help would be greatly appreciated. Thanks!

The full stack trace, if it helps:

2016-06-03 19:29:29.990 ERROR sahara.service.ops
[req-c4f9f8d0-fb44-40ce-91fb-28812ced4412 sahara admin] [instance:
none, cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0] Error during
operating on cluster (reason: 'Operation' timed out after 300
second(s)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0] Traceback (most recent
call last):
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/service/ops.py", line 192, in wrapper
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     f(cluster_id,
*args, **kwds)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/service/ops.py", line 302, in
_provision_cluster
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]
plugin.start_cluster(cluster)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/plugins/vanilla/plugin.py", line 52, in
start_cluster
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]
cluster.hadoop_version).start_cluster(cluster)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/plugins/vanilla/v2_7_1/versionhandler.py",
line 82, in start_cluster
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]
s_scripts.start_oozie(self.pctx, cluster)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/plugins/vanilla/hadoop2/starting_scripts.py",
line 67, in start_oozie
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]
run.start_oozie_process(pctx, oo)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/utils/cluster_progress_ops.py", line 139, in
handler
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]
add_fail_event(instance, e)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/usr/local/lib/python2.7/dist-packages/oslo_utils/excutils.py", line
221, in __exit__
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]
self.force_reraise()
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/usr/local/lib/python2.7/dist-packages/oslo_utils/excutils.py", line
197, in force_reraise
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]
six.reraise(self.type_, self.value, self.tb)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/utils/cluster_progress_ops.py", line 136, in
handler
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     value = func(*args,
**kwargs)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/plugins/vanilla/hadoop2/run_scripts.py",
line 97, in start_oozie_process
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     _oozie_share_lib(r)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/plugins/vanilla/hadoop2/run_scripts.py",
line 132, in _oozie_share_lib
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     'sudo su - -c
"mkdir /tmp/oozielib && '
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/utils/ssh_remote.py", line 802, in
execute_command
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     get_stderr,
raise_when_error)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/utils/ssh_remote.py", line 885, in _run_s
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     return
self._run_with_log(func, timeout, *args, **kwargs)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/utils/ssh_remote.py", line 725, in
_run_with_log
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     return
self._run(func, *args, **kwargs)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/utils/ssh_remote.py", line 881, in _run
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     return
procutils.run_in_subprocess(self.proc, func, args, kwargs)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/opt/stack/sahara/sahara/utils/procutils.py", line 54, in
run_in_subprocess
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     result =
pickle.load(proc.stdout)  # nosec
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/usr/lib/python2.7/pickle.py", line 1384, in load
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     return
Unpickler(file).load()
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/usr/lib/python2.7/pickle.py", line 863, in load
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     key = read(1)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/usr/lib/python2.7/socket.py", line 384, in read
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     data =
self._sock.recv(left)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/usr/local/lib/python2.7/dist-packages/eventlet/greenio/py2.py", line
160, in recv
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     data =
os.read(self._fileno, buflen)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/usr/local/lib/python2.7/dist-packages/eventlet/green/os.py", line
50, in read
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     hubs.trampoline(fd,
read=True)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/usr/local/lib/python2.7/dist-packages/eventlet/hubs/__init__.py",
line 162, in trampoline
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     return hub.switch()
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]   File
"/usr/local/lib/python2.7/dist-packages/eventlet/hubs/hub.py", line
294, in switch
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]     return
self.greenlet.switch()
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0] TimeoutException:
'Operation' timed out after 300 second(s)
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0] Error ID:
165cf3cd-9c13-489f-8bc3-e34e3ec9a6b6
2016-06-03 19:29:29.990 TRACE sahara.service.ops [instance: none,
cluster: a1d95d31-73f4-4224-b8d8-57769cbf5df0]




More information about the Openstack mailing list