Add: julia-0.6.2
Former-commit-id: ccc667cf67d569f3fb3df39aa57c2134755a7551
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
This is a proof-of-concept that uses ZeroMQ as transport.
|
||||
It uses a star topology as opposed to the native mesh network.
|
||||
|
||||
Package ZMQ must be installed. All workers only run on localhost.
|
||||
|
||||
All Julia nodes only connect to a "broker" process that listens on known ports
|
||||
8100 and 8101 via ZMQ sockets.
|
||||
|
||||
|
||||
All commands must be run from `examples/clustermanager/0mq` directory
|
||||
|
||||
First, start the broker. In a new console type:
|
||||
julia broker.jl
|
||||
|
||||
This does not return.
|
||||
|
||||
Next, start a Julia REPL and type:
|
||||
include("ZMQCM.jl")
|
||||
ZMQCM.start_master(4) # start with four workers
|
||||
|
||||
|
||||
Alternatively, head.jl, a test script could be run. It just launches the requested number of workers,
|
||||
executes a simple command on all of them and exits.
|
||||
julia head.jl 4
|
||||
|
||||
NOTE: As stated this is a proof-of-concept. A real Julia cluster using ZMQ will probably use
|
||||
different ZMQ socket types and optimize the transport.
|
||||
277
julia-0.6.2/share/doc/julia/examples/clustermanager/0mq/ZMQCM.jl
Normal file
277
julia-0.6.2/share/doc/julia/examples/clustermanager/0mq/ZMQCM.jl
Normal file
@@ -0,0 +1,277 @@
|
||||
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||||
|
||||
using ZMQ
|
||||
|
||||
import Base: launch, manage, connect, kill
|
||||
|
||||
const BROKER_SUB_PORT = 8100
|
||||
const BROKER_PUB_PORT = 8101
|
||||
|
||||
const SELF_INITIATED = 0
|
||||
const REMOTE_INITIATED = 1
|
||||
|
||||
const PAYLOAD_MSG = "J"
|
||||
const CONTROL_MSG = "Z"
|
||||
|
||||
const REQUEST_ACK = "R"
|
||||
const ACK_MSG = "A"
|
||||
const KILL_MSG = "K"
|
||||
|
||||
mutable struct ZMQCMan <: ClusterManager
|
||||
map_zmq_julia::Dict{Int, Tuple}
|
||||
c::Condition
|
||||
isfree::Bool
|
||||
ctx
|
||||
pub
|
||||
sub
|
||||
zid_self
|
||||
ZMQCMan() = new(Dict{Int, Tuple}(), Condition(), true)
|
||||
end
|
||||
|
||||
const manager = ZMQCMan()
|
||||
|
||||
function lock_for_send()
|
||||
if manager.isfree == true
|
||||
manager.isfree = false
|
||||
else
|
||||
while manager.isfree == false
|
||||
wait(manager.c)
|
||||
if manager.isfree == true
|
||||
manager.isfree = false
|
||||
return
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
function release_lock_for_send()
|
||||
manager.isfree = true
|
||||
notify(manager.c, all=true)
|
||||
end
|
||||
|
||||
function init_node(zid=0)
|
||||
manager.ctx = Context(1)
|
||||
pub=Socket(manager.ctx, PUB) # Outbound
|
||||
connect(pub, "tcp://127.0.0.1:$BROKER_SUB_PORT")
|
||||
|
||||
sub=Socket(manager.ctx, SUB) # In bound
|
||||
connect(sub, "tcp://127.0.0.1:$BROKER_PUB_PORT")
|
||||
ZMQ.set_subscribe(sub, string(zid))
|
||||
|
||||
manager.pub = pub
|
||||
manager.sub = sub
|
||||
manager.zid_self = zid
|
||||
|
||||
(pub, sub)
|
||||
end
|
||||
|
||||
function send_data(zid, mtype, data)
|
||||
lock_for_send()
|
||||
ZMQ.send(manager.pub, Message(string(zid)), SNDMORE)
|
||||
ZMQ.send(manager.pub, Message(string(manager.zid_self)), SNDMORE)
|
||||
#println("Sending message of type $mtype to $zid")
|
||||
ZMQ.send(manager.pub, Message(mtype), SNDMORE)
|
||||
ZMQ.send(manager.pub, Message(data))
|
||||
release_lock_for_send()
|
||||
end
|
||||
|
||||
function setup_connection(zid, initiated_by)
|
||||
try
|
||||
read_stream=BufferStream()
|
||||
write_stream=BufferStream()
|
||||
|
||||
if initiated_by == REMOTE_INITIATED
|
||||
test_remote = false
|
||||
else
|
||||
test_remote = true
|
||||
end
|
||||
|
||||
manager.map_zmq_julia[zid] = (read_stream, write_stream, test_remote)
|
||||
|
||||
@schedule begin
|
||||
while true
|
||||
(r_s, w_s, do_test_remote) = manager.map_zmq_julia[zid]
|
||||
if do_test_remote
|
||||
send_data(zid, CONTROL_MSG, REQUEST_ACK)
|
||||
sleep(0.5)
|
||||
else
|
||||
break
|
||||
end
|
||||
end
|
||||
(r_s, w_s, do_test_remote) = manager.map_zmq_julia[zid]
|
||||
|
||||
while true
|
||||
data = readavailable(w_s)
|
||||
send_data(zid, PAYLOAD_MSG, data)
|
||||
end
|
||||
end
|
||||
(read_stream, write_stream)
|
||||
catch e
|
||||
Base.show_backtrace(STDOUT,catch_backtrace())
|
||||
println(e)
|
||||
rethrow(e)
|
||||
end
|
||||
end
|
||||
|
||||
# BROKER
|
||||
function start_broker()
|
||||
ctx=Context(1)
|
||||
xpub=Socket(ctx, XPUB)
|
||||
xsub=Socket(ctx, XSUB)
|
||||
|
||||
ZMQ.bind(xsub, "tcp://127.0.0.1:$(BROKER_SUB_PORT)")
|
||||
ZMQ.bind(xpub, "tcp://127.0.0.1:$(BROKER_PUB_PORT)")
|
||||
|
||||
ccall((:zmq_proxy, :libzmq), Cint, (Ptr{Void}, Ptr{Void}, Ptr{Void}), xsub.data, xpub.data, C_NULL)
|
||||
# proxy(xsub, xpub)
|
||||
|
||||
# control never comes here
|
||||
ZMQ.close(xpub)
|
||||
ZMQ.close(xsub)
|
||||
ZMQ.close(ctx)
|
||||
end
|
||||
|
||||
function recv_data()
|
||||
try
|
||||
#println("On $(manager.zid_self) waiting to recv message")
|
||||
zid = parse(Int,String(ZMQ.recv(manager.sub)))
|
||||
assert(zid == manager.zid_self)
|
||||
|
||||
from_zid = parse(Int,String(ZMQ.recv(manager.sub)))
|
||||
mtype = String(ZMQ.recv(manager.sub))
|
||||
|
||||
#println("$zid received message of type $mtype from $from_zid")
|
||||
|
||||
data = ZMQ.recv(manager.sub)
|
||||
if mtype == CONTROL_MSG
|
||||
cmsg = String(data)
|
||||
if cmsg == REQUEST_ACK
|
||||
#println("$from_zid REQUESTED_ACK from $zid")
|
||||
# send back a control_msg
|
||||
send_data(from_zid, CONTROL_MSG, ACK_MSG)
|
||||
elseif cmsg == ACK_MSG
|
||||
#println("$zid got ACK_MSG from $from_zid")
|
||||
(r_s, w_s, test_remote) = manager.map_zmq_julia[from_zid]
|
||||
manager.map_zmq_julia[from_zid] = (r_s, w_s, false)
|
||||
elseif cmsg == KILL_MSG
|
||||
exit(0)
|
||||
else
|
||||
error("Unknown control message : ", cmsg)
|
||||
end
|
||||
data = ""
|
||||
end
|
||||
|
||||
(from_zid, data)
|
||||
catch e
|
||||
Base.show_backtrace(STDOUT,catch_backtrace())
|
||||
println(e)
|
||||
rethrow(e)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
# MASTER
|
||||
function start_master(np)
|
||||
init_node()
|
||||
@schedule begin
|
||||
try
|
||||
while true
|
||||
(from_zid, data) = recv_data()
|
||||
|
||||
#println("master recv data from $from_zid")
|
||||
|
||||
(r_s, w_s, t_r) = manager.map_zmq_julia[from_zid]
|
||||
unsafe_write(r_s, pointer(data), length(data))
|
||||
end
|
||||
catch e
|
||||
Base.show_backtrace(STDOUT,catch_backtrace())
|
||||
println(e)
|
||||
rethrow(e)
|
||||
end
|
||||
end
|
||||
|
||||
addprocs(manager; np=np)
|
||||
end
|
||||
|
||||
|
||||
function launch(manager::ZMQCMan, params::Dict, launched::Array, c::Condition)
|
||||
#println("launch $(params[:np])")
|
||||
for i in 1:params[:np]
|
||||
io, pobj = open(`$(params[:exename]) worker.jl $i $(Base.cluster_cookie())`, "r")
|
||||
|
||||
wconfig = WorkerConfig()
|
||||
wconfig.userdata = Dict(:zid=>i, :io=>io)
|
||||
push!(launched, wconfig)
|
||||
notify(c)
|
||||
end
|
||||
end
|
||||
|
||||
function connect(manager::ZMQCMan, pid::Int, config::WorkerConfig)
|
||||
#println("connect_m2w")
|
||||
if myid() == 1
|
||||
zid = get(config.userdata)[:zid]
|
||||
config.connect_at = zid # This will be useful in the worker-to-worker connection setup.
|
||||
|
||||
print_worker_stdout(get(config.userdata)[:io], pid)
|
||||
else
|
||||
#println("connect_w2w")
|
||||
zid = get(config.connect_at)
|
||||
config.userdata = Dict{Symbol, Any}(:zid=>zid)
|
||||
end
|
||||
|
||||
streams = setup_connection(zid, SELF_INITIATED)
|
||||
|
||||
udata = get(config.userdata)
|
||||
udata[:streams] = streams
|
||||
|
||||
streams
|
||||
end
|
||||
|
||||
# WORKER
|
||||
function start_worker(zid, cookie)
|
||||
#println("start_worker")
|
||||
Base.init_worker(cookie, ZMQCMan())
|
||||
init_node(zid)
|
||||
|
||||
while true
|
||||
(from_zid, data) = recv_data()
|
||||
|
||||
#println("worker recv data from $from_zid")
|
||||
|
||||
streams = get(manager.map_zmq_julia, from_zid, nothing)
|
||||
if streams === nothing
|
||||
# First time..
|
||||
(r_s, w_s) = setup_connection(from_zid, REMOTE_INITIATED)
|
||||
Base.process_messages(r_s, w_s)
|
||||
else
|
||||
(r_s, w_s, t_r) = streams
|
||||
end
|
||||
|
||||
unsafe_write(r_s, pointer(data), length(data))
|
||||
end
|
||||
end
|
||||
|
||||
function manage(manager::ZMQCMan, id::Int, config::WorkerConfig, op)
|
||||
nothing
|
||||
end
|
||||
|
||||
function kill(manager::ZMQCMan, pid::Int, config::WorkerConfig)
|
||||
send_data(get(config.userdata)[:zid], CONTROL_MSG, KILL_MSG)
|
||||
(r_s, w_s) = get(config.userdata)[:streams]
|
||||
close(r_s)
|
||||
close(w_s)
|
||||
|
||||
# remove from our map
|
||||
delete!(manager.map_zmq_julia, get(config.userdata)[:zid])
|
||||
|
||||
nothing
|
||||
end
|
||||
|
||||
|
||||
function print_worker_stdout(io, pid)
|
||||
@schedule while !eof(io)
|
||||
line = readline(io)
|
||||
println("\tFrom worker $(pid):\t$line")
|
||||
end
|
||||
end
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||||
|
||||
include("ZMQCM.jl")
|
||||
start_broker()
|
||||
@@ -0,0 +1,11 @@
|
||||
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||||
|
||||
include("ZMQCM.jl")
|
||||
|
||||
# @spawn run(`julia broker.jl`)
|
||||
|
||||
start_master(parse(Int,ARGS[1]))
|
||||
|
||||
resp = pmap(x -> myid() *2, [1:nworkers()])
|
||||
|
||||
println(resp)
|
||||
@@ -0,0 +1,5 @@
|
||||
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||||
|
||||
include("ZMQCM.jl")
|
||||
|
||||
start_worker(parse(Int,ARGS[1]), ARGS[2])
|
||||
Reference in New Issue
Block a user