278 lines
7.0 KiB
Julia
278 lines
7.0 KiB
Julia
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
|
|
|
using ZMQ
|
|
|
|
import Base: launch, manage, connect, kill
|
|
|
|
# TCP ports of the local broker: nodes publish to the broker's XSUB side
# (BROKER_SUB_PORT) and subscribe on its XPUB side (BROKER_PUB_PORT).
const BROKER_SUB_PORT = 8100
const BROKER_PUB_PORT = 8101

# Which side opened a logical connection (second argument of setup_connection).
const SELF_INITIATED   = 0
const REMOTE_INITIATED = 1

# Values of the message-type frame.
const PAYLOAD_MSG = "J"
const CONTROL_MSG = "Z"

# Bodies carried by a CONTROL_MSG.
const REQUEST_ACK = "R"
const ACK_MSG     = "A"
const KILL_MSG    = "K"
|
|
|
|
# Cluster manager state for the ZMQ transport.
#
# Only the first three fields are set by the constructor; `ctx`, `pub`, `sub`
# and `zid_self` stay undefined until `init_node` assigns them.
mutable struct ZMQCMan <: ClusterManager
    # peer zid => (read_stream, write_stream, test_remote)
    map_zmq_julia::Dict{Int, Tuple}
    # notified whenever the send lock is released
    c::Condition
    # true while no task holds the send lock
    isfree::Bool
    # ZMQ context and the outbound (pub) / inbound (sub) sockets
    ctx
    pub
    sub
    # this node's own ZMQ id
    zid_self

    function ZMQCMan()
        return new(Dict{Int, Tuple}(), Condition(), true)
    end
end

# Process-wide manager instance used by all the helper functions in this file.
const manager = ZMQCMan()
|
|
|
|
# Acquire the process-wide send lock (`manager.isfree`), blocking the current
# task on `manager.c` until the lock is free. Always returns `nothing`.
function lock_for_send()
    # Re-check after every wake-up: release_lock_for_send wakes all waiters,
    # and another one may have claimed the lock first.
    while !manager.isfree
        wait(manager.c)
    end
    manager.isfree = false
    return nothing
end
|
|
|
|
# Mark the send lock as free and wake every task waiting in lock_for_send.
# Returns whatever `notify` returns.
function release_lock_for_send()
    manager.isfree = true
    return notify(manager.c; all=true)
end
|
|
|
|
# Create this node's ZMQ context and its two sockets, connect them to the
# local broker and record everything in the global `manager`.
#
# `zid` is this node's ZMQ id (default 0); the inbound socket subscribes to
# `string(zid)`.
# Returns the `(pub, sub)` socket pair.
function init_node(zid=0)
    manager.ctx = Context(1)

    # Outbound traffic goes to the broker's SUB port.
    outbound = Socket(manager.ctx, PUB)
    connect(outbound, "tcp://127.0.0.1:$BROKER_SUB_PORT")

    # Inbound traffic comes from the broker's PUB port.
    inbound = Socket(manager.ctx, SUB)
    connect(inbound, "tcp://127.0.0.1:$BROKER_PUB_PORT")
    ZMQ.set_subscribe(inbound, string(zid))

    manager.pub = outbound
    manager.sub = inbound
    manager.zid_self = zid

    return (outbound, inbound)
end
|
|
|
|
# Publish one four-frame message: destination zid, sender zid, message type
# and body.
#
# `zid`   - id of the destination node
# `mtype` - PAYLOAD_MSG or CONTROL_MSG
# `data`  - message body
#
# The frames are sent while holding the send lock so that messages from
# different tasks do not interleave on the shared socket.
function send_data(zid, mtype, data)
    lock_for_send()
    try
        ZMQ.send(manager.pub, Message(string(zid)), SNDMORE)
        ZMQ.send(manager.pub, Message(string(manager.zid_self)), SNDMORE)
        ZMQ.send(manager.pub, Message(mtype), SNDMORE)
        ZMQ.send(manager.pub, Message(data))
    finally
        # Release even when a send throws; otherwise every later sender
        # would wait forever in lock_for_send.
        release_lock_for_send()
    end
    return nothing
end
|
|
|
|
# Task body that moves outbound data for peer `zid` from `write_stream` onto
# the ZMQ socket.
function forward_outbound(zid, write_stream)
    # Handshake: keep requesting an ACK until recv_data clears the
    # test_remote flag (third tuple element) for this peer.
    while true
        entry = get(manager.map_zmq_julia, zid, nothing)
        # The entry was removed (see kill) before the handshake completed.
        entry === nothing && return nothing
        entry[3] || break
        send_data(zid, CONTROL_MSG, REQUEST_ACK)
        sleep(0.5)
    end

    while true
        data = readavailable(write_stream)
        # Once the stream has been closed and drained there is nothing left
        # to forward; stop instead of looping on empty reads.
        if isempty(data) && !isopen(write_stream)
            break
        end
        send_data(zid, PAYLOAD_MSG, data)
    end
    return nothing
end

# Create the pair of BufferStreams that stands in for a socket to peer `zid`,
# register it in `manager.map_zmq_julia` and start the task that forwards
# everything written to the write stream.
#
# `initiated_by` - SELF_INITIATED or REMOTE_INITIATED. Unless the remote side
#                  initiated the connection, the peer is probed with
#                  REQUEST_ACK until it answers.
# Returns `(read_stream, write_stream)`.
function setup_connection(zid, initiated_by)
    try
        read_stream = BufferStream()
        write_stream = BufferStream()

        test_remote = initiated_by != REMOTE_INITIATED
        manager.map_zmq_julia[zid] = (read_stream, write_stream, test_remote)

        @schedule forward_outbound(zid, write_stream)

        (read_stream, write_stream)
    catch e
        Base.show_backtrace(STDOUT, catch_backtrace())
        println(e)
        rethrow(e)
    end
end
|
|
|
|
# BROKER
|
|
# Run the message broker: bind an XSUB socket on BROKER_SUB_PORT and an XPUB
# socket on BROKER_PUB_PORT and hand both to libzmq's `zmq_proxy`.
#
# The proxy call is expected to block for the lifetime of the broker. The
# sockets and the context are closed in `finally`, so they are also released
# when a bind fails or the call is interrupted.
function start_broker()
    ctx = Context(1)
    xpub = Socket(ctx, XPUB)
    xsub = Socket(ctx, XSUB)

    try
        ZMQ.bind(xsub, "tcp://127.0.0.1:$(BROKER_SUB_PORT)")
        ZMQ.bind(xpub, "tcp://127.0.0.1:$(BROKER_PUB_PORT)")

        ccall((:zmq_proxy, :libzmq), Cint,
              (Ptr{Void}, Ptr{Void}, Ptr{Void}),
              xsub.data, xpub.data, C_NULL)
    finally
        ZMQ.close(xpub)
        ZMQ.close(xsub)
        ZMQ.close(ctx)
    end
end
|
|
|
|
# Read the four frames of one message from the inbound socket.
# Returns `(zid, from_zid, mtype, data)`.
function recv_frames()
    zid = parse(Int, String(ZMQ.recv(manager.sub)))
    from_zid = parse(Int, String(ZMQ.recv(manager.sub)))
    mtype = String(ZMQ.recv(manager.sub))
    data = ZMQ.recv(manager.sub)
    return (zid, from_zid, mtype, data)
end

# Receive the next message addressed to this node and act on control messages.
#
# Returns `(from_zid, data)`. For payload messages `data` is the received
# body; for control messages it is the empty string.
#
# Control messages:
#   REQUEST_ACK - answer the sender with ACK_MSG
#   ACK_MSG     - clear the test_remote flag of the sender's map entry
#   KILL_MSG    - exit this process
# Any other control body raises an error. Exceptions are printed with their
# backtrace and rethrown.
function recv_data()
    try
        (zid, from_zid, mtype, data) = recv_frames()
        # ZMQ subscriptions match by prefix, so a node subscribed to "1" is
        # also handed messages addressed to "10", "11", ... Drop those (all
        # four frames have been consumed) and wait for one that is ours.
        while zid != manager.zid_self
            (zid, from_zid, mtype, data) = recv_frames()
        end

        if mtype == CONTROL_MSG
            cmsg = String(data)
            if cmsg == REQUEST_ACK
                send_data(from_zid, CONTROL_MSG, ACK_MSG)
            elseif cmsg == ACK_MSG
                # Ignore an ACK from a peer that is no longer in the map.
                entry = get(manager.map_zmq_julia, from_zid, nothing)
                if entry !== nothing
                    manager.map_zmq_julia[from_zid] = (entry[1], entry[2], false)
                end
            elseif cmsg == KILL_MSG
                exit(0)
            else
                error("Unknown control message : ", cmsg)
            end
            # Control messages carry nothing for the Julia message stream.
            data = ""
        end

        (from_zid, data)
    catch e
        Base.show_backtrace(STDOUT, catch_backtrace())
        println(e)
        rethrow(e)
    end
end
|
|
|
|
# MASTER
|
|
# Initialise the master node (zid 0), start the task that feeds received data
# into the per-worker read streams, then add `np` workers through the global
# `manager`. Returns the result of `addprocs`.
function start_master(np)
    init_node()
    @schedule begin
        try
            while true
                (from_zid, data) = recv_data()

                streams = get(manager.map_zmq_julia, from_zid, nothing)
                # A message from a peer that has already been removed (see
                # kill) must not take down the only receive loop; drop it.
                streams === nothing && continue

                unsafe_write(streams[1], pointer(data), length(data))
            end
        catch e
            Base.show_backtrace(STDOUT, catch_backtrace())
            println(e)
            rethrow(e)
        end
    end

    addprocs(manager; np=np)
end
|
|
|
|
|
|
# ClusterManager hook: start `params[:np]` worker processes.
#
# Each worker runs `worker.jl` with its zid and the cluster cookie as
# arguments. For every worker a WorkerConfig carrying the zid and the
# process's output stream is pushed onto `launched`, and `c` is notified.
function launch(manager::ZMQCMan, params::Dict, launched::Array, c::Condition)
    for zid in 1:params[:np]
        cmd = `$(params[:exename]) worker.jl $zid $(Base.cluster_cookie())`
        io, pobj = open(cmd, "r")

        wconfig = WorkerConfig()
        wconfig.userdata = Dict(:zid=>zid, :io=>io)
        push!(launched, wconfig)
        notify(c)
    end
end
|
|
|
|
# ClusterManager hook: set up the stream pair used to talk to worker `pid`.
#
# On process 1 the zid comes from `config.userdata`; it is also stored in
# `config.connect_at`, and the worker's output is echoed through
# print_worker_stdout. On any other process the zid is read from
# `config.connect_at` and a fresh userdata dict is created.
#
# Returns the `(read_stream, write_stream)` pair, which is also stored under
# `:streams` in the userdata dict.
function connect(manager::ZMQCMan, pid::Int, config::WorkerConfig)
    if myid() == 1
        userdata = get(config.userdata)
        zid = userdata[:zid]
        # Used later for the worker-to-worker connection setup.
        config.connect_at = zid
        print_worker_stdout(userdata[:io], pid)
    else
        zid = get(config.connect_at)
        config.userdata = Dict{Symbol, Any}(:zid=>zid)
    end

    streams = setup_connection(zid, SELF_INITIATED)
    get(config.userdata)[:streams] = streams

    return streams
end
|
|
|
|
# WORKER
|
|
# Worker entry point: initialise this process as a worker with the given
# cluster `cookie`, connect it to the broker as node `zid`, then loop forever
# feeding received data into the read stream of the sending peer.
#
# The first message from an unknown peer creates the stream pair for it and
# starts Julia's message processing on those streams.
function start_worker(zid, cookie)
    Base.init_worker(cookie, ZMQCMan())
    init_node(zid)

    while true
        (from_zid, data) = recv_data()

        if haskey(manager.map_zmq_julia, from_zid)
            (r_s, w_s, t_r) = manager.map_zmq_julia[from_zid]
        else
            # First contact from this peer.
            (r_s, w_s) = setup_connection(from_zid, REMOTE_INITIATED)
            Base.process_messages(r_s, w_s)
        end

        unsafe_write(r_s, pointer(data), length(data))
    end
end
|
|
|
|
# ClusterManager hook: this manager takes no action for any `op`.
manage(manager::ZMQCMan, id::Int, config::WorkerConfig, op) = nothing
|
|
|
|
# ClusterManager hook: tell the worker behind `config` to exit, close the
# stream pair used to talk to it and remove it from the zid map.
function kill(manager::ZMQCMan, pid::Int, config::WorkerConfig)
    userdata = get(config.userdata)
    zid = userdata[:zid]

    send_data(zid, CONTROL_MSG, KILL_MSG)

    (rd, wr) = userdata[:streams]
    close(rd)
    close(wr)

    # Forget the peer.
    delete!(manager.map_zmq_julia, zid)

    return nothing
end
|
|
|
|
|
|
# Start a task that echoes every line read from `io`, prefixed with the
# worker's `pid`, until `io` reaches end of file. Returns the task.
function print_worker_stdout(io, pid)
    @schedule for line in eachline(io)
        println("\tFrom worker $(pid):\t$line")
    end
end
|
|
|