1.64.  failednode( integer, integer )

Propriétés de la fonction
Langage: PLPGSQL
Type du code retour: integer

Initie la bascule de failed_node à backup_node. Cette fonction doit être appelée sur tous les nœuds, puis doit attendre le redémarrage de tous les démons de nœud.

declare
        p_failed_node           alias for $1;
        p_backup_node           alias for $2;
        v_row                           record;
        v_row2                          record;
        v_n                                     int4;
begin
        -- ----
        -- Grab the central configuration lock
        -- ----
        lock table sl_config_lock;

        -- ----
        -- All consistency checks first
        -- Check that every node that has a path to the failed node
        -- also has a path to the backup node.
        -- ----
        for v_row in select P.pa_client
                        from sl_path P
                        where P.pa_server = p_failed_node
                                and P.pa_client <> p_backup_node
                                and not exists (select true from sl_path PP
                                                        where PP.pa_server = p_backup_node
                                                                and PP.pa_client = P.pa_client)
        loop
                raise exception 'Slony-I: cannot failover - node % has no path to the backup node',
                                v_row.pa_client;
        end loop;

        -- ----
        -- Check all sets originating on the failed node
        -- ----
        for v_row in select set_id
                        from sl_set
                        where set_origin = p_failed_node
        loop
                -- ----
                -- Check that the backup node is subscribed to all sets
                -- that originate on the failed node
                -- ----
                select into v_row2 sub_forward, sub_active
                                from sl_subscribe
                                where sub_set = v_row.set_id
                                        and sub_receiver = p_backup_node;
                if not found then
                        raise exception 'Slony-I: cannot failover - node % is not subscribed to set %',
                                        p_backup_node, v_row.set_id;
                end if;

                -- ----
                -- Check that the subscription is active
                -- ----
                if not v_row2.sub_active then
                        raise exception 'Slony-I: cannot failover - subscription for set % is not active',
                                        v_row.set_id;
                end if;

                -- ----
                -- If there are other subscribers, the backup node needs to
                -- be a forwarder too.
                -- ----
                select into v_n count(*)
                                from sl_subscribe
                                where sub_set = v_row.set_id
                                        and sub_receiver <> p_backup_node;
                if v_n > 0 and not v_row2.sub_forward then
                        raise exception 'Slony-I: cannot failover - node % is not a forwarder of set %',
                                        p_backup_node, v_row.set_id;
                end if;
        end loop;

        -- ----
        -- Terminate all connections of the failed node the hard way
        -- ----
        perform terminateNodeConnections(p_failed_node);

        -- ----
        -- Move the sets
        -- ----
        for v_row in select S.set_id, (select count(*)
                                        from sl_subscribe SUB
                                        where S.set_id = SUB.sub_set
                                                and SUB.sub_receiver <> p_backup_node
                                                and SUB.sub_provider = p_failed_node)
                                        as num_direct_receivers 
                        from sl_set S
                        where S.set_origin = p_failed_node
                        for update
        loop
                -- ----
                -- If the backup node is the only direct subscriber ...
                -- ----
                if v_row.num_direct_receivers = 0 then
                        raise notice 'failedNode: set % has no other direct receivers - move now', v_row.set_id;
                        -- ----
                        -- backup_node is the only direct subscriber, move the set
                        -- right now. On the backup node itself that includes restoring
                        -- all user mode triggers, removing the protection trigger,
                        -- adding the log trigger, removing the subscription and the
                        -- obsolete setsync status.
                        -- ----
                        if p_backup_node = getLocalNodeId('_schemadoc') then
                                for v_row2 in select * from sl_table
                                                where tab_set = v_row.set_id
                                loop
                                        perform alterTableRestore(v_row2.tab_id);
                                end loop;

                                update sl_set set set_origin = p_backup_node
                                                where set_id = v_row.set_id;

                                delete from sl_setsync
                                                where ssy_setid = v_row.set_id;

                                for v_row2 in select * from sl_table
                                                where tab_set = v_row.set_id
                                loop
                                        perform alterTableForReplication(v_row2.tab_id);
                                end loop;
                        end if;

                        delete from sl_subscribe
                                        where sub_set = v_row.set_id
                                                and sub_receiver = p_backup_node;
                else
                        raise notice 'failedNode: set % has other direct receivers - change providers only', v_row.set_id;
                        -- ----
                        -- Backup node is not the only direct subscriber. This
                        -- means that at this moment, we redirect all direct
                        -- subscribers to receive from the backup node, and the
                        -- backup node itself to receive from another one.
                        -- The admin utility will wait for the slon engine to
                        -- restart and then call failedNode2() on the node with
                        -- the highest SYNC and redirect this to it on
                        -- backup node later.
                        -- ----
                        update sl_subscribe
                                        set sub_provider = (select min(SS.sub_receiver)
                                                        from sl_subscribe SS
                                                        where SS.sub_set = v_row.set_id
                                                                and SS.sub_provider = p_failed_node
                                                                and SS.sub_receiver <> p_backup_node
                                                                and SS.sub_forward)
                                        where sub_set = v_row.set_id
                                                and sub_receiver = p_backup_node;
                        update sl_subscribe
                                        set sub_provider = p_backup_node
                                        where sub_set = v_row.set_id
                                                and sub_provider = p_failed_node
                                                and sub_receiver <> p_backup_node;
                end if;
        end loop;

        -- Rewrite sl_listen table
        perform RebuildListenEntries();

        -- Run addPartialLogIndices() to try to add indices to unused sl_log_? table
        perform addPartialLogIndices();

        -- ----
        -- Make sure the node daemon will restart
        -- ----
        notify "_schemadoc_Restart";

        -- ----
        -- That is it - so far.
        -- ----
        return p_failed_node;
end;