Executando verificação de segurança...
4

Merge sort versão multiprocessing

Olá!

Hoje vim compartilhar com vocês uma versão interessante do mergesort.

Leiam com cuidado cada comentário, acho que dispensa explicações (isso se você conhecer o merge sort)

from multiprocessing import Process, Queue
import os

from lists import group_by_two

verbose = True
# f is a function that takes a list as an argument
# and sorts it
def f(list, queue):
    if verbose:
        print('child process', os.getpid())
        print('sorting list on child process pid', os.getpid())
    list.sort()
    queue.put(list)
    if verbose:
        print('sorted list on child process pid', os.getpid(), list)

# g is a function that takes two lists as arguments
# and merges them
def g(a, b, queue):
    if verbose:
        print('child process', os.getpid())
        print('merging lists on child process pid', os.getpid())
    c = []
    while len(a) > 0 and len(b) > 0:
        if a[0] < b[0]:
            c.append(a[0])
            a.pop(0)
        else:
            c.append(b[0])
            b.pop(0)
    if len(a) > 0:
        c.extend(a)
    else:
        c.extend(b)
    queue.put(c)
    if verbose:
        print('merged lists on child process pid', os.getpid(), c)
        print('\n')

def multiprocess_sort(lists):
    # for each list in lists, create a process to sort it
    # and add the process to the processes list
    processes = []
    queue = Queue()
    for list in lists:
        p = Process(target=f, args=(list, queue))
        processes.append(p)
        p.start()

    # wait for all processes to finish
    for p in processes:
        p.join()

    # get the sorted lists from the queue
    sorted_lists = []
    while not queue.empty():
        sorted_lists.append(queue.get())

    if verbose:
        print('parent process', os.getpid())
        print('all child processes finished')
        print('lists after sorting', sorted_lists)
        print('\n')

    round_number = 1

    # while there is more than one list in sorted_lists
    while len(sorted_lists) > 1:
        if verbose:
            print('now, we will launch a new process to merge lists two by two')
        zip_lists = group_by_two(sorted_lists)

        # for each pair of lists in zip_lists, create a process to merge them
        # and add the process to the processes list
        processes = []
        queue = Queue()

        for a, b in zip_lists:
            p = Process(target=g, args=(a, b, queue))
            processes.append(p)
            p.start()
    
        # wait for all processes to finish
        for p in processes:
            p.join()

        # get the merged lists from the queue
        merged_lists = []
        while not queue.empty():
            merged_lists.append(queue.get())

        # if there is an odd number of lists in sorted_lists,
        # add the last list to merged_lists
        if len(sorted_lists) % 2 == 1:
            merged_lists.append(sorted_lists[-1])

        if verbose:
            print('parent process', os.getpid())
            print('all child processes finished')
            print('lists after merging', merged_lists)
            print('\n')
    
        sorted_lists = merged_lists
        round_number += 1

    return sorted_lists[0]

Como usar?

from files import read_file
from lists import split_list
from multiprocess_sort import multiprocess_sort

def main():
    n_processes = 16
    n_values = 20
    lists = split_list(read_file('input.txt', n_values), n_processes)
    sorted_list = multiprocess_sort(lists)
    print(sorted_list)

if __name__ == '__main__':
    main()

Basicamente define o número de processos + número de linhas (poderia ser programáticamente, mas keep it simple nesse momento)

Carregando publicação patrocinada...