Source code for cartons_inventory.cartons

import csv
import inspect
import os

import numpy as np
import pandas as pd
from astropy.io import ascii
from sdssdb.peewee.sdss5db.targetdb import (Cadence, Carton, CartonToTarget,
                                            Category, Instrument, Magnitude,
                                            Mapper, Version)

import cartons_inventory
from cartons_inventory import log, main


# Module-level aliases of the targetdb peewee models. The queries in this
# module are written against these short names (Version is used directly,
# without an alias).
Car = Carton.alias()
CarTar = CartonToTarget.alias()
Cad = Cadence.alias()
Inst = Instrument.alias()
Categ = Category.alias()
Map = Mapper.alias()
Mag = Magnitude.alias()


class CartonInfo(object):
    """Saves targetdb info for cartons.

    This class takes basic information from a carton (``name``, ``plan``, and ``category_label``
    at minimum) and at instantiation sets the carton dependent (as opposed to target dependent)
    information of the carton. ``stage`` and ``active`` parameters can also be provided but
    currently nothing is done with those. Carton dependent information is either taken from
    input parameters of __init__ or by the assign_carton_info function that also sets the
    boolean in_targetdb to check the existence of the carton.

    Then, function assign_target_info assigns target dependent information which can be
    the magnitude placeholders used for the different photometric systems in the carton
    (calculate_mag_placeholders=True), and/or python sets with the unique values found per
    cadence, lambda, and instrument in the carton, along with ``priority`` and ``value``
    ranges.

    Finally, function process_cartons wraps all the functions of this class. Based on the value
    of the ``origin`` parameter, takes as input a file from rsconfig or custom, or takes a
    selection criteria to search cartons in targetdb. With this function we can evaluate the
    existence of a list of cartons, check their content, save a selection criteria as an input
    file ready to be used by process_cartons, run assign_target_info to get target parameter
    sets, ranges, and/or magnitude_placeholders, save an output .csv file with the information
    of each carton, or return a list of all the CartonInfo objects.

    Parameters
    ----------

    carton: str
        Carton name in table targetdb.carton
    plan: str
        Plan in table targetdb.version
    category_label: str
        Label in targetdb.category table (e.g. science, standard_boss, guide)
    stage: str
        Robostrategy stage, could be srd, open, none, filler. Default is 'N/A'
    active: str
        ``y`` or ``n`` to check if it is active in robostrategy. Default is 'N/A'
    mapper_label: str
        Label in targetdb.mapper (MWM or BHM)
    program: str
        Program in targetdb.program table
    version_pk: int
        ID in targetdb.version table
    tag: str
        tag in targetdb.version table
    mapper_pk: int
        Mapper_pk in targetdb.carton table. 0 for MWM and 1 for BHM
    category_pk: int
        category_pk in targetdb.carton table (e.g. 0 for science)
    in_targetdb: bool
        True if carton/plan/category_label combination is found in targetdb, False if not.
    sets_calculated: bool
        True when in_targetdb is True and target dependent parameters value_min, value_max,
        priority_min, priority_max, cadence_pk, cadence_label, lambda_eff, instrument_pk,
        and instrument_label have been calculated for the carton using
        assign_target_info(calculate_sets=True)
    mag_placeholders_calculated: bool
        True when magnitude placeholders used for SDSS, TMASS, and GAIA photometric systems
        have been calculated. These are calculated using check_mag_outliers function

    """
    cfg = cartons_inventory.config

    def __init__(self, carton, plan, category_label, stage='N/A', active='N/A'):
        self.carton = carton
        self.plan = plan
        self.category_label = category_label
        self.stage = stage
        self.active = active

        # Carton dependent attributes start as empty lists and are only replaced
        # with database values by assign_carton_info when a match is found.
        self.mapper_label, self.program, self.version_pk = [], [], []
        self.tag, self.mapper_pk, self.category_pk = [], [], []
        self.in_targetdb = False
        self.sets_calculated = False
        self.mag_placeholders_calculated = False

        self.assign_carton_info()

    def assign_carton_info(self):
        """Assigns carton dependent information for cartons in targetdb.

        If the carton/plan/category_label combination in the CartonInfo object
        is found in targetdb this function assigns attributes for carton dependent
        parameters (parameters shared for all targets in the carton). The attribute
        names are read from cfg['db_fields']['carton_dependent'].
        Finally it sets the in_targetdb attribute as True when found in the database.

        When the combination is not found, the function still tries to fill ``tag``
        and ``version_pk`` from the first targetdb.version row matching ``plan``.

        """

        cfg = cartons_inventory.config

        basic_info = (
            Car
            .select(Map.label.alias('mapper_label'), Car.version_pk.alias('version_pk'),
                    Car.category_pk.alias('category_pk'), Car.mapper_pk.alias('mapper_pk'),
                    Version.tag, Car.program)
            .join(Version, on=(Version.pk == Car.version_pk))
            .join(Categ, 'LEFT JOIN', Car.category_pk == Categ.pk)
            .join(Map, 'LEFT JOIN', Car.mapper_pk == Map.pk)
            .where(Car.carton == self.carton)
            .where(Version.plan == self.plan)
            .where(Categ.label == self.category_label).dicts()
        )

        if len(basic_info) > 0:  # If the carton is in targetdb assigns carton info
            res = basic_info[0]
            carton_parameter_names = cfg['db_fields']['carton_dependent']
            for parameter in carton_parameter_names:
                setattr(self, parameter, res[parameter])
            self.in_targetdb = True

        if self.in_targetdb is False:  # If not in targetdb still tries to get the Version info
            query_version = (
                Version
                .select(Version.tag, Version.pk)
                .where(Version.plan == self.plan).dicts()
            )
            if len(query_version) > 0:
                ver_info = query_version[0]
                self.tag = ver_info['tag']
                self.version_pk = ver_info['pk']

    def build_query_target(self):
        """Creates the query with the target dependent information of the carton.

        The query selects on carton name, plan, and tag.
        NOTE(review): category_label is not part of the filter here - confirm that
        carton name + plan + tag is enough to identify a single carton.
        """

        query_target = (
            Car
            .select(Inst.label.alias('instrument_label'), CarTar.cadence_pk.alias('cadence_pk'),
                    CarTar.lambda_eff, CarTar.instrument_pk.alias('instrument_pk'),
                    CarTar.priority, CarTar.value, Cad.label.alias('cadence_label'), Mag.g, Mag.r,
                    Mag.i, Mag.z, Mag.h, Mag.j, Mag.k, Mag.bp, Mag.rp, Mag.gaia_g)
            .join(Version, on=(Version.pk == Car.version_pk))
            .join(CarTar, on=(CarTar.carton_pk == Car.pk))
            .join(Cad, 'LEFT JOIN', on=(Cad.pk == CarTar.cadence_pk))
            .join(Inst, 'LEFT JOIN', CarTar.instrument_pk == Inst.pk)
            .join(Mag, 'LEFT JOIN', CarTar.pk == Mag.carton_to_target_pk)
            .where(Car.carton == self.carton)
            .where((Version.plan == self.plan) & (Version.tag == self.tag))
        )

        return query_target

    def return_target_dataframe(self):
        """Executes query from build_query_target and returns it in a Pandas DataFrame.

        Returns None (after printing a notice) when the carton is not in targetdb.
        """

        if not self.in_targetdb:
            print(self.carton, 'not in targetdb so we cant return the target dataframe')
            return
        target_query = self.build_query_target()
        df = pd.DataFrame(list(target_query.dicts()))
        return df

    def assign_target_info(self, calculate_sets=True, calculate_mag_placeholders=False):
        """Assigns target dependent information for cartons in targetdb.

        This function calls return_target_dataframe to get a Pandas DataFrame
        with target dependent information for a carton. Then it sets different attributes
        to the CartonInfo object depending on the values of calculate_sets and
        calculate_mag_placeholders. Nothing is assigned when the carton is not in targetdb.

        Parameters
        ----------

        calculate_sets : bool
            If true this function assigns the attributes value_min, value_max,
            priority_min, priority_max, cadence_pk, cadence_label, lambda_eff, instrument_pk,
            and instrument_label, based on information from targetdb. It also sets the attribute
            sets_calculated as True to keep record.
        calculate_mag_placeholders : bool
            If true this function assigns the attribute magnitude_placeholders using
            check_mag_outliers function, and sets mag_placeholders_calculated=True to keep
            record. magnitude_placeholders is a set with all the combinations of photometric
            system (SDSS, TMASS, GAIA) and mag placeholder used for that photometric system in
            that carton (None, Invalid, 0.0, -9999.0, 999, 99.9).


        """
        if not self.in_targetdb:
            print('carton', self.carton, 'version_pk', self.version_pk,
                  'category_label', self.category_label, 'not found in database',
                  'so we cant assign target info')
            return

        # The target query is shared by both steps, so it is fetched lazily and
        # at most once per call.
        dataframe = None

        if calculate_sets:
            if self.sets_calculated:
                print('Sets already calculated for this carton')
            else:
                dataframe = self.return_target_dataframe()
                target_parameters = self.cfg['db_fields']
                set_names = target_parameters['sets']
                set_range_names = target_parameters['set_ranges']

                for set_name in set_names:
                    setattr(self, set_name, main.set_or_none(dataframe[set_name]))
                # Ranges are derived from the sets that were just assigned
                for set_name in set_range_names:
                    set_range = main.get_range(getattr(self, set_name))
                    setattr(self, set_name + '_min', set_range[0])
                    setattr(self, set_name + '_max', set_range[1])
                self.sets_calculated = True

        if calculate_mag_placeholders:
            if self.mag_placeholders_calculated:
                print('Magnitude placeholders already caclulated for this carton')
            else:
                if dataframe is None:
                    dataframe = self.return_target_dataframe()
                bands = self.cfg['bands']
                # Two parallel lists: mags_names[i] belongs to systems_names[i]
                mags_names = [el for key in bands.keys() for el in bands[key]]
                systems_names = [key for key in bands.keys() for el in bands[key]]
                self.magnitude_placeholders = check_mag_outliers(dataframe, mags_names,
                                                                 systems_names)
                self.mag_placeholders_calculated = True

    def check_existence(self, log, verbose=True):
        """Checks if the carton/plan/category_label from object is found in targetdb.

        This function checks whether a carton exists or not in targetdb, to be used
        when a list of cartons is used in process_cartons (i.e. ``origin`` rsconfig or custom)
        or to check the existence of a single carton.

        Parameters
        ----------

        log : SDSSLogger
            Log used to store information of cartons_inventory
        verbose : bool
            If true and if the carton is not found in the database the function will print
            and save on log information to try to correct the input file from which the
            carton/plan/category_label was taken (and stored in the object). If no carton
            with that name is found in targetdb it will print the associated warning, and if
            cartons with the same name but different plan or category_label are found a line
            with input file format will be printed for each of those cartons so the user
            can replace the line in the input file with one of the options proposed.

        Returns
        -------

        cartons_alternatives : Pandas DataFrame
            A Pandas DataFrame that for each carton/plan/category_label combination not found
            in targetdb has an entry for it and for all the alternative cartons found in targetdb
            that have the same carton name but different plan or category. For each entry the
            dataframe contains the columns carton, plan, category_label, stage, active, tag,
            version_pk, and in_targetdb. The DataFrame is empty when the carton is in targetdb.

        """
        colnames = ['carton', 'plan', 'category_label', 'stage',
                    'active', 'tag', 'version_pk', 'in_targetdb']
        df_data = {}
        msg = ''
        if self.in_targetdb is False:
            # Columns are kept in an explicit dict. Writing through locals() is not
            # supported inside functions and fails on Python 3.13+, where every
            # locals() call returns an independent snapshot.
            # First row: the requested combination that was not found.
            df_data = {colname: [getattr(self, colname)] for colname in colnames}

            alternatives_info = (
                Car
                .select(Car.carton, Version.plan, Car.version_pk.alias('version_pk'),
                        Categ.label.alias('category_label'), Version.tag, Car.program)
                .join(Version, on=(Version.pk == Car.version_pk))
                .join(Categ, 'LEFT JOIN', Car.category_pk == Categ.pk)
                .where(Car.carton == self.carton).dicts()
            )
            if len(alternatives_info) == 0:
                msg = 'Warning: Carton ' + self.carton + ' not in targetdb '\
                    'and there is no carton with that name'
            else:
                msg = 'Carton ' + self.carton + ' not in targetdb, to avoid this you can replace '\
                    'the next\nline with the information that follows '\
                    'replacing (stage) and (active) if it corresponds\n'
                msg += '|' + self.carton.rjust(41) + ' | ' + self.plan.rjust(6) + ' | '\
                       + self.category_label.rjust(20) + ' |'\
                       + self.stage.rjust(6) + ' | ' + self.active.rjust(6) + ' | '\
                       + '--> Replace this line\n'
                # Following rows: every carton in targetdb sharing the same name
                for ind in range(len(alternatives_info)):
                    res = alternatives_info[ind]
                    res['stage'], res['active'] = 'N/A', 'N/A'
                    for colname in colnames[:-1]:
                        df_data[colname].append(res[colname])
                    df_data['in_targetdb'].append(True)
                    msg += '|' + res['carton'].rjust(41) + ' | ' + res['plan'].rjust(6) + ' | '\
                        + res['category_label'].rjust(20) + ' |   N/A |    N/A |\n'
        if verbose is True and msg != '':
            log.debug(msg)
            print(msg)
        df = pd.DataFrame(data=df_data)
        return df

    def visualize_content(self, log, width=140):
        """Logs information from targetdb for a given carton.

        Carton dependent parameters are always logged. Target dependent sets/ranges
        and magnitude placeholders are logged only if they were already calculated;
        otherwise a hint on how to calculate them is logged instead.

        Parameters
        ----------

        log : SDSSLogger
            Log on which the information is written
        width : int
            Total width in characters of each logged line
        """

        pars = cartons_inventory.config['db_fields']
        log.info(' ')
        log.info('#' * width)
        print_centered_msg('CARTON DEPENDENT INFORMATION', width, log)
        print_centered_msg(' ', width, log)
        for par in ['carton'] + pars['input_dependent'] + ['in_targetdb']:
            self.print_param(par, width, log)
        for par in pars['carton_dependent']:
            self.print_param(par, width, log)
        log.info('#' * width)

        if not self.in_targetdb:
            print_centered_msg('Since the carton is not in targetdb', width, log)
            print_centered_msg('this is all the information we can get', width, log)
            log.info('#' * width)
            return

        if not self.sets_calculated:
            print_centered_msg('The list of values par target parameter has', width, log)
            print_centered_msg('not been calculated for this carton, to do so', width, log)
            print_centered_msg('first run assign_target_info on this carton', width, log)
            print_centered_msg('using calculate_sets=True (default)', width, log)
            log.info('#' * width)

        else:
            print_centered_msg('VALUES PER TARGET DEPENDENT PARAMETER', width, log)
            print_centered_msg(' ', width, log)
            # Parameters that have a range are shown as min/max instead of full sets
            for par in [el for el in pars['sets'] if el not in pars['set_ranges']]:
                self.print_param(par, width, log)
            for par in pars['set_ranges']:
                self.print_range(par, width, log)
            log.info('#' * width)

        if not self.mag_placeholders_calculated:
            print_centered_msg('The list of mag placeholers for each photometric', width, log)
            print_centered_msg('system has not been calculated for this carton yet,', width, log)
            print_centered_msg('to do so first run assign_target_info on this carton', width, log)
            # The hint names the real keyword argument of assign_target_info
            print_centered_msg('using calculate_mag_placeholders=True (not default)', width, log)
            log.info('#' * width)

        else:
            print_centered_msg('MAGNITUDE PLACEHOLDERS PER PHOTOMETRIC SYSTEM', width, log)
            print_centered_msg(' ', width, log)
            self.print_param('magnitude_placeholders', width, log)
            log.info('#' * width)

    def print_param(self, par, width, log):
        """Logs a message with width=width containing a parameter from carton object."""
        log.info('### ' + par + ': ' + str(getattr(self, par)).ljust(width - len(par) - 10) +
                 ' ###')

    def print_range(self, par, width, log):
        """Logs a message with width=width containing the range of a parameter from the carton."""
        left_msg = str(getattr(self, par + '_min'))
        right_msg = str(getattr(self, par + '_max'))
        log.info('### ' + par + ' range: ' + left_msg + ' to ' + right_msg +
                 ' ' * (width - len(left_msg) - len(right_msg) - len(par) - 20) + ' ###')


def print_centered_msg(st, width, log):
    """Log string ``st`` centered inside a ``###`` frame of total width ``width``.

    Parameters
    ----------
    st : str
        Text to be centered
    width : int
        Total width in characters of the logged line
    log : logger
        Object whose ``info`` method receives the framed line
    """
    # 7 characters are taken by the frame: '###' on the left and ' ###' on the right
    free_space = width - len(st) - 7
    pad_left = round(free_space / 2.0)
    pad_right = free_space - pad_left
    framed = ''.join(['###', ' ' * pad_left, st, ' ' * pad_right, ' ###'])
    log.info(framed)


def gets_carton_info(carton_list_filename, header_length=1, delimiter='|'):
    """Get the necessary information from the input carton list file.

    Each data row is split on ``delimiter``; fields 1 to 5 are read as carton,
    plan, category, stage, and active (field 0 is skipped, e.g. the empty text
    before a leading delimiter), with surrounding whitespace removed.

    Parameters
    ----------
    carton_list_filename : str
        Path of the file with the list of cartons
    header_length : int
        Number of lines to skip at the beginning of the file
    delimiter : str
        Character separating the columns of the file

    Returns
    -------
    cartons, plans, categories, stages, actives : lists of str
        One list per column, each with one entry per data row in the file.
        All lists are empty when the file has no data rows.
    """

    # ndmin=2 keeps the (rows, columns) layout even when the file has a single
    # data row; without it loadtxt returns a 1-D array and column indexing fails.
    cat = np.loadtxt(carton_list_filename, dtype='str', skiprows=header_length,
                     delimiter=delimiter, ndmin=2)
    if cat.size == 0:
        return [], [], [], [], []

    cartons, plans, categories, stages, actives = (
        [str.strip(value) for value in cat[:, column]] for column in range(1, 6)
    )
    return cartons, plans, categories, stages, actives


def check_mag_outliers(datafr, bands, systems):
    """Collects the types of magnitude outliers found for each photometric system.

    Parameters
    ----------
    datafr : Pandas DataFrame
        Containing the magnitudes from different photometric systems for the stars in a
        given carton, one column per band.
    bands : strings list
        Names of the columns (bands) to inspect.
    systems : strings list
        Photometric system of each band, matched by position
        (i.e. bands[ind] belongs to systems[ind]). The docstring of the caller mentions
        'SDSS', 'TMASS', and 'GAIA' as the expected values.

    Returns
    -------
    placeholders
        The result of main.set_or_none applied to a list of strings of the form
        ``<system>_<outlier type>`` (presumably a set, or None when nothing is
        found - verify against main.set_or_none).
        The outlier types are: None (for entries that are None), Invalid (for entries
        that are not finite, i.e. NaN or infinite), and the number itself (for finite
        values lower than -9, greater than 50, or equal to zero).

        For example if a carton contains stars with h=999.9, k=999.9, j=None, and bp=Inf
        the strings produced are 'TMASS_999.9', 'TMASS_None', and 'GAIA_Invalid'.

    """
    labels = []
    for position, band in enumerate(bands):
        column = datafr[band]
        # Entries that are None cannot go through the numeric checks below
        present = np.array([mag for mag in column if mag is not None])
        finite = present[np.isfinite(present)]
        suspicious = (finite < -9) | (finite > 50) | (finite == 0)
        # Each distinct suspicious value becomes its own outlier type
        tags = list(set([str(mag) for mag in finite[suspicious]]))
        if len(column) > len(present):
            tags.append('None')
        if len(present) > len(finite):
            tags.append('Invalid')
        system = systems[position]
        labels += [system + '_' + tag for tag in tags]
    return main.set_or_none(labels)


[docs]def process_cartons(origin='rsconfig', files_folder='./files/', inputname=None,
                    delim='|', check_exists=False, verb=False, return_objects=False,
                    write_input=False, write_output=False, assign_sets=False,
                    assign_placeholders=False, visualize=False, overwrite=False,
                    all_cartons=False, cartons_name_pattern=None, versions='latest',
                    forced_versions=None, unique_version=None):
    """Get targetdb information for list of cartons or selection criteria and outputs .csv file.

    Takes as input a file with a list of cartons from rsconfig (origin=``rsconfig``)
    or custom (origin=``custom``) or a selection criteria to be applied on targetdb
    (origin=``targetdb) in which case an input list file can also be created
    (with write_input=True) for future use.

    This function can be used to check the existence of the cartons (check_exist=True)
    in which case it returns a dataframe with the alternative cartons information, or
    it can be used to call assign_target_info to get the targetdb information of all
    the cartons (check_exists=False) and store it in a .csv file and/or return the
    CartonInfo objects.

    The function also has provides the option of logging and printing the targetdb information
    from all the cartons in a human readable way by using visualize=True.

    Parameters
    ----------
    origin : str
        ``rsconfig`` to use input list file from rsconfig, ``custom`` to use custom input list
        of carton or ``targetdb`` to look for cartons in targetdb based on the ``all_cartons``,
        ``cartons_name_pattern``, ``versions``, ``forced_versions``, and ``unique_versions``
        parameters.
    files_folder : str
        Main folder where input and output files would be stored. In this folder subfolders
        rsconfig, custom, and targetdb are expected.
    inputname : str or None
        Name of input file to be searched in <<files_folder>>/<<origin>> folder
    delim : str
        Delimiter character to use when creating output .csv file
    check_exists : bool
        If true and origin is rsconfig or custom the function looks for alternatives to cartons
        that exist in targetdb but have different values of plan or category_label than carton
        object. In this case the function returns a dataframe with the original carton versions
        not found and the alternatives and exits the function
    verb : bool
        If True function logs and prints alternatives to replace the input lines
        corresponding to carton/plan/category_label combinations not found in targetdb with lines
        corresponding to the same carton but with existing plan/category_label combinations.
    return_objects : bool
        If True the function returns the CartonInfo objects.
    write_input : bool
        If True the function writes a file to be used then as input by
        process_cartons with the cartons retrieved by the targetdb query.
    write_output : bool
        If True the function creates an output .csv file with the
        information of each CartonInfo object.
    assign_sets : bool
        If True assign_target_info assigns the attributes for target dependent parameters for the
        carton. For each parameter returns a python set with all the values present in the carton
        targets or the range spanned by them.
    assign_placeholders : bool
        If True assign_target_info assigns magnitude placeholders found in targetdb for each
        photometric system (SDSS, TMASS, GAIA) for each carton using check_mag_outliers.
    visualize : bool
        If True we log and print all the information found in targetdb for each carton in a human
        readable way
    overwrite : bool
        If True enables that inputfile like and output file could be overwritten.
    all_cartons : bool
        If True and origin=targetdb cartons with any name are taken from targetdb.
        from targetdb
    cartons_name_pattern : str or None
        If True and origin=targetdb only cartons with pattern name cartons_name_pattern are
        are taken from targetdb. The string uses * character as wildcard
    versions : str
        If True and origin=targetdb sets the versions that would be taken for each carton name
        If ``single`` only versions matching ``unique_version`` will be taken, if ``latest``
        only the latest version of each carton would be taken, if ``all`` all versions from each
        carton is taken.
    forced_versions: dict or None
        If present, and origin=targetdb all cartons in this dictionary are forced to consider
        only the version in the dictionary corresponding value, independent on the ``versions``
        value.
    unique_version : Int or None
        If present, origin=targetdb, and versions=single then only this version_pk will be
        considered for each carton


    Returns
    -------

    """
    cfg = cartons_inventory.config
    # Check that we have a valid origin parameter
    assert origin in ['targetdb', 'rsconfig', 'custom'], f'{origin!r} is not a valid'\
        ' option for origin parameter'

    fullfolder = files_folder + origin + '/'

    # If an input file is used check that it exists and that we are not trying to overwrite it
    if origin in ['rsconfig', 'custom']:

        assert write_input is False, 'write_input=True only available for origin=\'targetdb\''
        assert inputname is not None, f'for origin={origin!r} an inputname has to be provided'
        inputread_filename = fullfolder + inputname
        assert os.path.isfile(inputread_filename), 'file: ' + \
            os.path.realpath(inputread_filename) + '\n' + f' required for origin={origin!r}'\
            f'and inputname={inputname!r} but file doesn\'t exist'

        outputbase_filename = fullfolder + 'Info_' + inputname.replace('.txt', '')

    if origin == 'targetdb':

        # First check if the input arguments are valid
        assert check_exists is False, 'check_exists=True option only valid for origin'\
            '\'rsconfig\' or \'custom\''
        assert versions in ['latest', 'all', 'single'], f'{versions!r} is not a valid option'\
            ' for versions parameter'
        assert forced_versions is None or type(forced_versions) == dict, 'if used, '\
            f'forced_versions has to be type=dict not type={type(forced_versions)}'
        assert all_cartons is True or cartons_name_pattern is not None, ' carton_name_pattern'\
            ' needed when all_cartons=False (e.g. cartons_name_pattern=\'bhm_rm_*\')'
        assert versions != 'single' or type(unique_version) == int, 'If versions=\'single\' then'\
            ' unique version has to be an integer'
        assert write_input is True or write_output is False, 'To create an output file'\
            ' an input file has to be created as well to help keep record'

        # Then I calculate the base name for input and output files based on selection criteria
        if all_cartons is True:
            basename = 'Cartons_all'
        if all_cartons is False:
            basename = 'Cartons_sample'
        if versions != 'unique':
            basename += '_Versions_' + versions
        if versions == 'unique':
            basename += '_Version_' + str(unique_version)
        if forced_versions is not None:
            basename += '_and_forced'

        inputwrite_filename = fullfolder + basename + '.txt'
        outputbase_filename = fullfolder + 'Info_' + basename

        if write_input is True and overwrite is False:
            assert not os.path.isfile(inputwrite_filename), 'input file '\
                f'{os.path.realpath(inputwrite_filename)}\n already exists and overwrite=False'

    # If write_output set the final output_filename and check overwritting
    if write_output is True:
        assert assign_sets is True or assign_placeholders is True, 'to create an output .csv'\
            'at least one of assign_sets or assign_placeholders has to be True'
        if assign_sets is True and assign_placeholders is False:
            output_filename = outputbase_filename + '_sets.csv'
        if assign_sets is False and assign_placeholders is True:
            output_filename = outputbase_filename + '_magplaceholers.csv'
        if assign_sets is True and assign_placeholders is True:
            output_filename = outputbase_filename + '_all.csv'

        if overwrite is False:
            assert not os.path.isfile(output_filename), 'output file '\
                f'{os.path.realpath(output_filename)}\n already exists and overwrite=False'

    if origin in ['rsconfig', 'custom']:
        cartons, plans, categories, stages, actives = gets_carton_info(inputread_filename)
    if origin == 'targetdb':
        if all_cartons is True:
            pattern = '%%'
        if all_cartons is False:
            pattern = cartons_name_pattern.replace('*', '%')

        cartons_list = (
            Car
            .select(Car.carton, Version.pk.alias('version_pk'), Version.plan,
                    Categ.label.alias('category_label'))
            .join(Version, on=(Version.pk == Car.version_pk))
            .join(Categ, 'LEFT JOIN', Car.category_pk == Categ.pk)
            .where(Car.carton ** pattern)
            .dicts()
        )
        # Here we look for the basic information of each carton/plan/category_label
        # available in targetdb to then instantiate the objects with that information
        # For each carton name we calculate the version_pk(s) that match the selection criteria
        # according to the value of ``versions`` parameter (single, all, latest) and override
        # the value if carton is present in forced_versions dictionary.
        cart_results = pd.DataFrame(cartons_list)
        cartons_unique = np.sort(list(set(cart_results['carton'])))
        all_indices = []
        for name in cartons_unique:
            indcart = np.where(cart_results['carton'] == name)[0]
            if forced_versions and name in forced_versions.keys():
                inds = np.where((cart_results['carton'] == name) &
                                (cart_results['version_pk'] == forced_versions[name]))[0]
            elif versions == 'single':
                inds = np.where((cart_results['carton'] == name) &
                                (cart_results['version_pk'] == unique_version))[0]
            elif versions == 'all':
                inds = indcart
            elif versions == 'latest':
                max_version = np.max(cart_results['version_pk'][indcart])
                inds = np.where((cart_results['carton'] == name) &
                                (cart_results['version_pk'] == max_version))[0]
            all_indices += list(inds)
        assert len(all_indices) > 0, 'There are no carton/version_pk pairs matching the selection'\
            ' criteria used'
        carts_sel = cart_results.iloc[all_indices]
        cartons = carts_sel['carton'].values.tolist()
        plans = carts_sel['plan'].values.tolist()
        categories = carts_sel['category_label'].values.tolist()
        stages, actives = ['N/A'] * len(carts_sel), ['N/A'] * len(carts_sel)

    # Here we start the corresponding log based on the origin, assign_sets,
    # and assign_placeholders value
    log.start_file_logger(f'./logs/origin_{origin}_sets_{assign_sets}'
                          f'_mags_{assign_placeholders}.log')
    log.info('#' * 60)
    print_centered_msg('STARTING CODE EXECUTION', 60, log)
    log.info('#' * 60)
    log.info('Ran process_cartons using the following arguments')
    signature = inspect.signature(process_cartons)
    # First thing we log is the parameters used in process_cartons function
    for param in signature.parameters.keys():
        arg = locals()[param]
        log.info(f'{param}={arg}')
    log.info(' ')

    # Here we write an input-like file if requested
    if origin == 'targetdb' and write_input is True:
        data = np.transpose([cartons, plans, categories, stages, actives])
        ascii.write(data, inputwrite_filename, format='fixed_width',
                    names=['carton', 'plan', 'category', 'stage', 'active'],
                    overwrite=overwrite)
        log.info(f'Wrote file {inputwrite_filename}')

    # If write_output then we prepare the .csv writer
    if write_output is True:
        fields = cfg['db_fields']
        f = open(output_filename, 'w')
        writer = csv.writer(f, delimiter=delim)
        columns = ['carton'] + fields['input_dependent'] + fields['carton_dependent']
        if assign_sets is True:
            new_cols = [x for x in fields['sets'] if x not in fields['set_ranges']]
            columns += new_cols
            for col in fields['set_ranges']:
                columns += [col + '_min', col + '_max']
        if assign_placeholders is True:
            columns += ['magnitude_placeholders']
        writer.writerow(columns)

    # Here we start the actual processing of the cartons
    objects, diffs = [], []
    for index in range(len(cartons)):

        # First we instantiate the CartonInfo objects with the information we have
        obj = CartonInfo(cartons[index], plans[index], categories[index],
                         stages[index], actives[index])
        # If check_exists we run check_existence on the cartons and return the diff dataframe
        if check_exists is True:
            output = None
            diff = obj.check_existence(log, verbose=verb)
            if len(diff) > 0:
                diffs.append(diff)
            if index == len(cartons) - 1:
                log.info('Ran check_existence to compare input file '
                         f'{inputname} with targetdb content')
                if len(diffs) > 0:
                    output = pd.concat(diffs)
                return output
            continue

        if obj.in_targetdb is False:
            log.debug(f'carton={obj.carton} plan={obj.plan} version_pk={obj.version_pk}'
                      f'category={obj.category_label} not found in targetdb')
        # Here we assign sets and/or magnitude placeholders info based on the input arguments,
        # and then visualize the carton and write it to the output .csv if requested
        if obj.in_targetdb is True:
            if assign_sets is True or assign_placeholders is True:
                obj.assign_target_info(calculate_sets=assign_sets,
                                       calculate_mag_placeholders=assign_placeholders)
                objects.append(obj)
                log.info(f'Ran assign_target_info on carton {obj.carton}')

            else:
                objects.append(obj)
                log.info(f'Appending object for carton {obj.carton}'
                         'but without running assign_target_info')

            if visualize is True:
                obj.visualize_content(log)

            if write_output is True:
                curr_info = [getattr(obj, attr) for attr in columns]
                writer.writerow(curr_info)
                log.info(f'wrote row to output csv for carton={obj.carton}'
                         f' ({index + 1}/{len(cartons)})')

    if write_output is True:
        f.close()
        log.info(f'Saved output file={output_filename}')

    if return_objects is True:
        return objects