SlideShare a Scribd company logo
Apache Airflow
넷마블 데이터인프라팀 김장현
def scheduler(args):
job = jobs.SchedulerJob(
if args.daemon:
pid, stdout, stderr, log_file = setup_locations("scheduler",,
handle = setup_logging(log_file)
stdout = open(stdout, 'w+')
stderr = open(stderr, 'w+')
ctx = daemon.DaemonContext(
pidfile=TimeoutPIDLockFile(pid, -1),
with ctx:
signal.signal(signal.SIGINT, sigint_handler)
signal.signal(signal.SIGTERM, sigint_handler)
signal.signal(signal.SIGQUIT, sigquit_handler)
class SchedulerJob(BaseJob):
This SchedulerJob runs for a specific time interval and schedules the jobs
that are ready to run. It figures out the latest runs for each
task and sees if the dependencies for the next schedules are met.
If so, it creates appropriate TaskInstances and sends run commands to the
executor. It does this for each task in each DAG and repeats.
def run(self):
Stats.incr(self.__class__.__name__.lower() + '_start', 1, 1)
# Adding an entry in the DB
with create_session() as session:
self.state = State.RUNNING
id_ = self.id
make_transient(self)
self.id = id_
# In case of max runs or max duration
self.state = State.SUCCESS
except SystemExit as e:
# In case of ^C or SIGTERM
self.state = State.SUCCESS
except Exception as e:
self.state = State.FAILED
self.end_date = timezone.utcnow()
Stats.incr(self.__class__.__name__.lower() + '_end', 1, 1)
def _execute(self):
self.log.info("Starting the scheduler")
# DAGs can be pickled for easier remote execution by some executors
pickle_dags = False
if self.do_pickle and self.executor.__class__ not in \
(executors.LocalExecutor, executors.SequentialExecutor):
pickle_dags = True
# Use multiple processes to parse and generate tasks for the
# DAGs in parallel. By processing them in separate processes,
# we can get parallelism and isolation from potentially harmful
# user code.
self.log.info("Processing files using up to %s processes at a time",
self.max_threads)
self.log.info("Running execute loop for %s seconds", self.run_duration)
self.log.info("Processing each file at most %s times", self.num_runs)
self.log.info("Process each file at most once every %s seconds",
self.file_process_interval)
self.log.info("Wait until at least %s seconds have passed between file parsing "
"loops", self.min_file_parsing_loop_time)
self.log.info("Checking for new files in %s every %s seconds",
self.subdir, self.dag_dir_list_interval)
# Build up a list of Python files that could contain DAGs
self.log.info("Searching for files in %s", self.subdir)
known_file_paths = list_py_file_paths(self.subdir)
self.log.info("There are %s files in %s", len(known_file_paths), self.subdir)
def processor_factory(file_path):
return DagFileProcessor(file_path,
processor_manager = DagFileProcessorManager(self.subdir,
finally:
self.log.info("Exited execute loop")
# Kill all child processes on exit since we don't want to leave
# them as orphaned.
# 후략
# For the execute duration, parse and schedule DAGs
while (timezone.utcnow() - execute_start_time).total_seconds() < \
self.run_duration or self.run_duration < 0:
self.log.debug("Starting Loop...")
loop_start_time = time.time()
# Traverse the DAG directory for Python files containing DAGs
# periodically
elapsed_time_since_refresh = (timezone.utcnow() -
# 중략
# Kick off new processes and collect results from finished ones
self.log.debug("Heartbeating the process manager")
simple_dags = processor_manager.heartbeat()
# Send tasks for execution if available
simple_dag_bag = SimpleDagBag(simple_dags)
if len(simple_dags) > 0:
# Handle cases where a DAG run state is set (perhaps manually) to
# a non-running state. Handle task instances that belong to
# DAG runs in those states
# If a task instance is up for retry but the corresponding DAG run
# isn't running, mark the task instance as FAILED so we don't try
# to re-run it.
# If a task instance is scheduled or queued, but the corresponding
# DAG run isn't running, set the state to NONE so we don't try to
# re-run it.
# Call heartbeats
self.log.debug("Heartbeating the executor")
# Process events from the executor
# Heartbeat the scheduler periodically
time_since_last_heartbeat = (timezone.utcnow() -
if time_since_last_heartbeat > self.heartrate:
self.log.debug("Heartbeating the scheduler")
last_self_heartbeat_time = timezone.utcnow()
# Occasionally print out stats about how fast the files are getting processed
if ((timezone.utcnow() - last_stat_print_time).total_seconds() >
if len(known_file_paths) > 0:
last_stat_print_time = timezone.utcnow()
loop_end_time = time.time()
self.log.debug("Ran scheduling loop in %.2f seconds",
loop_end_time - loop_start_time)
# Exit early for a test mode
if processor_manager.max_runs_reached():
self.log.info("Exiting loop as all files have been processed %s times",
def _execute_helper(self, processor_manager):
:param processor_manager: manager to use
:type processor_manager: DagFileProcessorManager
:return: None
# 중략
# For the execute duration, parse and schedule DAGs
while (timezone.utcnow() - execute_start_time).total_seconds() < \
self.run_duration or self.run_duration < 0:
self.log.debug("Starting Loop...")
loop_start_time = time.time()
# 스케줄러 프로세스 실행 동안 반복되는 루프
# Stop any processors
# Verify that all files were processed, and if so, deactivate DAGs that
# haven't been touched by the scheduler as they likely have been
# deleted.
all_files_processed = True
for file_path in known_file_paths:
if processor_manager.get_last_finish_time(file_path) is None:
all_files_processed = False
if all_files_processed:
"Deactivating DAGs that haven't been touched since %s",
def heartbeat(self):
This should be periodically called by the scheduler. This method will
kick off new processes to process DAG definition files and read the
results from the finished processors.
:return: a list of SimpleDags that were produced by processors that
have finished since the last time this was called
:rtype: list[SimpleDag]
finished_processors = {}
""":type : dict[unicode, AbstractDagFileProcessor]"""
running_processors = {}
""":type : dict[unicode, AbstractDagFileProcessor]"""
for file_path, processor in self._processors.items():
if processor.done:
self.log.info("Processor for %s finished", file_path)
now = timezone.utcnow()
finished_processors[file_path] = processor
self._last_runtime[file_path] = (now -
self._last_finish_time[file_path] = now
self._run_count[file_path] += 1
running_processors[file_path] = processor
self._processors = running_processors
self.log.debug("%s/%s scheduler processes running",
len(self._processors), self._parallelism)
self.log.debug("%s file paths queued for processing",
# Collect all the DAGs that were found in the processed files
simple_dags = []
for file_path, processor in finished_processors.items():
if processor.result is None:
"Processor for %s exited with return code %s.",
processor.file_path, processor.exit_code
for simple_dag in processor.result:
# 중략
# Start more processors if we have enough slots and files to process
while (self._parallelism - len(self._processors) > 0 and
len(self._file_path_queue) > 0):
file_path = self._file_path_queue.pop(0)
processor = self._processor_factory(file_path)
"Started a process (PID: %s) to generate tasks for %s", processor.pid, file_path
self._processors[file_path] = processor
# Update scheduler heartbeat count.
self._run_count[self._heart_beat_key] += 1
return simple_dags
def helper():
# This helper runs in the newly created process
log = logging.getLogger("airflow.processor")
stdout = StreamLogWriter(log, logging.INFO)
stderr = StreamLogWriter(log, logging.WARN)
set_context(log, file_path)
# redirect stdout/stderr to log
sys.stdout = stdout
sys.stderr = stderr
# Re-configure the ORM engine as there are issues with multiple processes
# Change the thread name to differentiate log lines. This is
# really a separate process, but changing the name of the
# process doesn't work, so changing the thread name instead.
threading.current_thread().name = thread_name
start_time = time.time()
log.info("Started process (PID=%s) to work on %s",
os.getpid(), file_path)
scheduler_job = SchedulerJob(dag_ids=dag_id_white_list, log=log)
result = scheduler_job.process_file(file_path,
end_time = time.time()
"Processing %s took %.3f seconds", file_path, end_time - start_time
# Log exceptions through the logging framework.
log.exception("Got an exception! Propagating...")
sys.stdout = sys.__stdout__
sys.stderr = sys.__stderr__
# We re-initialized the ORM within this Process above so we need to
# tear it down manually here
def _launch_process(result_queue,
Launch a process to process the given file.
:param result_queue: the queue to use for passing back the result
:type result_queue: multiprocessing.Queue
:param file_path: the file to process
:type file_path: unicode
:param pickle_dags: whether to pickle the DAGs found in the file and
save them to the DB
:type pickle_dags: bool
:param dag_id_white_list: if specified, only examine DAG ID's that are
in this list
:type dag_id_white_list: list[unicode]
:param thread_name: the name to use for the process that is launched
:type thread_name: unicode
:return: the process that was launched
:rtype: multiprocessing.Process
p = multiprocessing.Process(target=helper,
return p
def start(self):
Launch the process and start processing the DAG.
self._process = DagFileProcessor._launch_process(
self._start_time = timezone.utcnow()
def _process_dags(self, dagbag, dags, tis_out):
Iterates over the dags and processes them. Processing includes:
1. Create appropriate DagRun(s) in the DB.
2. Create appropriate TaskInstance(s) in the DB.
3. Send emails for tasks that have missed SLAs.
:param dagbag: a collection of DAGs to process
:type dagbag: models.DagBag
:param dags: the DAGs from the DagBag to process
:type dags: DAG
:param tis_out: A queue to add generated TaskInstance objects
:type tis_out: multiprocessing.Queue[TaskInstance]
:return: None
for dag in dags:
dag = dagbag.get_dag(dag.dag_id)
if dag.is_paused:
self.log.info("Not processing DAG %s since it's paused", dag.dag_id)
if not dag:
self.log.error("DAG ID %s was not found in the DagBag", dag.dag_id)
continue
self.log.info("Processing %s", dag.dag_id)
dag_run = self.create_dag_run(dag)
if dag_run:
self.log.info("Created %s", dag_run)
self._process_task_instances(dag, tis_out)
models.DagStat.update([d.dag_id for d in dags])
def process_file(self, file_path, pickle_dags=False, session=None):
Process a Python file containing Airflow DAGs.
This includes:
1. Execute the file and look for DAG objects in the namespace.
2. Pickle the DAG and save it to the DB (if necessary).
3. For each DAG, see what tasks should run and create appropriate task
instances in the DB.
4. Record any errors importing the file into ORM
5. Kill (in ORM) any task instances belonging to the DAGs that haven't
issued a heartbeat in a while.
Returns a list of SimpleDag objects that represent the DAGs found in
the file
:param file_path: the path to the Python file that should be executed
:type file_path: unicode
:param pickle_dags: whether serialize the DAGs found in the file and
save them to the db
:type pickle_dags: bool
:return: a list of SimpleDags made from the Dags found in the file
:rtype: list[SimpleDag]
"""
self.log.info("Processing file %s for tasks to queue", file_path)
# As DAGs are parsed from this file, they will be converted into SimpleDags
simple_dags = []
dagbag = models.DagBag(file_path)
except Exception:
self.log.exception("Failed at reloading the DAG file %s", file_path)
Stats.incr('dag_file_refresh_error', 1, 1)
return []
# 중략
self._process_dags(dagbag, dags, ti_keys_to_schedule)
def _process_task_instances(self, dag, queue, session=None):
This method schedules the tasks for a single DAG by looking at the
active DAG runs and adding task instances that should run to the
# 중략
for run in active_dag_runs:
self.log.debug("Examining active DAG run: %s", run)
# this needs a fresh session sometimes tis get detached
tis = run.get_task_instances(state=(State.NONE,
# this loop is quite slow as it uses are_dependencies_met for
# every task (in ti.is_runnable). This is also called in
# update_state above which has already checked these tasks
for ti in tis:
task = dag.get_task(ti.task_id)
# fixme: ti.task is transient but needs to be set
ti.task = task
# future: remove adhoc
if task.adhoc:
if ti.are_dependencies_met(
self.log.debug('Queuing task: %s', ti)
def process_file(self, file_path, pickle_dags=False, session=None):
Process a Python file containing Airflow DAGs.
This includes:
1. Execute the file and look for DAG objects in the namespace.
2. Pickle the DAG and save it to the DB (if necessary).
3. For each DAG, see what tasks should run and create appropriate task
instances in the DB.
4. Record any errors importing the file into ORM
5. Kill (in ORM) any task instances belonging to the DAGs that haven't
issued a heartbeat in a while.
Returns a list of SimpleDag objects that represent the DAGs found in
the file
:param file_path: the path to the Python file that should be executed
:type file_path: unicode
:param pickle_dags: whether serialize the DAGs found in the file and
save them to the db
:type pickle_dags: bool
:return: a list of SimpleDags made from the Dags found in the file
:rtype: list[SimpleDag]
"""
self.log.info("Processing file %s for tasks to queue", file_path)
# As DAGs are parsed from this file, they will be converted into SimpleDags
simple_dags = []
dagbag = models.DagBag(file_path)
except Exception:
self.log.exception("Failed at reloading the DAG file %s", file_path)
Stats.incr('dag_file_refresh_error', 1, 1)
return []
# 중략
self._process_dags(dagbag, dags, ti_keys_to_schedule)
def process_file(self, file_path, pickle_dags=False, session=None):
# 중략
ti_keys_to_schedule = []
self._process_dags(dagbag, dags, ti_keys_to_schedule)
for ti_key in ti_keys_to_schedule:
dag = dagbag.dags[ti_key[0]]
task = dag.get_task(ti_key[1])
ti = models.TaskInstance(task, ti_key[2])
ti.refresh_from_db(session=session, lock_for_update=True)
# We can defer checking the task dependency checks to the worker themselves
# since they can be expensive to run in the scheduler.
dep_context = DepContext(deps=QUEUE_DEPS, ignore_task_deps=True)
# Only schedule tasks that have their dependencies met, e.g. to avoid
# a task that recently got its state changed to RUNNING from somewhere
# other than the scheduler from getting its state overwritten.
# TODO(aoen): It's not great that we have to check all the task instance
# dependencies twice; once to get the task scheduled, and again to actually
# run the task. We should try to come up with a way to only check them once.
if ti.are_dependencies_met(
# Task starts out in the scheduled state. All tasks in the
# scheduled state will be sent to the executor
ti.state = State.SCHEDULED
# Also save this task instance to the DB.
self.log.info("Creating / updating %s in ORM", ti)
# commit batch
# 중략
return simple_dags
def _find_executable_task_instances(self, simple_dag_bag, states, session=None):
# 중략
states_to_count_as_running = [State.RUNNING]
executable_tis = []
# Get all the queued task instances associated with scheduled
# DagRuns which are not backfilled, in the given states,
# and the dag is not paused
TI = models.TaskInstance
DR = models.DagRun
DM = models.DagModel
ti_query = (
and_(DR.dag_id == TI.dag_id,
DR.execution_date == TI.execution_date))
.filter(or_(DR.run_id == None,
not_(DR.run_id.like(BackfillJob.ID_PREFIX + '%'))))
.outerjoin(DM, DM.dag_id==TI.dag_id)
.filter(or_(DM.dag_id == None,
if None in states:
ti_query = ti_query.filter(or_(TI.state == None, TI.state.in_(states)))
ti_query = ti_query.filter(TI.state.in_(states))
task_instances_to_examine = ti_query.all()
# 중략
# Get the pool settings
pools = {p.pool: p for p in session.query(models.Pool).all()}
pool_to_task_instances = defaultdict(list)
for task_instance in task_instances_to_examine:
# 중략
# Go through each pool, and queue up a task for execution if there are
# any open slots in the pool.
for pool, task_instances in pool_to_task_instances.items():
# 중략
priority_sorted_task_instances = sorted(
task_instances, key=lambda ti: (-ti.priority_weight, ti.execution_date))
# DAG IDs with running tasks that equal the concurrency limit of the dag
dag_id_to_possibly_running_task_count = {}
for task_instance in priority_sorted_task_instances:
if open_slots <= 0:
"Not scheduling since there are %s open slots in pool %s",
open_slots, pool
# Can't schedule any more since there are no more open slots.
# Check to make sure that the task concurrency of the DAG hasn't been
# reached.
# 중략
task_instance_str = "\n\t".join(
["{}".format(x) for x in executable_tis])
self.log.info("Setting the follow tasks to queued state:\n\t%s", task_instance_str)
# so these dont expire on commit
for ti in executable_tis:
copy_dag_id = ti.dag_id
copy_execution_date = ti.execution_date
copy_task_id = ti.task_id
ti.dag_id = copy_dag_id
ti.execution_date = copy_execution_date
ti.task_id = copy_task_id
return executable_tis
class LocalExecutor(BaseExecutor):
LocalExecutor executes tasks locally in parallel. It uses the
multiprocessing Python library and queues to parallelize the execution
of tasks.
def start(self):
self.result_queue = multiprocessing.Queue()
self.queue = None
self.workers = []
self.workers_used = 0
self.workers_active = 0
self.impl = (LocalExecutor._UnlimitedParallelism(self) if self.parallelism == 0
else LocalExecutor._LimitedParallelism(self))
def __init__(
heartrate=conf.getfloat('scheduler', 'JOB_HEARTBEAT_SEC'),
*args, **kwargs):
self.hostname = get_hostname()
self.executor = executor
self.executor_class = executor.__class__.__name__
self.start_date = timezone.utcnow()
self.latest_heartbeat = timezone.utcnow()
self.heartrate = heartrate
self.unixname = getpass.getuser()
self.max_tis_per_query = conf.getint('scheduler', 'max_tis_per_query')
super(BaseJob, self).__init__(*args, **kwargs)
class BaseJob(Base, LoggingMixin):
Abstract class to be derived for jobs. Jobs are processing items with state
and duration that aren't task instances. For instance a BackfillJob is
a collection of task instance runs, but should have its own state, start
and end time.
def GetDefaultExecutor():
"""Creates a new instance of the configured executor if none exists and returns it"""
if DEFAULT_EXECUTOR is not None:
executor_name = configuration.conf.get('core', 'EXECUTOR')
DEFAULT_EXECUTOR = _get_executor(executor_name)
log = LoggingMixin().log
log.info("Using executor %s", executor_name)
def _get_executor(executor_name):
Creates a new instance of the named executor.
In case the executor name is not known in airflow,
look for it in the plugins
if executor_name == Executors.LocalExecutor:
return LocalExecutor()
elif executor_name == Executors.SequentialExecutor:
return SequentialExecutor()
elif executor_name == Executors.CeleryExecutor:
from airflow.executors.celery_executor import CeleryExecutor
return CeleryExecutor()
elif executor_name == Executors.DaskExecutor:
from airflow.executors.dask_executor import DaskExecutor
return DaskExecutor()
elif executor_name == Executors.MesosExecutor:
from airflow.contrib.executors.mesos_executor import MesosExecutor
return MesosExecutor()
elif executor_name == Executors.KubernetesExecutor:
from airflow.contrib.executors.kubernetes_executor import KubernetesExecutor
return KubernetesExecutor()
# Loading plugins
executor_path = executor_name.split('.')
if len(executor_path) != 2:
raise AirflowException(
"Executor {0} not supported: "
"please specify in format plugin_module.executor".format(executor_name))
if executor_path[0] in globals():
return globals()[executor_path[0]].__dict__[executor_path[1]]()
raise AirflowException("Executor {0} not supported.".format(executor_name))
def _execute_helper(self, processor_manager):
:param processor_manager: manager to use
:type processor_manager: DagFileProcessorManager
:return: None
# 후략
class SchedulerJob(BaseJob):
This SchedulerJob runs for a specific time interval and schedules the jobs
that are ready to run. It figures out the latest runs for each
task and sees if the dependencies for the next schedules are met.
If so, it creates appropriate TaskInstances and sends run commands to the
executor. It does this for each task in each DAG and repeats.
def _enqueue_task_instances_with_queued_state(self, simple_dag_bag, task_instances):
Takes task_instances, which should have been set to queued, and enqueues them
with the executor.
:param task_instances: TaskInstances to enqueue
:type task_instances: List[TaskInstance]
:param simple_dag_bag: Should contain all of the task_instances' dags
:type simple_dag_bag: SimpleDagBag
TI = models.TaskInstance
# actually enqueue them
for task_instance in task_instances:
simple_dag = simple_dag_bag.get_dag(task_instance.dag_id)
command = " ".join(TI.generate_command(
priority = task_instance.priority_weight
queue = task_instance.queue
"Sending %s to executor with priority %s and queue %s",
task_instance.key, priority, queue
# save attributes so sqlalchemy doesnt expire them
copy_dag_id = task_instance.dag_id
copy_task_id = task_instance.task_id
copy_execution_date = task_instance.execution_date
task_instance.dag_id = copy_dag_id
task_instance.task_id = copy_task_id
task_instance.execution_date = copy_execution_date
def _execute_task_instances(self,
Attempts to execute TaskInstances that should be executed by the scheduler.
There are three steps:
1. Pick TIs by priority with the constraint that they are in the expected states
and that we do not exceed max_active_runs or pool limits.
2. Change the state for the TIs above atomically.
3. Enqueue the TIs in the executor.
:param simple_dag_bag: TaskInstances associated with DAGs in the
simple_dag_bag will be fetched from the DB and executed
:type simple_dag_bag: SimpleDagBag
:param states: Execute TaskInstances in these states
:type states: Tuple[State]
:return: None
executable_tis = self._find_executable_task_instances(simple_dag_bag, states,
def query(result, items):
tis_with_state_changed = self._change_state_for_executable_task_instances(
return result + len(tis_with_state_changed)
return helpers.reduce_in_chunks(query, executable_tis, 0, self.max_tis_per_query)
def _execute_helper(self, processor_manager):
:param processor_manager: manager to use
:type processor_manager: DagFileProcessorManager
:return: None
# 중략
# For the execute duration, parse and schedule DAGs
while (timezone.utcnow() - execute_start_time).total_seconds() < \
self.run_duration or self.run_duration < 0:
self.log.debug("Starting Loop...")
loop_start_time = time.time()
# 중략
# Kick off new processes and collect results from finished ones
self.log.debug("Heartbeating the process manager")
simple_dags = processor_manager.heartbeat()
# Send tasks for execution if available
simple_dag_bag = SimpleDagBag(simple_dags)
if len(simple_dags) > 0:
# 중략
# Call heartbeats
self.log.debug("Heartbeating the executor")
# 중략
# Exit early for a test mode
if processor_manager.max_runs_reached():
self.log.info("Exiting loop as all files have been processed %s times",
# 후략
def queue_command(self, task_instance, command, priority=1, queue=None):
key = task_instance.key
if key not in self.queued_tasks and key not in self.running:
self.log.info("Adding to queue: %s", command)
self.queued_tasks[key] = (command, priority, queue, task_instance)
else:
self.log.info("could not queue task {}".format(key))
def heartbeat(self):
# Triggering new jobs
if not self.parallelism:
open_slots = len(self.queued_tasks)
open_slots = self.parallelism - len(self.running)
self.log.debug("%s running task instances", len(self.running))
self.log.debug("%s in queue", len(self.queued_tasks))
self.log.debug("%s open slots", open_slots)
sorted_queue = sorted(
[(k, v) for k, v in self.queued_tasks.items()],
key=lambda x: x[1][1],
for i in range(min((open_slots, len(self.queued_tasks)))):
key, (command, _, queue, ti) = sorted_queue.pop(0)
# TODO(jlowin) without a way to know what Job ran which tasks,
# there is a danger that another Job started running a task
# that was also queued to this executor. This is the last chance
# to check if that happened. The most probable way is that a
# Scheduler tried to run a task that was originally queued by a
# Backfill. This fix reduces the probability of a collision but
# does NOT eliminate it.
if ti.state != State.RUNNING:
self.running[key] = command
'Task is already running, not sending to '
'executor: {}'.format(key))
# Calling child class sync method
self.log.debug("Calling the %s sync method", self.__class__)
def execute_async(self, key, command):
:param key: the key to identify the TI
:type key: Tuple(dag_id, task_id, execution_date)
:param command: the command to execute
:type command: string
local_worker = LocalWorker(self.executor.result_queue)
local_worker.key = key
local_worker.command = command
self.executor.workers_used += 1
self.executor.workers_active += 1
class LocalWorker(multiprocessing.Process, LoggingMixin):
"""LocalWorker Process implementation to run airflow commands. Executes the given
command and puts the result into a result queue when done, terminating execution."""
def __init__(self, result_queue):
:param result_queue: the queue to store result states tuples (key, State)
:type result_queue: multiprocessing.Queue
super(LocalWorker, self).__init__()
self.daemon = True
self.result_queue = result_queue
self.key = None
self.command = None
def execute_work(self, key, command):
Executes command received and stores result state in queue.
:param key: the key to identify the TI
:type key: Tuple(dag_id, task_id, execution_date)
:param command: the command to execute
:type command: string
if key is None:
return
self.log.info("%s running %s", self.__class__.__name__, command)
command = "exec bash -c '{0}'".format(command)
subprocess.check_call(command, shell=True, close_fds=True)
state = State.SUCCESS
except subprocess.CalledProcessError as e:
state = State.FAILED
self.log.error("Failed to execute task %s.", str(e))
# TODO: Why is this commented out?
# raise e
self.result_queue.put((key, state))
def run(self):
self.execute_work(self.key, self.command)
def generate_command(dag_id,
Generates the shell command required to execute this task instance.
iso = execution_date.isoformat()
cmd = ["airflow", "run", str(dag_id), str(task_id), str(iso)]
cmd.extend(["--mark_success"]) if mark_success else None
cmd.extend(["--pickle", str(pickle_id)]) if pickle_id else None
cmd.extend(["--job_id", str(job_id)]) if job_id else None
cmd.extend(["-A"]) if ignore_all_deps else None
cmd.extend(["-i"]) if ignore_task_deps else None
cmd.extend(["-I"]) if ignore_depends_on_past else None
cmd.extend(["--force"]) if ignore_ti_state else None
cmd.extend(["--local"]) if local else None
cmd.extend(["--pool", pool]) if pool else None
cmd.extend(["--raw"]) if raw else None
cmd.extend(["-sd", file_path]) if file_path else None
cmd.extend(["--cfg_path", cfg_path]) if cfg_path else None
return cmd
def run(args, dag=None):
# 중략
task = dag.get_task(task_id=args.task_id)
ti = TaskInstance(task, args.execution_date)
hostname = get_hostname()
log.info("Running %s on host %s", ti, hostname)
if args.interactive:
_run(args, dag, ti)
with redirect_stdout(ti.log, logging.INFO), redirect_stderr(ti.log, logging.WARN):
_run(args, dag, ti)
def _run(args, dag, ti):
if args.local:
run_job = jobs.LocalTaskJob(
elif args.raw:
# 후략
def _execute(self):
self.task_runner = get_task_runner(self)
# 중략
if not self.task_instance._check_and_change_state_before_execution(
pool=self.pool):
self.log.info("Task is not able to be run")
last_heartbeat_time = time.time()
heartbeat_time_limit = conf.getint('scheduler',
while True:
# Monitor the task to see if it's done
return_code = self.task_runner.return_code()
if return_code is not None:
self.log.info("Task exited with return code %s", return_code)
# 중략
self.on_kill()
class BashTaskRunner(BaseTaskRunner):
Runs the raw Airflow task by invoking through the Bash shell.
def __init__(self, local_task_job):
super(BashTaskRunner, self).__init__(local_task_job)
def start(self):
self.process = self.run_command(['bash', '-c'], join_args=True)
def return_code(self):
return self.process.poll()

More Related Content

What's hot

Drush. Secrets come out.
Drush. Secrets come out.Drush. Secrets come out.
Drush. Secrets come out.
Alex S
How Secure Are Docker Containers?
How Secure Are Docker Containers?How Secure Are Docker Containers?
How Secure Are Docker Containers?
Ben Hall
Deconstructing the Functional Web with Clojure
Deconstructing the Functional Web with ClojureDeconstructing the Functional Web with Clojure
Deconstructing the Functional Web with Clojure
Norman Richards
To infinity and beyond
To infinity and beyondTo infinity and beyond
To infinity and beyond
Drupal 8 in action, the route to the method
Drupal 8 in action, the route to the methodDrupal 8 in action, the route to the method
Drupal 8 in action, the route to the method
Zabbix LLD from a C Module by Jan-Piet Mens
Zabbix LLD from a C Module by Jan-Piet MensZabbix LLD from a C Module by Jan-Piet Mens
Zabbix LLD from a C Module by Jan-Piet Mens
Anatomy of distributed computing with Hadoop
Anatomy of distributed computing with HadoopAnatomy of distributed computing with Hadoop
Anatomy of distributed computing with Hadoop
Sergey Bushik
Best Practices in Handling Performance Issues
Best Practices in Handling Performance IssuesBest Practices in Handling Performance Issues
Best Practices in Handling Performance Issues
How to create a secured multi tenancy for clustered ML with JupyterHub
How to create a secured multi tenancy for clustered ML with JupyterHubHow to create a secured multi tenancy for clustered ML with JupyterHub
How to create a secured multi tenancy for clustered ML with JupyterHub
Tiago Simões
Nouveau document texte
Nouveau document texteNouveau document texte
Nouveau document texte
Sai Ef
Node Powered Mobile
Node Powered MobileNode Powered Mobile
Node Powered Mobile
Tim Caswell
Commands documentaion
Commands documentaionCommands documentaion
Commands documentaion
How to go the extra mile on monitoring
How to go the extra mile on monitoringHow to go the extra mile on monitoring
How to go the extra mile on monitoring
Tiago Simões
Real Time Web with Node
Real Time Web with NodeReal Time Web with Node
Real Time Web with Node
Tim Caswell
Perl Web Client
Perl Web ClientPerl Web Client
Perl Web Client
Flavio Poletti
Nubilus Perl
Nubilus PerlNubilus Perl
Nubilus Perl
Flavio Poletti
Drush - use full power - DrupalCamp Donetsk 2014
Drush - use full power - DrupalCamp Donetsk 2014Drush - use full power - DrupalCamp Donetsk 2014
Drush - use full power - DrupalCamp Donetsk 2014
Alex S
Pry, the good parts
Pry, the good partsPry, the good parts
Pry, the good parts
Conrad Irwin

What's hot (20)

Drush. Secrets come out.
Drush. Secrets come out.Drush. Secrets come out.
Drush. Secrets come out.
How Secure Are Docker Containers?
How Secure Are Docker Containers?How Secure Are Docker Containers?
How Secure Are Docker Containers?
Deconstructing the Functional Web with Clojure
Deconstructing the Functional Web with ClojureDeconstructing the Functional Web with Clojure
Deconstructing the Functional Web with Clojure
To infinity and beyond
To infinity and beyondTo infinity and beyond
To infinity and beyond
Drupal 8 in action, the route to the method
Drupal 8 in action, the route to the methodDrupal 8 in action, the route to the method
Drupal 8 in action, the route to the method
Zabbix LLD from a C Module by Jan-Piet Mens
Zabbix LLD from a C Module by Jan-Piet MensZabbix LLD from a C Module by Jan-Piet Mens
Zabbix LLD from a C Module by Jan-Piet Mens
Anatomy of distributed computing with Hadoop
Anatomy of distributed computing with HadoopAnatomy of distributed computing with Hadoop
Anatomy of distributed computing with Hadoop
Best Practices in Handling Performance Issues
Best Practices in Handling Performance IssuesBest Practices in Handling Performance Issues
Best Practices in Handling Performance Issues
How to create a secured multi tenancy for clustered ML with JupyterHub
How to create a secured multi tenancy for clustered ML with JupyterHubHow to create a secured multi tenancy for clustered ML with JupyterHub
How to create a secured multi tenancy for clustered ML with JupyterHub
Nouveau document texte
Nouveau document texteNouveau document texte
Nouveau document texte
Node Powered Mobile
Node Powered MobileNode Powered Mobile
Node Powered Mobile
Commands documentaion
Commands documentaionCommands documentaion
Commands documentaion
How to go the extra mile on monitoring
How to go the extra mile on monitoringHow to go the extra mile on monitoring
How to go the extra mile on monitoring
Real Time Web with Node
Real Time Web with NodeReal Time Web with Node
Real Time Web with Node
Perl Web Client
Perl Web ClientPerl Web Client
Perl Web Client
Nubilus Perl
Nubilus PerlNubilus Perl
Nubilus Perl
Drush - use full power - DrupalCamp Donetsk 2014
Drush - use full power - DrupalCamp Donetsk 2014Drush - use full power - DrupalCamp Donetsk 2014
Drush - use full power - DrupalCamp Donetsk 2014
Pry, the good parts
Pry, the good partsPry, the good parts
Pry, the good parts

Similar to Apache Airflow

Magic of Ruby
Magic of RubyMagic of Ruby
Magic of Ruby
Gabriele Lana
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak   CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
Ajuste (tuning) del rendimiento de SQL Server 2008
Ajuste (tuning) del rendimiento de SQL Server 2008Ajuste (tuning) del rendimiento de SQL Server 2008
Ajuste (tuning) del rendimiento de SQL Server 2008
Eduardo Castro
SQL Server Performance Analysis
SQL Server Performance AnalysisSQL Server Performance Analysis
SQL Server Performance Analysis
Eduardo Castro
Python advanced 3.the python std lib by example – system related modules
Python advanced 3.the python std lib by example – system related modulesPython advanced 3.the python std lib by example – system related modules
Python advanced 3.the python std lib by example – system related modules
John(Qiang) Zhang
Django Celery
Django Celery Django Celery
Django Celery
Mat Clayton
Scaling python webapps from 0 to 50 million users - A top-down approach
Scaling python webapps from 0 to 50 million users - A top-down approachScaling python webapps from 0 to 50 million users - A top-down approach
Scaling python webapps from 0 to 50 million users - A top-down approach
Jinal Jhaveri
Process monitoring in UNIX shell scripting
Process monitoring in UNIX shell scriptingProcess monitoring in UNIX shell scripting
Process monitoring in UNIX shell scripting
Dan Morrill
Python magicmethods
Python magicmethodsPython magicmethods
Python magicmethods
Counting on God
Counting on GodCounting on God
Counting on God
James Gray
Airflow tutorials hands_on
Airflow tutorials hands_onAirflow tutorials hands_on
Airflow tutorials hands_on
Monitoring with Prometheus
Monitoring with PrometheusMonitoring with Prometheus
Monitoring with Prometheus
Shiao-An Yuan
Scheduling tasks the human way - Brad Wood - ITB2021
Scheduling tasks the human way -  Brad Wood - ITB2021Scheduling tasks the human way -  Brad Wood - ITB2021
Scheduling tasks the human way - Brad Wood - ITB2021
Ortus Solutions, Corp
Hacking ansible
Hacking ansibleHacking ansible
Hacking ansible
Python Asíncrono - Async Python
Python Asíncrono - Async PythonPython Asíncrono - Async Python
Python Asíncrono - Async Python
Javier Abadía
Apache Spark in your likeness - low and high level customization
Apache Spark in your likeness - low and high level customizationApache Spark in your likeness - low and high level customization
Apache Spark in your likeness - low and high level customization
Bartosz Konieczny
Commit2015 kharchenko - python generators - ext
Commit2015   kharchenko - python generators - extCommit2015   kharchenko - python generators - ext
Commit2015 kharchenko - python generators - ext
Maxym Kharchenko
Go Web Development
Go Web DevelopmentGo Web Development
Go Web Development
Cheng-Yi Yu
PyCon 2010 SQLAlchemy tutorial
PyCon 2010 SQLAlchemy tutorialPyCon 2010 SQLAlchemy tutorial
PyCon 2010 SQLAlchemy tutorial
Metaprogramovanie #1
Metaprogramovanie #1Metaprogramovanie #1
Metaprogramovanie #1
Jano Suchal

Similar to Apache Airflow (20)

Magic of Ruby
Magic of RubyMagic of Ruby
Magic of Ruby
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak   CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
Ajuste (tuning) del rendimiento de SQL Server 2008
Ajuste (tuning) del rendimiento de SQL Server 2008Ajuste (tuning) del rendimiento de SQL Server 2008
Ajuste (tuning) del rendimiento de SQL Server 2008
SQL Server Performance Analysis
SQL Server Performance AnalysisSQL Server Performance Analysis
SQL Server Performance Analysis
Python advanced 3.the python std lib by example – system related modules
Python advanced 3.the python std lib by example – system related modulesPython advanced 3.the python std lib by example – system related modules
Python advanced 3.the python std lib by example – system related modules
Django Celery
Django Celery Django Celery
Django Celery
Scaling python webapps from 0 to 50 million users - A top-down approach
Scaling python webapps from 0 to 50 million users - A top-down approachScaling python webapps from 0 to 50 million users - A top-down approach
Scaling python webapps from 0 to 50 million users - A top-down approach
Process monitoring in UNIX shell scripting
Process monitoring in UNIX shell scriptingProcess monitoring in UNIX shell scripting
Process monitoring in UNIX shell scripting
Python magicmethods
Python magicmethodsPython magicmethods
Python magicmethods
Counting on God
Counting on GodCounting on God
Counting on God
Airflow tutorials hands_on
Airflow tutorials hands_onAirflow tutorials hands_on
Airflow tutorials hands_on
Monitoring with Prometheus
Monitoring with PrometheusMonitoring with Prometheus
Monitoring with Prometheus
Scheduling tasks the human way - Brad Wood - ITB2021
Scheduling tasks the human way -  Brad Wood - ITB2021Scheduling tasks the human way -  Brad Wood - ITB2021
Scheduling tasks the human way - Brad Wood - ITB2021
Hacking ansible
Hacking ansibleHacking ansible
Hacking ansible
Python Asíncrono - Async Python
Python Asíncrono - Async PythonPython Asíncrono - Async Python
Python Asíncrono - Async Python
Apache Spark in your likeness - low and high level customization
Apache Spark in your likeness - low and high level customizationApache Spark in your likeness - low and high level customization
Apache Spark in your likeness - low and high level customization
Commit2015 kharchenko - python generators - ext
Commit2015   kharchenko - python generators - extCommit2015   kharchenko - python generators - ext
Commit2015 kharchenko - python generators - ext
Go Web Development
Go Web DevelopmentGo Web Development
Go Web Development
PyCon 2010 SQLAlchemy tutorial
PyCon 2010 SQLAlchemy tutorialPyCon 2010 SQLAlchemy tutorial
PyCon 2010 SQLAlchemy tutorial
Metaprogramovanie #1
Metaprogramovanie #1Metaprogramovanie #1
Metaprogramovanie #1

Recently uploaded

A presentation that explain the Power BI Licensing
A presentation that explain the Power BI LicensingA presentation that explain the Power BI Licensing
A presentation that explain the Power BI Licensing
Global Situational Awareness of A.I. and where its headed
Global Situational Awareness of A.I. and where its headedGlobal Situational Awareness of A.I. and where its headed
Global Situational Awareness of A.I. and where its headed
vikram sood
Analysis insight about a Flyball dog competition team's performance
Analysis insight about a Flyball dog competition team's performanceAnalysis insight about a Flyball dog competition team's performance
Analysis insight about a Flyball dog competition team's performance
Learn SQL from basic queries to Advance queries
Learn SQL from basic queries to Advance queriesLearn SQL from basic queries to Advance queries
Learn SQL from basic queries to Advance queries
University of New South Wales degree offer diploma Transcript
University of New South Wales degree offer diploma TranscriptUniversity of New South Wales degree offer diploma Transcript
University of New South Wales degree offer diploma Transcript
4th Modern Marketing Reckoner by MMA Global India & Group M: 60+ experts on W...
4th Modern Marketing Reckoner by MMA Global India & Group M: 60+ experts on W...4th Modern Marketing Reckoner by MMA Global India & Group M: 60+ experts on W...
4th Modern Marketing Reckoner by MMA Global India & Group M: 60+ experts on W...
Social Samosa
Challenges of Nation Building-1.pptx with more important
Challenges of Nation Building-1.pptx with more importantChallenges of Nation Building-1.pptx with more important
Challenges of Nation Building-1.pptx with more important
Udemy_2024_Global_Learning_Skills_Trends_Report (1).pdf
Udemy_2024_Global_Learning_Skills_Trends_Report (1).pdfUdemy_2024_Global_Learning_Skills_Trends_Report (1).pdf
Udemy_2024_Global_Learning_Skills_Trends_Report (1).pdf
Fernanda Palhano
The Building Blocks of QuestDB, a Time Series Database
The Building Blocks of QuestDB, a Time Series DatabaseThe Building Blocks of QuestDB, a Time Series Database
The Building Blocks of QuestDB, a Time Series Database
javier ramirez
06-12-2024-BudapestDataForum-BuildingReal-timePipelineswithFLaNK AIM
06-12-2024-BudapestDataForum-BuildingReal-timePipelineswithFLaNK AIM06-12-2024-BudapestDataForum-BuildingReal-timePipelineswithFLaNK AIM
06-12-2024-BudapestDataForum-BuildingReal-timePipelineswithFLaNK AIM
Timothy Spann
ViewShift: Hassle-free Dynamic Policy Enforcement for Every Data Lake
ViewShift: Hassle-free Dynamic Policy Enforcement for Every Data LakeViewShift: Hassle-free Dynamic Policy Enforcement for Every Data Lake
ViewShift: Hassle-free Dynamic Policy Enforcement for Every Data Lake
Walaa Eldin Moustafa
Beyond the Basics of A/B Tests: Highly Innovative Experimentation Tactics You...
Beyond the Basics of A/B Tests: Highly Innovative Experimentation Tactics You...Beyond the Basics of A/B Tests: Highly Innovative Experimentation Tactics You...
Beyond the Basics of A/B Tests: Highly Innovative Experimentation Tactics You...
STATATHON: Unleashing the Power of Statistics in a 48-Hour Knowledge Extravag...
STATATHON: Unleashing the Power of Statistics in a 48-Hour Knowledge Extravag...STATATHON: Unleashing the Power of Statistics in a 48-Hour Knowledge Extravag...
STATATHON: Unleashing the Power of Statistics in a 48-Hour Knowledge Extravag...
sameer shah
Intelligence supported media monitoring in veterinary medicine
Intelligence supported media monitoring in veterinary medicineIntelligence supported media monitoring in veterinary medicine
Intelligence supported media monitoring in veterinary medicine
The Ipsos - AI - Monitor 2024 Report.pdf
The  Ipsos - AI - Monitor 2024 Report.pdfThe  Ipsos - AI - Monitor 2024 Report.pdf
The Ipsos - AI - Monitor 2024 Report.pdf
Social Samosa

Recently uploaded (20)

A presentation that explain the Power BI Licensing
A presentation that explain the Power BI LicensingA presentation that explain the Power BI Licensing
A presentation that explain the Power BI Licensing
Global Situational Awareness of A.I. and where its headed
Global Situational Awareness of A.I. and where its headedGlobal Situational Awareness of A.I. and where its headed
Global Situational Awareness of A.I. and where its headed
Analysis insight about a Flyball dog competition team's performance
Analysis insight about a Flyball dog competition team's performanceAnalysis insight about a Flyball dog competition team's performance
Analysis insight about a Flyball dog competition team's performance
Learn SQL from basic queries to Advance queries
Learn SQL from basic queries to Advance queriesLearn SQL from basic queries to Advance queries
Learn SQL from basic queries to Advance queries
University of New South Wales degree offer diploma Transcript
University of New South Wales degree offer diploma TranscriptUniversity of New South Wales degree offer diploma Transcript
University of New South Wales degree offer diploma Transcript
4th Modern Marketing Reckoner by MMA Global India & Group M: 60+ experts on W...
4th Modern Marketing Reckoner by MMA Global India & Group M: 60+ experts on W...4th Modern Marketing Reckoner by MMA Global India & Group M: 60+ experts on W...
4th Modern Marketing Reckoner by MMA Global India & Group M: 60+ experts on W...
Challenges of Nation Building-1.pptx with more important
Challenges of Nation Building-1.pptx with more importantChallenges of Nation Building-1.pptx with more important
Challenges of Nation Building-1.pptx with more important
Udemy_2024_Global_Learning_Skills_Trends_Report (1).pdf
Udemy_2024_Global_Learning_Skills_Trends_Report (1).pdfUdemy_2024_Global_Learning_Skills_Trends_Report (1).pdf
Udemy_2024_Global_Learning_Skills_Trends_Report (1).pdf
The Building Blocks of QuestDB, a Time Series Database
The Building Blocks of QuestDB, a Time Series DatabaseThe Building Blocks of QuestDB, a Time Series Database
The Building Blocks of QuestDB, a Time Series Database
06-12-2024-BudapestDataForum-BuildingReal-timePipelineswithFLaNK AIM
06-12-2024-BudapestDataForum-BuildingReal-timePipelineswithFLaNK AIM06-12-2024-BudapestDataForum-BuildingReal-timePipelineswithFLaNK AIM
06-12-2024-BudapestDataForum-BuildingReal-timePipelineswithFLaNK AIM
ViewShift: Hassle-free Dynamic Policy Enforcement for Every Data Lake
ViewShift: Hassle-free Dynamic Policy Enforcement for Every Data LakeViewShift: Hassle-free Dynamic Policy Enforcement for Every Data Lake
ViewShift: Hassle-free Dynamic Policy Enforcement for Every Data Lake
Beyond the Basics of A/B Tests: Highly Innovative Experimentation Tactics You...
Beyond the Basics of A/B Tests: Highly Innovative Experimentation Tactics You...Beyond the Basics of A/B Tests: Highly Innovative Experimentation Tactics You...
Beyond the Basics of A/B Tests: Highly Innovative Experimentation Tactics You...
STATATHON: Unleashing the Power of Statistics in a 48-Hour Knowledge Extravag...
STATATHON: Unleashing the Power of Statistics in a 48-Hour Knowledge Extravag...STATATHON: Unleashing the Power of Statistics in a 48-Hour Knowledge Extravag...
STATATHON: Unleashing the Power of Statistics in a 48-Hour Knowledge Extravag...
Intelligence supported media monitoring in veterinary medicine
Intelligence supported media monitoring in veterinary medicineIntelligence supported media monitoring in veterinary medicine
Intelligence supported media monitoring in veterinary medicine
The Ipsos - AI - Monitor 2024 Report.pdf
The  Ipsos - AI - Monitor 2024 Report.pdfThe  Ipsos - AI - Monitor 2024 Report.pdf
The Ipsos - AI - Monitor 2024 Report.pdf

Apache Airflow

  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 11.
  • 12.
  • 24. Q&A
  • 25.
  • 26.
  • 27.
  • 28.
  • 29. def scheduler(args): print(settings.HEADER) job = jobs.SchedulerJob( dag_id=args.dag_id, subdir=process_subdir(args.subdir), run_duration=args.run_duration, num_runs=args.num_runs, do_pickle=args.do_pickle) if args.daemon: pid, stdout, stderr, log_file = setup_locations("scheduler", args.pid, args.stdout, args.stderr, args.log_file) handle = setup_logging(log_file) stdout = open(stdout, 'w+') stderr = open(stderr, 'w+') ctx = daemon.DaemonContext( pidfile=TimeoutPIDLockFile(pid, -1), files_preserve=[handle], stdout=stdout, stderr=stderr, ) with ctx: job.run() stdout.close() stderr.close() else: signal.signal(signal.SIGINT, sigint_handler) signal.signal(signal.SIGTERM, sigint_handler) signal.signal(signal.SIGQUIT, sigquit_handler) job.run() class SchedulerJob(BaseJob): """ This SchedulerJob runs for a specific time interval and schedules the jobs that are ready to run. It figures out the latest runs for each task and sees if the dependencies for the next schedules are met. If so, it creates appropriate TaskInstances and sends run commands to the executor. It does this for each task in each DAG and repeats. 
""" def run(self): Stats.incr(self.__class__.__name__.lower() + '_start', 1, 1) # Adding an entry in the DB with create_session() as session: self.state = State.RUNNING session.add(self) session.commit() id_ = self.id make_transient(self) self.id = id_ try: self._execute() # In case of max runs or max duration self.state = State.SUCCESS except SystemExit as e: # In case of ^C or SIGTERM self.state = State.SUCCESS except Exception as e: self.state = State.FAILED raise finally: self.end_date = timezone.utcnow() session.merge(self) session.commit() Stats.incr(self.__class__.__name__.lower() + '_end', 1, 1) def _execute(self): self.log.info("Starting the scheduler") # DAGs can be pickled for easier remote execution by some executors pickle_dags = False if self.do_pickle and self.executor.__class__ not in (executors.LocalExecutor, executors.SequentialExecutor): pickle_dags = True # Use multiple processes to parse and generate tasks for the # DAGs in parallel. By processing them in separate processes, # we can get parallelism and isolation from potentially harmful # user code. self.log.info("Processing files using up to %s processes at a time", self.max_threads) self.log.info("Running execute loop for %s seconds", self.run_duration) self.log.info("Processing each file at most %s times", self.num_runs) self.log.info("Process each file at most once every %s seconds", self.file_process_interval) self.log.info("Wait until at least %s seconds have passed between file parsing " "loops", self.min_file_parsing_loop_time) self.log.info("Checking for new files in %s every %s seconds", self.subdir, self.dag_dir_list_interval) # Build up a list of Python files that could contain DAGs self.log.info("Searching for files in %s", self.subdir) known_file_paths = list_py_file_paths(self.subdir) self.log.info("There are %s files in %s", len(known_file_paths), self.subdir) def processor_factory(file_path): return DagFileProcessor(file_path, pickle_dags, self.dag_ids) processor_manager = DagFileProcessorManager(self.subdir, known_file_paths, self.max_threads, self.file_process_interval, self.min_file_parsing_loop_time, self.num_runs, 
processor_factory) try: self._execute_helper(processor_manager) finally: self.log.info("Exited execute loop") # Kill all child processes on exit since we don't want to leave # them as orphaned. # 후략
  • 30. # For the execute duration, parse and schedule DAGs while (timezone.utcnow() - execute_start_time).total_seconds() < self.run_duration or self.run_duration < 0: self.log.debug("Starting Loop...") loop_start_time = time.time() # Traverse the DAG directory for Python files containing DAGs # periodically elapsed_time_since_refresh = (timezone.utcnow() - last_dag_dir_refresh_time).total_seconds() # 중략 # Kick of new processes and collect results from finished ones self.log.debug("Heartbeating the process manager") simple_dags = processor_manager.heartbeat() # Send tasks for execution if available simple_dag_bag = SimpleDagBag(simple_dags) if len(simple_dags) > 0: # Handle cases where a DAG run state is set (perhaps manually) to # a non-running state. Handle task instances that belong to # DAG runs in those states # If a task instance is up for retry but the corresponding DAG run # isn't running, mark the task instance as FAILED so we don't try # to re-run it. self._change_state_for_tis_without_dagrun(simple_dag_bag, [State.UP_FOR_RETRY], State.FAILED) # If a task instance is scheduled or queued, but the corresponding # DAG run isn't running, set the state to NONE so we don't try to # re-run it. 
self._change_state_for_tis_without_dagrun(simple_dag_bag, [State.QUEUED, State.SCHEDULED], State.NONE) self._execute_task_instances(simple_dag_bag, (State.SCHEDULED,)) # Call heartbeats self.log.debug("Heartbeating the executor") self.executor.heartbeat() # Process events from the executor self._process_executor_events(simple_dag_bag) # Heartbeat the scheduler periodically time_since_last_heartbeat = (timezone.utcnow() - last_self_heartbeat_time).total_seconds() if time_since_last_heartbeat > self.heartrate: self.log.debug("Heartbeating the scheduler") self.heartbeat() last_self_heartbeat_time = timezone.utcnow() # Occasionally print out stats about how fast the files are getting processed if ((timezone.utcnow() - last_stat_print_time).total_seconds() > self.print_stats_interval): if len(known_file_paths) > 0: self._log_file_processing_stats(known_file_paths, processor_manager) last_stat_print_time = timezone.utcnow() loop_end_time = time.time() self.log.debug("Ran scheduling loop in %.2f seconds", loop_end_time - loop_start_time) # Exit early for a test mode if processor_manager.max_runs_reached():"Exiting loop as all files have been processed %s times", self.num_runs) break def _execute_helper(self, processor_manager): """ :param processor_manager: manager to use :type processor_manager: DagFileProcessorManager :return: None """ self.executor.start() # 중략 # For the execute duration, parse and schedule DAGs while (timezone.utcnow() - execute_start_time).total_seconds() < self.run_duration or self.run_duration < 0: self.log.debug("Starting Loop...") loop_start_time = time.time() # 스케줄러 프로세스 실행 동안 반복되는 루프 # Stop any processors processor_manager.terminate() # Verify that all files were processed, and if so, deactivate DAGs that # haven't been touched by the scheduler as they likely have been # deleted. 
all_files_processed = True for file_path in known_file_paths: if processor_manager.get_last_finish_time(file_path) is None: all_files_processed = False break if all_files_processed: self.log.info( "Deactivating DAGs that haven't been touched since %s", execute_start_time.isoformat() ) models.DAG.deactivate_stale_dags(execute_start_time) self.executor.end() settings.Session.remove()
  • 31. def heartbeat(self): """ This should be periodically called by the scheduler. This method will kick off new processes to process DAG definition files and read the results from the finished processors. :return: a list of SimpleDags that were produced by processors that have finished since the last time this was called :rtype: list[SimpleDag] """ finished_processors = {} """:type : dict[unicode, AbstractDagFileProcessor]""" running_processors = {} """:type : dict[unicode, AbstractDagFileProcessor]""" for file_path, processor in self._processors.items(): if processor.done: self.log.info("Processor for %s finished", file_path) now = timezone.utcnow() finished_processors[file_path] = processor self._last_runtime[file_path] = (now - processor.start_time).total_seconds() self._last_finish_time[file_path] = now self._run_count[file_path] += 1 else: running_processors[file_path] = processor self._processors = running_processors self.log.debug("%s/%s scheduler processes running", len(self._processors), self._parallelism) self.log.debug("%s file paths queued for processing", len(self._file_path_queue)) # Collect all the DAGs that were found in the processed files simple_dags = [] for file_path, processor in finished_processors.items(): if processor.result is None: self.log.warning( "Processor for %s exited with return code %s.", processor.file_path, processor.exit_code ) else: for simple_dag in processor.result: simple_dags.append(simple_dag) # 중략 # Start more processors if we have enough slots and files to process while (self._parallelism - len(self._processors) > 0 and len(self._file_path_queue) > 0): file_path = self._file_path_queue.pop(0) processor = self._processor_factory(file_path) processor.start() self.log.info( "Started a process (PID: %s) to generate tasks for %s", processor.pid, file_path ) self._processors[file_path] = processor # Update scheduler heartbeat count. self._run_count[self._heart_beat_key] += 1 return simple_dags
  • 32. def helper(): # This helper runs in the newly created process log = logging.getLogger("airflow.processor") stdout = StreamLogWriter(log, logging.INFO) stderr = StreamLogWriter(log, logging.WARN) set_context(log, file_path) try: # redirect stdout/stderr to log sys.stdout = stdout sys.stderr = stderr # Re-configure the ORM engine as there are issues with multiple processes settings.configure_orm() # Change the thread name to differentiate log lines. This is # really a separate process, but changing the name of the # process doesn't work, so changing the thread name instead. threading.current_thread().name = thread_name start_time = time.time()"Started process (PID=%s) to work on %s", os.getpid(), file_path) scheduler_job = SchedulerJob(dag_ids=dag_id_white_list, log=log) result = scheduler_job.process_file(file_path, pickle_dags) result_queue.put(result) end_time = time.time() "Processing %s took %.3f seconds", file_path, end_time - start_time ) except: # Log exceptions through the logging framework. log.exception("Got an exception! Propagating...") raise finally: sys.stdout = sys.__stdout__ sys.stderr = sys.__stderr__ # We re-initialized the ORM within this Process above so we need to # tear it down manually here settings.dispose_orm() def _launch_process(result_queue, file_path, pickle_dags, dag_id_white_list, thread_name): """ Launch a process to process the given file. 
:param result_queue: the queue to use for passing back the result :type result_queue: multiprocessing.Queue :param file_path: the file to process :type file_path: unicode :param pickle_dags: whether to pickle the DAGs found in the file and save them to the DB :type pickle_dags: bool :param dag_id_white_list: if specified, only examine DAG ID's that are in this list :type dag_id_white_list: list[unicode] :param thread_name: the name to use for the process that is launched :type thread_name: unicode :return: the process that was launched :rtype: multiprocessing.Process """ p = multiprocessing.Process(target=helper, args=(), name="{}-Process".format(thread_name)) p.start() return p def start(self): """ Launch the process and start processing the DAG. """ self._process = DagFileProcessor._launch_process( self._result_queue, self.file_path, self._pickle_dags, self._dag_id_white_list, "DagFileProcessor{}".format(self._instance_id)) self._start_time = timezone.utcnow()
  • 33. def _process_dags(self, dagbag, dags, tis_out): """ Iterates over the dags and processes them. Processing includes: 1. Create appropriate DagRun(s) in the DB. 2. Create appropriate TaskInstance(s) in the DB. 3. Send emails for tasks that have missed SLAs. :param dagbag: a collection of DAGs to process :type dagbag: models.DagBag :param dags: the DAGs from the DagBag to process :type dags: DAG :param tis_out: A queue to add generated TaskInstance objects :type tis_out: multiprocessing.Queue[TaskInstance] :return: None """ for dag in dags: dag = dagbag.get_dag(dag.dag_id) if dag.is_paused:"Not processing DAG %s since it's paused", dag.dag_id) continue if not dag: self.log.error("DAG ID %s was not found in the DagBag", dag.dag_id) continue"Processing %s", dag.dag_id) dag_run = self.create_dag_run(dag) if dag_run:"Created %s", dag_run) self._process_task_instances(dag, tis_out) self.manage_slas(dag) models.DagStat.update([d.dag_id for d in dags]) def process_file(self, file_path, pickle_dags=False, session=None): """ Process a Python file containing Airflow DAGs. This includes: 1. Execute the file and look for DAG objects in the namespace. 2. Pickle the DAG and save it to the DB (if necessary). 3. For each DAG, see what tasks should run and create appropriate task instances in the DB. 4. Record any errors importing the file into ORM 5. Kill (in ORM) any task instances belonging to the DAGs that haven't issued a heartbeat in a while. 
Returns a list of SimpleDag objects that represent the DAGs found in the file :param file_path: the path to the Python file that should be executed :type file_path: unicode :param pickle_dags: whether serialize the DAGs found in the file and save them to the db :type pickle_dags: bool :return: a list of SimpleDags made from the Dags found in the file :rtype: list[SimpleDag] """"Processing file %s for tasks to queue", file_path) # As DAGs are parsed from this file, they will be converted into SimpleDags simple_dags = [] try: dagbag = models.DagBag(file_path) except Exception: self.log.exception("Failed at reloading the DAG file %s", file_path) Stats.incr('dag_file_refresh_error', 1, 1) return [] # 중략 self._process_dags(dagbag, dags, ti_keys_to_schedule) def _process_task_instances(self, dag, queue, session=None): """ This method schedules the tasks for a single DAG by looking at the active DAG runs and adding task instances that should run to the queue. """ # 중략 for run in active_dag_runs: self.log.debug("Examining active DAG run: %s", run) # this needs a fresh session sometimes tis get detached tis = run.get_task_instances(state=(State.NONE, State.UP_FOR_RETRY)) # this loop is quite slow as it uses are_dependencies_met for # every task (in ti.is_runnable). This is also called in # update_state above which has already checked these tasks for ti in tis: task = dag.get_task(ti.task_id) # fixme: ti.task is transient but needs to be set ti.task = task # future: remove adhoc if task.adhoc: continue if ti.are_dependencies_met( dep_context=DepContext(flag_upstream_failed=True), session=session): self.log.debug('Queuing task: %s', ti) queue.append(ti.key)
  • 34. def process_file(self, file_path, pickle_dags=False, session=None): """ Process a Python file containing Airflow DAGs. This includes: 1. Execute the file and look for DAG objects in the namespace. 2. Pickle the DAG and save it to the DB (if necessary). 3. For each DAG, see what tasks should run and create appropriate task instances in the DB. 4. Record any errors importing the file into ORM 5. Kill (in ORM) any task instances belonging to the DAGs that haven't issued a heartbeat in a while. Returns a list of SimpleDag objects that represent the DAGs found in the file :param file_path: the path to the Python file that should be executed :type file_path: unicode :param pickle_dags: whether serialize the DAGs found in the file and save them to the db :type pickle_dags: bool :return: a list of SimpleDags made from the Dags found in the file :rtype: list[SimpleDag] """"Processing file %s for tasks to queue", file_path) # As DAGs are parsed from this file, they will be converted into SimpleDags simple_dags = [] try: dagbag = models.DagBag(file_path) except Exception: self.log.exception("Failed at reloading the DAG file %s", file_path) Stats.incr('dag_file_refresh_error', 1, 1) return [] # 중략 self._process_dags(dagbag, dags, ti_keys_to_schedule) def process_file(self, file_path, pickle_dags=False, session=None): # 중략 ti_keys_to_schedule = [] self._process_dags(dagbag, dags, ti_keys_to_schedule) for ti_key in ti_keys_to_schedule: dag = dagbag.dags[ti_key[0]] task = dag.get_task(ti_key[1]) ti = models.TaskInstance(task, ti_key[2]) ti.refresh_from_db(session=session, lock_for_update=True) # We can defer checking the task dependency checks to the worker themselves # since they can be expensive to run in the scheduler. dep_context = DepContext(deps=QUEUE_DEPS, ignore_task_deps=True) # Only schedule tasks that have their dependencies met, e.g. 
to avoid # a task that recently got its state changed to RUNNING from somewhere # other than the scheduler from getting its state overwritten. # TODO(aoen): It's not great that we have to check all the task instance # dependencies twice; once to get the task scheduled, and again to actually # run the task. We should try to come up with a way to only check them once. if ti.are_dependencies_met( dep_context=dep_context, session=session, verbose=True): # Task starts out in the scheduled state. All tasks in the # scheduled state will be sent to the executor ti.state = State.SCHEDULED # Also save this task instance to the DB."Creating / updating %s in ORM", ti) session.merge(ti) # commit batch session.commit() # 중략 return simple_dags def _find_executable_task_instances(self, simple_dag_bag, states, session=None): # 중략 states_to_count_as_running = [State.RUNNING] executable_tis = [] # Get all the queued task instances from associated with scheduled # DagRuns which are not backfilled, in the given states, # and the dag is not paused TI = models.TaskInstance DR = models.DagRun DM = models.DagModel ti_query = ( session .query(TI) .filter(TI.dag_id.in_(simple_dag_bag.dag_ids)) .outerjoin(DR, and_(DR.dag_id == TI.dag_id, DR.execution_date == TI.execution_date)) .filter(or_(DR.run_id == None, not_( + '%')))) .outerjoin(DM, DM.dag_id==TI.dag_id) .filter(or_(DM.dag_id == None, not_(DM.is_paused))) ) if None in states: ti_query = ti_query.filter(or_(TI.state == None, TI.state.in_(states))) else: ti_query = ti_query.filter(TI.state.in_(states)) task_instances_to_examine = ti_query.all() # 중략 # Get the pool settings pools = {p.pool: p for p in session.query(models.Pool).all()} pool_to_task_instances = defaultdict(list) for task_instance in task_instances_to_examine: pool_to_task_instances[task_instance.pool].append(task_instance) # 중략 # Go through each pool, and queue up a task for execution if there are # any open slots in the pool. 
for pool, task_instances in pool_to_task_instances.items(): # 중략 priority_sorted_task_instances = sorted( task_instances, key=lambda ti: (-ti.priority_weight, ti.execution_date)) # DAG IDs with running tasks that equal the concurrency limit of the dag dag_id_to_possibly_running_task_count = {} for task_instance in priority_sorted_task_instances: if open_slots <= 0: self.log.info( "Not scheduling since there are %s open slots in pool %s", open_slots, pool ) # Can't schedule any more since there are no more open slots. break # Check to make sure that the task concurrency of the DAG hasn't been # reached. # 중략 task_instance_str = "\n\t".join( ["{}".format(x) for x in executable_tis]) self.log.info("Setting the follow tasks to queued state:\n\t%s", task_instance_str) # so these dont expire on commit for ti in executable_tis: copy_dag_id = ti.dag_id copy_execution_date = ti.execution_date copy_task_id = ti.task_id make_transient(ti) ti.dag_id = copy_dag_id ti.execution_date = copy_execution_date ti.task_id = copy_task_id return executable_tis _execute_task_instances
  • 35. class LocalExecutor(BaseExecutor): """ LocalExecutor executes tasks locally in parallel. It uses the multiprocessing Python library and queues to parallelize the execution of tasks. """ def start(self): self.result_queue = multiprocessing.Queue() self.queue = None self.workers = [] self.workers_used = 0 self.workers_active = 0 self.impl = (LocalExecutor._UnlimitedParallelism(self) if self.parallelism == 0 else LocalExecutor._LimitedParallelism(self)) self.impl.start() def __init__( self, executor=executors.GetDefaultExecutor(), heartrate=conf.getfloat('scheduler', 'JOB_HEARTBEAT_SEC'), *args, **kwargs): self.hostname = get_hostname() self.executor = executor self.executor_class = executor.__class__.__name__ self.start_date = timezone.utcnow() self.latest_heartbeat = timezone.utcnow() self.heartrate = heartrate self.unixname = getpass.getuser() self.max_tis_per_query = conf.getint('scheduler', 'max_tis_per_query') super(BaseJob, self).__init__(*args, **kwargs) class BaseJob(Base, LoggingMixin): """ Abstract class to be derived for jobs. Jobs are processing items with state and duration that aren't task instances. For instance a BackfillJob is a collection of task instance runs, but should have its own state, start and end time. """ def GetDefaultExecutor(): """Creates a new instance of the configured executor if none exists and returns it""" global DEFAULT_EXECUTOR if DEFAULT_EXECUTOR is not None: return DEFAULT_EXECUTOR executor_name = configuration.conf.get('core', 'EXECUTOR') DEFAULT_EXECUTOR = _get_executor(executor_name) log = LoggingMixin().log log.info("Using executor %s", executor_name) return DEFAULT_EXECUTOR def _get_executor(executor_name): """ Creates a new instance of the named executor. 
In case the executor name is not know in airflow, look for it in the plugins """ if executor_name == Executors.LocalExecutor: return LocalExecutor() elif executor_name == Executors.SequentialExecutor: return SequentialExecutor() elif executor_name == Executors.CeleryExecutor: from airflow.executors.celery_executor import CeleryExecutor return CeleryExecutor() elif executor_name == Executors.DaskExecutor: from airflow.executors.dask_executor import DaskExecutor return DaskExecutor() elif executor_name == Executors.MesosExecutor: from airflow.contrib.executors.mesos_executor import MesosExecutor return MesosExecutor() elif executor_name == Executors.KubernetesExecutor: from airflow.contrib.executors.kubernetes_executor import KubernetesExecutor return KubernetesExecutor() else: # Loading plugins _integrate_plugins() executor_path = executor_name.split('.') if len(executor_path) != 2: raise AirflowException( "Executor {0} not supported: " "please specify in format plugin_module.executor".format(executor_name)) if executor_path[0] in globals(): return globals()[executor_path[0]].__dict__[executor_path[1]]() else: raise AirflowException("Executor {0} not supported.".format(executor_name)) def _execute_helper(self, processor_manager): """ :param processor_manager: manager to use :type processor_manager: DagFileProcessorManager :return: None """ self.executor.start() # 후략 class SchedulerJob(BaseJob): """ This SchedulerJob runs for a specific time interval and schedules the jobs that are ready to run. It figures out the latest runs for each task and sees if the dependencies for the next schedules are met. If so, it creates appropriate TaskInstances and sends run commands to the executor. It does this for each task in each DAG and repeats. """
  • 36. def _enqueue_task_instances_with_queued_state(self, simple_dag_bag, task_instances): """ Takes task_instances, which should have been set to queued, and enqueues them with the executor. :param task_instances: TaskInstances to enqueue :type task_instances: List[TaskInstance] :param simple_dag_bag: Should contains all of the task_instances' dags :type simple_dag_bag: SimpleDagBag """ TI = models.TaskInstance # actually enqueue them for task_instance in task_instances: simple_dag = simple_dag_bag.get_dag(task_instance.dag_id) command = " ".join(TI.generate_command( task_instance.dag_id, task_instance.task_id, task_instance.execution_date, local=True, mark_success=False, ignore_all_deps=False, ignore_depends_on_past=False, ignore_task_deps=False, ignore_ti_state=False, pool=task_instance.pool, file_path=simple_dag.full_filepath, pickle_id=simple_dag.pickle_id)) priority = task_instance.priority_weight queue = task_instance.queue self.log.info( "Sending %s to executor with priority %s and queue %s", task_instance.key, priority, queue ) # save attributes so sqlalchemy doesnt expire them copy_dag_id = task_instance.dag_id copy_task_id = task_instance.task_id copy_execution_date = task_instance.execution_date make_transient(task_instance) task_instance.dag_id = copy_dag_id task_instance.task_id = copy_task_id task_instance.execution_date = copy_execution_date self.executor.queue_command( task_instance, command, priority=priority, queue=queue) def _execute_task_instances(self, simple_dag_bag, states, session=None): """ Attempts to execute TaskInstances that should be executed by the scheduler. There are three steps: 1. Pick TIs by priority with the constraint that they are in the expected states and that we do exceed max_active_runs or pool limits. 2. Change the state for the TIs above atomically. 3. Enqueue the TIs in the executor. 
:param simple_dag_bag: TaskInstances associated with DAGs in the simple_dag_bag will be fetched from the DB and executed :type simple_dag_bag: SimpleDagBag :param states: Execute TaskInstances in these states :type states: Tuple[State] :return: None """ executable_tis = self._find_executable_task_instances(simple_dag_bag, states, session=session) def query(result, items): tis_with_state_changed = self._change_state_for_executable_task_instances( items, states, session=session) self._enqueue_task_instances_with_queued_state( simple_dag_bag, tis_with_state_changed) session.commit() return result + len(tis_with_state_changed) return helpers.reduce_in_chunks(query, executable_tis, 0, self.max_tis_per_query) def _execute_helper(self, processor_manager): """ :param processor_manager: manager to use :type processor_manager: DagFileProcessorManager :return: None """ self.executor.start() # 중략 # For the execute duration, parse and schedule DAGs while (timezone.utcnow() - execute_start_time).total_seconds() < self.run_duration or self.run_duration < 0: self.log.debug("Starting Loop...") loop_start_time = time.time() # 중략 # Kick of new processes and collect results from finished ones self.log.debug("Heartbeating the process manager") simple_dags = processor_manager.heartbeat() # Send tasks for execution if available simple_dag_bag = SimpleDagBag(simple_dags) if len(simple_dags) > 0: # 중략 self._execute_task_instances(simple_dag_bag, (State.SCHEDULED,)) # Call heartbeats self.log.debug("Heartbeating the executor") self.executor.heartbeat() # 중략 # Exit early for a test mode if processor_manager.max_runs_reached(): self.log.info("Exiting loop as all files have been processed %s times", self.num_runs) break # 후략 def queue_command(self, task_instance, command, priority=1, queue=None): key = task_instance.key if key not in self.queued_tasks and key not in self.running: self.log.info("Adding to queue: %s", command) self.queued_tasks[key] = (command, priority, queue, task_instance) else: self.log.info("could not queue task 
{}".format(key))
  • 37. def heartbeat(self): # Triggering new jobs if not self.parallelism: open_slots = len(self.queued_tasks) else: open_slots = self.parallelism - len(self.running) self.log.debug("%s running task instances", len(self.running)) self.log.debug("%s in queue", len(self.queued_tasks)) self.log.debug("%s open slots", open_slots) sorted_queue = sorted( [(k, v) for k, v in self.queued_tasks.items()], key=lambda x: x[1][1], reverse=True) for i in range(min((open_slots, len(self.queued_tasks)))): key, (command, _, queue, ti) = sorted_queue.pop(0) # TODO(jlowin) without a way to know what Job ran which tasks, # there is a danger that another Job started running a task # that was also queued to this executor. This is the last chance # to check if that happened. The most probable way is that a # Scheduler tried to run a task that was originally queued by a # Backfill. This fix reduces the probability of a collision but # does NOT eliminate it. self.queued_tasks.pop(key) ti.refresh_from_db() if ti.state != State.RUNNING: self.running[key] = command self.execute_async(key=key, command=command, queue=queue, executor_config=ti.executor_config) else: self.log.info( 'Task is already running, not sending to ' 'executor: {}'.format(key)) # Calling child class sync method self.log.debug("Calling the %s sync method", self.__class__) self.sync() def execute_async(self, key, command): """ :param key: the key to identify the TI :type key: Tuple(dag_id, task_id, execution_date) :param command: the command to execute :type command: string """ local_worker = LocalWorker(self.executor.result_queue) local_worker.key = key local_worker.command = command self.executor.workers_used += 1 self.executor.workers_active += 1 local_worker.start() class LocalWorker(multiprocessing.Process, LoggingMixin): """LocalWorker Process implementation to run airflow commands. 
Executes the given command and puts the result into a result queue when done, terminating execution.""" def __init__(self, result_queue): """ :param result_queue: the queue to store result states tuples (key, State) :type result_queue: multiprocessing.Queue """ super(LocalWorker, self).__init__() self.daemon = True self.result_queue = result_queue self.key = None self.command = None def execute_work(self, key, command): """ Executes command received and stores result state in queue. :param key: the key to identify the TI :type key: Tuple(dag_id, task_id, execution_date) :param command: the command to execute :type command: string """ if key is None: return self.log.info("%s running %s", self.__class__.__name__, command) command = "exec bash -c '{0}'".format(command) try: subprocess.check_call(command, shell=True, close_fds=True) state = State.SUCCESS except subprocess.CalledProcessError as e: state = State.FAILED self.log.error("Failed to execute task %s.", str(e)) # TODO: Why is this commented out? # raise e self.result_queue.put((key, state)) def run(self): self.execute_work(self.key, self.command) time.sleep(1)
  • 38. def generate_command(dag_id, task_id, execution_date, mark_success=False, ignore_all_deps=False, ignore_depends_on_past=False, ignore_task_deps=False, ignore_ti_state=False, local=False, pickle_id=None, file_path=None, raw=False, job_id=None, pool=None, cfg_path=None ): """ Generates the shell command required to execute this task instance. 중략 """ iso = execution_date.isoformat() cmd = ["airflow", "run", str(dag_id), str(task_id), str(iso)] cmd.extend(["--mark_success"]) if mark_success else None cmd.extend(["--pickle", str(pickle_id)]) if pickle_id else None cmd.extend(["--job_id", str(job_id)]) if job_id else None cmd.extend(["-A"]) if ignore_all_deps else None cmd.extend(["-i"]) if ignore_task_deps else None cmd.extend(["-I"]) if ignore_depends_on_past else None cmd.extend(["--force"]) if ignore_ti_state else None cmd.extend(["--local"]) if local else None cmd.extend(["--pool", pool]) if pool else None cmd.extend(["--raw"]) if raw else None cmd.extend(["-sd", file_path]) if file_path else None cmd.extend(["--cfg_path", cfg_path]) if cfg_path else None return cmd def run(args, dag=None): # 중략 task = dag.get_task(task_id=args.task_id) ti = TaskInstance(task, args.execution_date) ti.refresh_from_db() ti.init_run_context(raw=args.raw) hostname = get_hostname() log.info("Running %s on host %s", ti, hostname) if args.interactive: _run(args, dag, ti) else: with redirect_stdout(ti.log, logging.INFO), redirect_stderr(ti.log, logging.WARN): _run(args, dag, ti) logging.shutdown() def _run(args, dag, ti): if args.local: run_job = jobs.LocalTaskJob( task_instance=ti, mark_success=args.mark_success, pickle_id=args.pickle, ignore_all_deps=args.ignore_all_dependencies, ignore_depends_on_past=args.ignore_depends_on_past, ignore_task_deps=args.ignore_dependencies, ignore_ti_state=args.force, pool=args.pool) elif args.raw: ti._run_raw_task( mark_success=args.mark_success, job_id=args.job_id, pool=args.pool, ) else: # 후략 def _execute(self): self.task_runner = get_task_runner(self) # 
중략 if not self.task_instance._check_and_change_state_before_execution( mark_success=self.mark_success, ignore_all_deps=self.ignore_all_deps, ignore_depends_on_past=self.ignore_depends_on_past, ignore_task_deps=self.ignore_task_deps, ignore_ti_state=self.ignore_ti_state, job_id=self.id, pool=self.pool): self.log.info("Task is not able to be run") return try: self.task_runner.start() last_heartbeat_time = time.time() heartbeat_time_limit = conf.getint('scheduler', 'scheduler_zombie_task_threshold') while True: # Monitor the task to see if it's done return_code = self.task_runner.return_code() if return_code is not None: self.log.info("Task exited with return code %s", return_code) return # 중략 finally: self.on_kill() class BashTaskRunner(BaseTaskRunner): """ Runs the raw Airflow task by invoking through the Bash shell. """ def __init__(self, local_task_job): super(BashTaskRunner, self).__init__(local_task_job) def start(self): self.process = self.run_command(['bash', '-c'], join_args=True) def return_code(self): return self.process.poll()