cancel(projectId, region, jobId, body, x__xgafv=None)
Starts a job cancellation request. To access the job resource after cancellation, call [regions/{region}/jobs.list](/dataproc/reference/rest/v1/projects.regions.jobs/list) or [regions/{region}/jobs.get](/dataproc/reference/rest/v1/projects.regions.jobs/get).
delete(projectId, region, jobId, x__xgafv=None)
Deletes the job from the project. If the job is active, the delete fails, and the response returns `FAILED_PRECONDITION`.
get(projectId, region, jobId, x__xgafv=None)
Gets the resource representation for a job in a project.
list(projectId, region, pageSize=None, x__xgafv=None, jobStateMatcher=None, pageToken=None, clusterName=None)
Lists regions/{region}/jobs in a project.
list_next(previous_request, previous_response)
Retrieves the next page of results.
submit(projectId, region, body, x__xgafv=None)
Submits a job to a cluster.
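The methods below are reached through the google-api-python-client discovery interface. As a minimal sketch (assuming Application Default Credentials are configured in the environment), the jobs collection documented on this page can be obtained like this:

    from googleapiclient.discovery import build

    # Build the Dataproc v1 service; credentials come from Application Default Credentials.
    dataproc = build('dataproc', 'v1')

    # The projects.regions.jobs collection whose methods are documented below.
    jobs = dataproc.projects().regions().jobs()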
cancel(projectId, region, jobId, body, x__xgafv=None)
Starts a job cancellation request. To access the job resource after cancellation, call [regions/{region}/jobs.list](/dataproc/reference/rest/v1/projects.regions.jobs/list) or [regions/{region}/jobs.get](/dataproc/reference/rest/v1/projects.regions.jobs/get). Args: projectId: string, [Required] The ID of the Google Cloud Platform project that the job belongs to. (required) region: string, [Required] The Cloud Dataproc region in which to handle the request. (required) jobId: string, [Required] The job ID. (required) body: object, The request body. (required) The object takes the form of: { # A request to cancel a job. } x__xgafv: string, V1 error format. Returns: An object of the form: { # A Cloud Dataproc job resource. "status": { # Cloud Dataproc job status. # [Output-only] The job status. Additional application-specific status information may be contained in the type_job and yarn_applications fields. "state": "A String", # [Required] A state message specifying the overall job state. "stateStartTime": "A String", # [Output-only] The time when this state was entered. "details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR. }, "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job. "jarFileUris": [ # [Optional] Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "fileUris": [ # [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`. "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", ], "mainJarFileUri": "A String", # The HCFS URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' "properties": { # [Optional] A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code. "a_key": "A String", }, }, "statusHistory": [ # [Output-only] The previous job status. { # Cloud Dataproc job status. "state": "A String", # [Required] A state message specifying the overall job state. "stateStartTime": "A String", # [Output-only] The time when this state was entered. "details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR. }, ], "placement": { # Cloud Dataproc job config. 
# [Required] Job information, including how, when, and where to run the job. "clusterName": "A String", # [Required] The name of the cluster where the job will be submitted. "clusterUuid": "A String", # [Output-only] A cluster UUID generated by the Cloud Dataproc service when the job is submitted. }, "reference": { # Encapsulates the full scoping used to reference a job. # [Optional] The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a job_id. "projectId": "A String", # [Required] The ID of the Google Cloud Platform project that the job belongs to. "jobId": "A String", # [Required] The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 512 characters. }, "sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job. "queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`). "a_key": "A String", }, "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "properties": { # [Optional] A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. "a_key": "A String", }, }, "pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job. "queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Pig command: `name=[value]`). "a_key": "A String", }, "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. 
Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries. "properties": { # [Optional] A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code. "a_key": "A String", }, }, "driverOutputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdout of the job's driver program. "driverControlFilesUri": "A String", # [Output-only] If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as `driver_output_uri`. "sparkJob": { # A Cloud Dataproc job for running Spark applications on YARN. # Job is a Spark job. "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Spark drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "mainClass": "A String", # The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`. "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Spark drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. "A String", ], "mainJarFileUri": "A String", # The HCFS URI of the jar file that contains the main class. "properties": { # [Optional] A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code. "a_key": "A String", }, }, "pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job. "mainPythonFileUri": "A String", # [Required] The HCFS URI of the main Python file to use as the driver. Must be a .py file. "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. 
This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks. "A String", ], "fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Python drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of .jar, .tar, .tar.gz, .tgz, and .zip. "A String", ], "pythonFileUris": [ # [Optional] HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip. "A String", ], "properties": { # [Optional] A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code. "a_key": "A String", }, }, "hiveJob": { # A Cloud Dataproc job for running Hive queries on YARN. # Job is a Hive job. "queryFileUri": "A String", # The HCFS URI of the script that contains Hive queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`). "a_key": "A String", }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries. "properties": { # [Optional] A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code. "a_key": "A String", }, }, }
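For orientation, a minimal sketch of calling cancel with the collection built above; 'example-project', 'global', and 'example-job' are placeholder values, and the request body is an empty cancel-job request:

    cancelled = jobs.cancel(
        projectId='example-project',   # placeholder project ID
        region='global',               # placeholder region
        jobId='example-job',           # placeholder job ID
        body={},                       # a request to cancel a job carries no fields
    ).execute()

    # The response is the job resource; the state may still be CANCEL_PENDING at this point.
    print(cancelled['status']['state'])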
delete(projectId, region, jobId, x__xgafv=None)
Deletes the job from the project. If the job is active, the delete fails and the response returns `FAILED_PRECONDITION`.

Args:
  projectId: string, [Required] The ID of the Google Cloud Platform project that the job belongs to. (required)
  region: string, [Required] The Cloud Dataproc region in which to handle the request. (required)
  jobId: string, [Required] The job ID. (required)
  x__xgafv: string, V1 error format.

Returns:
  An object of the form:

    { # A generic empty message that you can re-use to avoid defining duplicated empty messages in your APIs. A typical example is to use it as the request or the response type of an API method. For instance: service Foo { rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty); } The JSON representation for `Empty` is an empty JSON object `{}`.
    }
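A minimal sketch of a delete call under the same placeholder IDs; the job must no longer be active, otherwise the call raises an HttpError for FAILED_PRECONDITION:

    # Returns an empty dict on success; raises googleapiclient.errors.HttpError on failure.
    jobs.delete(
        projectId='example-project',
        region='global',
        jobId='example-job',
    ).execute()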
get(projectId, region, jobId, x__xgafv=None)
Gets the resource representation for a job in a project. Args: projectId: string, [Required] The ID of the Google Cloud Platform project that the job belongs to. (required) region: string, [Required] The Cloud Dataproc region in which to handle the request. (required) jobId: string, [Required] The job ID. (required) x__xgafv: string, V1 error format. Returns: An object of the form: { # A Cloud Dataproc job resource. "status": { # Cloud Dataproc job status. # [Output-only] The job status. Additional application-specific status information may be contained in the type_job and yarn_applications fields. "state": "A String", # [Required] A state message specifying the overall job state. "stateStartTime": "A String", # [Output-only] The time when this state was entered. "details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR. }, "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job. "jarFileUris": [ # [Optional] Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "fileUris": [ # [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`. "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", ], "mainJarFileUri": "A String", # The HCFS URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' "properties": { # [Optional] A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code. "a_key": "A String", }, }, "statusHistory": [ # [Output-only] The previous job status. { # Cloud Dataproc job status. "state": "A String", # [Required] A state message specifying the overall job state. "stateStartTime": "A String", # [Output-only] The time when this state was entered. "details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR. }, ], "placement": { # Cloud Dataproc job config. # [Required] Job information, including how, when, and where to run the job. "clusterName": "A String", # [Required] The name of the cluster where the job will be submitted. "clusterUuid": "A String", # [Output-only] A cluster UUID generated by the Cloud Dataproc service when the job is submitted. 
}, "reference": { # Encapsulates the full scoping used to reference a job. # [Optional] The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a job_id. "projectId": "A String", # [Required] The ID of the Google Cloud Platform project that the job belongs to. "jobId": "A String", # [Required] The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 512 characters. }, "sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job. "queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`). "a_key": "A String", }, "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "properties": { # [Optional] A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. "a_key": "A String", }, }, "pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job. "queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Pig command: `name=[value]`). "a_key": "A String", }, "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. 
Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries. "properties": { # [Optional] A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code. "a_key": "A String", }, }, "driverOutputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdout of the job's driver program. "driverControlFilesUri": "A String", # [Output-only] If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as `driver_output_uri`. "sparkJob": { # A Cloud Dataproc job for running Spark applications on YARN. # Job is a Spark job. "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Spark drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "mainClass": "A String", # The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`. "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Spark drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. "A String", ], "mainJarFileUri": "A String", # The HCFS URI of the jar file that contains the main class. "properties": { # [Optional] A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code. "a_key": "A String", }, }, "pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job. "mainPythonFileUri": "A String", # [Required] The HCFS URI of the main Python file to use as the driver. Must be a .py file. "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. 
Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks. "A String", ], "fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Python drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of .jar, .tar, .tar.gz, .tgz, and .zip. "A String", ], "pythonFileUris": [ # [Optional] HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip. "A String", ], "properties": { # [Optional] A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code. "a_key": "A String", }, }, "hiveJob": { # A Cloud Dataproc job for running Hive queries on YARN. # Job is a Hive job. "queryFileUri": "A String", # The HCFS URI of the script that contains Hive queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`). "a_key": "A String", }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries. "properties": { # [Optional] A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code. "a_key": "A String", }, }, }
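Since get returns the full job resource including its current status, it can be polled until the job reaches a terminal state. A sketch with placeholder IDs (the terminal states assumed here are DONE, ERROR, and CANCELLED):

    import time

    # Poll the job until it reaches a terminal state.
    while True:
        job = jobs.get(projectId='example-project', region='global', jobId='example-job').execute()
        state = job['status']['state']
        if state in ('DONE', 'ERROR', 'CANCELLED'):
            break
        time.sleep(5)

    print(state, job.get('driverOutputResourceUri'))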
list(projectId, region, pageSize=None, x__xgafv=None, jobStateMatcher=None, pageToken=None, clusterName=None)
Lists regions/{region}/jobs in a project. Args: projectId: string, [Required] The ID of the Google Cloud Platform project that the job belongs to. (required) region: string, [Required] The Cloud Dataproc region in which to handle the request. (required) pageSize: integer, [Optional] The number of results to return in each response. x__xgafv: string, V1 error format. jobStateMatcher: string, [Optional] Specifies enumerated categories of jobs to list. pageToken: string, [Optional] The page token, returned by a previous call, to request the next page of results. clusterName: string, [Optional] If set, the returned jobs list includes only jobs that were submitted to the named cluster. Returns: An object of the form: { # A list of jobs in a project. "nextPageToken": "A String", # [Optional] This token is included in the response if there are more results to fetch. To fetch additional results, provide this value as the `page_token` in a subsequent ListJobsRequest. "jobs": [ # [Output-only] Jobs list. { # A Cloud Dataproc job resource. "status": { # Cloud Dataproc job status. # [Output-only] The job status. Additional application-specific status information may be contained in the type_job and yarn_applications fields. "state": "A String", # [Required] A state message specifying the overall job state. "stateStartTime": "A String", # [Output-only] The time when this state was entered. "details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR. }, "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job. "jarFileUris": [ # [Optional] Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "fileUris": [ # [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`. "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", ], "mainJarFileUri": "A String", # The HCFS URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' "properties": { # [Optional] A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code. "a_key": "A String", }, }, "statusHistory": [ # [Output-only] The previous job status. { # Cloud Dataproc job status. 
"state": "A String", # [Required] A state message specifying the overall job state. "stateStartTime": "A String", # [Output-only] The time when this state was entered. "details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR. }, ], "placement": { # Cloud Dataproc job config. # [Required] Job information, including how, when, and where to run the job. "clusterName": "A String", # [Required] The name of the cluster where the job will be submitted. "clusterUuid": "A String", # [Output-only] A cluster UUID generated by the Cloud Dataproc service when the job is submitted. }, "reference": { # Encapsulates the full scoping used to reference a job. # [Optional] The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a job_id. "projectId": "A String", # [Required] The ID of the Google Cloud Platform project that the job belongs to. "jobId": "A String", # [Required] The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 512 characters. }, "sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job. "queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`). "a_key": "A String", }, "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "properties": { # [Optional] A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. "a_key": "A String", }, }, "pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job. "queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Pig command: `name=[value]`). "a_key": "A String", }, "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. 
Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries. "properties": { # [Optional] A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code. "a_key": "A String", }, }, "driverOutputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdout of the job's driver program. "driverControlFilesUri": "A String", # [Output-only] If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as `driver_output_uri`. "sparkJob": { # A Cloud Dataproc job for running Spark applications on YARN. # Job is a Spark job. "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Spark drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "mainClass": "A String", # The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`. "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Spark drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. "A String", ], "mainJarFileUri": "A String", # The HCFS URI of the jar file that contains the main class. "properties": { # [Optional] A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code. "a_key": "A String", }, }, "pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job. 
"mainPythonFileUri": "A String", # [Required] The HCFS URI of the main Python file to use as the driver. Must be a .py file. "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks. "A String", ], "fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Python drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of .jar, .tar, .tar.gz, .tgz, and .zip. "A String", ], "pythonFileUris": [ # [Optional] HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip. "A String", ], "properties": { # [Optional] A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code. "a_key": "A String", }, }, "hiveJob": { # A Cloud Dataproc job for running Hive queries on YARN. # Job is a Hive job. "queryFileUri": "A String", # The HCFS URI of the script that contains Hive queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`). "a_key": "A String", }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries. "properties": { # [Optional] A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code. "a_key": "A String", }, }, }, ], }
list_next(previous_request, previous_response)
Retrieves the next page of results.

Args:
  previous_request: The request for the previous page. (required)
  previous_response: The response from the request for the previous page. (required)

Returns:
  A request object that you can call 'execute()' on to request the next page. Returns None if there are no more items in the collection.
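Combined with list, this supports the standard pagination loop. A sketch using the same placeholder project and region:

    # Iterate over every page of jobs in the project.
    request = jobs.list(projectId='example-project', region='global')
    while request is not None:
        response = request.execute()
        for job in response.get('jobs', []):
            print(job['reference']['jobId'])
        request = jobs.list_next(previous_request=request, previous_response=response)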
submit(projectId, region, body, x__xgafv=None)
Submits a job to a cluster. Args: projectId: string, [Required] The ID of the Google Cloud Platform project that the job belongs to. (required) region: string, [Required] The Cloud Dataproc region in which to handle the request. (required) body: object, The request body. (required) The object takes the form of: { # A request to submit a job. "job": { # A Cloud Dataproc job resource. # [Required] The job resource. "status": { # Cloud Dataproc job status. # [Output-only] The job status. Additional application-specific status information may be contained in the type_job and yarn_applications fields. "state": "A String", # [Required] A state message specifying the overall job state. "stateStartTime": "A String", # [Output-only] The time when this state was entered. "details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR. }, "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job. "jarFileUris": [ # [Optional] Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "fileUris": [ # [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`. "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip. "A String", ], "mainJarFileUri": "A String", # The HCFS URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' "properties": { # [Optional] A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code. "a_key": "A String", }, }, "statusHistory": [ # [Output-only] The previous job status. { # Cloud Dataproc job status. "state": "A String", # [Required] A state message specifying the overall job state. "stateStartTime": "A String", # [Output-only] The time when this state was entered. "details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR. }, ], "placement": { # Cloud Dataproc job config. # [Required] Job information, including how, when, and where to run the job. "clusterName": "A String", # [Required] The name of the cluster where the job will be submitted. "clusterUuid": "A String", # [Output-only] A cluster UUID generated by the Cloud Dataproc service when the job is submitted. 
}, "reference": { # Encapsulates the full scoping used to reference a job. # [Optional] The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a job_id. "projectId": "A String", # [Required] The ID of the Google Cloud Platform project that the job belongs to. "jobId": "A String", # [Required] The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 512 characters. }, "sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job. "queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`). "a_key": "A String", }, "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "properties": { # [Optional] A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. "a_key": "A String", }, }, "pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job. "queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries. "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Pig command: `name=[value]`). "a_key": "A String", }, "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. "A String", ], "queryList": { # A list of queries to run on a cluster. # A list of queries. "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. 
Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } } "A String", ], }, "continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries. "properties": { # [Optional] A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code. "a_key": "A String", }, }, "driverOutputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdout of the job's driver program. "driverControlFilesUri": "A String", # [Output-only] If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as `driver_output_uri`. "sparkJob": { # A Cloud Dataproc job for running Spark applications on YARN. # Job is a Spark job. "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' "a_key": "A String", }, }, "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Spark drivers and distributed tasks. Useful for naively parallel tasks. "A String", ], "mainClass": "A String", # The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`. "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Spark drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. "A String", ], "mainJarFileUri": "A String", # The HCFS URI of the jar file that contains the main class. "properties": { # [Optional] A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code. "a_key": "A String", }, }, "pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job. "mainPythonFileUri": "A String", # [Required] The HCFS URI of the main Python file to use as the driver. Must be a .py file. "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission. "A String", ], "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution. "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. 
          Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
          "a_key": "A String",
        },
      },
      "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks.
        "A String",
      ],
      "fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Python drivers and distributed tasks. Useful for naively parallel tasks.
        "A String",
      ],
      "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Python drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
        "A String",
      ],
      "pythonFileUris": [ # [Optional] HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip.
        "A String",
      ],
      "properties": { # [Optional] A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
        "a_key": "A String",
      },
    },
    "hiveJob": { # A Cloud Dataproc job for running Hive queries on YARN. # Job is a Hive job.
      "queryFileUri": "A String", # The HCFS URI of the script that contains Hive queries.
      "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`).
        "a_key": "A String",
      },
      "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs.
        "A String",
      ],
      "queryList": { # A list of queries to run on a cluster. # A list of queries.
        "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of a Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
          "A String",
        ],
      },
      "continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
      "properties": { # [Optional] A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code.
        "a_key": "A String",
      },
    },
  },
}

  x__xgafv: string, V1 error format.

Returns:
  An object of the form:

  { # A Cloud Dataproc job resource.
    "status": { # Cloud Dataproc job status. # [Output-only] The job status. Additional application-specific status information may be contained in the type_job and yarn_applications fields.
      "state": "A String", # [Required] A state message specifying the overall job state.
      "stateStartTime": "A String", # [Output-only] The time when this state was entered.
      "details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
    },
    "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
      "jarFileUris": [ # [Optional] Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
        "A String",
      ],
      "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution.
        "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
          "a_key": "A String",
        },
      },
      "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
        "A String",
      ],
      "fileUris": [ # [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
        "A String",
      ],
      "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`.
      "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
        "A String",
      ],
      "mainJarFileUri": "A String", # The HCFS URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
      "properties": { # [Optional] A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
        "a_key": "A String",
      },
    },
    "statusHistory": [ # [Output-only] The previous job status.
      { # Cloud Dataproc job status.
        "state": "A String", # [Required] A state message specifying the overall job state.
        "stateStartTime": "A String", # [Output-only] The time when this state was entered.
        "details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
      },
    ],
    "placement": { # Cloud Dataproc job config. # [Required] Job information, including how, when, and where to run the job.
      "clusterName": "A String", # [Required] The name of the cluster where the job will be submitted.
      "clusterUuid": "A String", # [Output-only] A cluster UUID generated by the Cloud Dataproc service when the job is submitted.
    },
    "reference": { # Encapsulates the full scoping used to reference a job. # [Optional] The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a job_id.
      "projectId": "A String", # [Required] The ID of the Google Cloud Platform project that the job belongs to.
      "jobId": "A String", # [Required] The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 512 characters.
    },
    "sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
      "queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
      "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`).
        "a_key": "A String",
      },
      "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution.
        "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
          "a_key": "A String",
        },
      },
      "jarFileUris": [ # [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH.
        "A String",
      ],
      "queryList": { # A list of queries to run on a cluster. # A list of queries.
        "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of a Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
          "A String",
        ],
      },
      "properties": { # [Optional] A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Cloud Dataproc API may be overwritten.
        "a_key": "A String",
      },
    },
    "pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
      "queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
      "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Pig command: `name=[value]`).
        "a_key": "A String",
      },
      "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution.
        "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
          "a_key": "A String",
        },
      },
      "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
        "A String",
      ],
      "queryList": { # A list of queries to run on a cluster. # A list of queries.
        "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of a Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
          "A String",
        ],
      },
      "continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
      "properties": { # [Optional] A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
        "a_key": "A String",
      },
    },
    "driverOutputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdout of the job's driver program.
    "driverControlFilesUri": "A String", # [Output-only] If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as `driver_output_uri`.
    "sparkJob": { # A Cloud Dataproc job for running Spark applications on YARN. # Job is a Spark job.
      "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks.
        "A String",
      ],
      "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution.
        "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
          "a_key": "A String",
        },
      },
      "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
        "A String",
      ],
      "fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Spark drivers and distributed tasks. Useful for naively parallel tasks.
        "A String",
      ],
      "mainClass": "A String", # The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`.
      "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Spark drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
        "A String",
      ],
      "mainJarFileUri": "A String", # The HCFS URI of the jar file that contains the main class.
      "properties": { # [Optional] A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
        "a_key": "A String",
      },
    },
    "pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
      "mainPythonFileUri": "A String", # [Required] The HCFS URI of the main Python file to use as the driver. Must be a .py file.
      "args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
        "A String",
      ],
      "loggingConfig": { # The runtime logging config of the job. # [Optional] The runtime log config for job execution.
        "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
          "a_key": "A String",
        },
      },
      "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks.
        "A String",
      ],
      "fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Python drivers and distributed tasks. Useful for naively parallel tasks.
        "A String",
      ],
      "archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Python drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
        "A String",
      ],
      "pythonFileUris": [ # [Optional] HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip.
        "A String",
      ],
      "properties": { # [Optional] A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
        "a_key": "A String",
      },
    },
    "hiveJob": { # A Cloud Dataproc job for running Hive queries on YARN. # Job is a Hive job.
      "queryFileUri": "A String", # The HCFS URI of the script that contains Hive queries.
      "scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`).
        "a_key": "A String",
      },
      "jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs.
        "A String",
      ],
      "queryList": { # A list of queries to run on a cluster. # A list of queries.
        "queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of a Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
          "A String",
        ],
      },
      "continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
      "properties": { # [Optional] A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code.
        "a_key": "A String",
      },
    },
  }