Tuesday, 30 July 2019

py4j.Py4JException: Method abs([class java.lang.String]) does not exist


# TODO

# Reproduces the error: abs() is given the plain string "salary" instead of a
# Column, which triggers "Method abs([class java.lang.String]) does not exist".
# NOTE(review): col is not imported in this snippet; presumably it is already
# in scope from an earlier notebook cell -- confirm.
from pyspark.sql.functions import abs
peopleWithFixedSalariesDF = peopleDF.select(abs("salary")).filter(col("salary")<0);
display(peopleWithFixedSalariesDF)


--------------------------------------------------------------------------- Py4JError Traceback (most recent call last) <command-3332536293827318> in <module>() 2 3 from pyspark.sql.functions import abs ----> 4 peopleWithFixedSalariesDF = peopleDF.select(abs("salary")).filter(col("salary")<0); 5 display(peopleWithFixedSalariesDF) /databricks/spark/python/pyspark/sql/functions.py in _(col) 42 def _(col): 43 sc = SparkContext._active_spark_context ---> 44 jc = getattr(sc._jvm.functions, name)(col._jc if isinstance(col, Column) else col) 45 return Column(jc) 46 _.__name__ = name /databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args) 1255 answer = self.gateway_client.send_command(command) 1256 return_value = get_return_value( -> 1257 answer, self.gateway_client, self.target_id, self.name) 1258 1259 for temp_arg in temp_args: /databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw) 61 def deco(*a, **kw): 62 try: ---> 63 return f(*a, **kw) 64 except py4j.protocol.Py4JJavaError as e: 65 s = e.java_exception.toString() /databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name) 330 raise Py4JError( 331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n". --> 332 format(target_id, ".", name, value)) 333 else: 334 raise Py4JError( Py4JError: An error occurred while calling z:org.apache.spark.sql.functions.abs. Trace: py4j.Py4JException: Method abs([class java.lang.String]) does not exist at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:341) at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:362) at py4j.Gateway.invoke(Gateway.java:289) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:251) at java.lang.Thread.run(Thread.java:748)





Solution

The error occurs because abs("salary") passes the plain string "salary" to Spark rather than a Column. Spark's abs function only accepts a Column argument; no version of abs that takes a string exists, hence the "Method abs([class java.lang.String]) does not exist" error.

To fix this, wrap the name in the col function so that "salary" is treated as a column of the DataFrame and not as a literal string.


Thus, to solve the problem, use:


# Import col alongside abs so the column name can be wrapped in a Column.
# NOTE: this import shadows Python's built-in abs() for the rest of the notebook.
from pyspark.sql.functions import abs, col

# Filter on the original "salary" column first, then project its absolute
# value. Filtering before the select avoids referring to a column that the
# select has already dropped; abs() now receives a Column (via col) rather
# than a bare string, which is what caused the Py4J error.
peopleWithFixedSalariesDF = (
    peopleDF
    .filter(col("salary") < 0)
    .select(abs(col("salary")))
)
display(peopleWithFixedSalariesDF)


No comments:

Post a Comment