Hive functions. version 0.8

% (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPMod

a % b - Returns the remainder when dividing a by b


& (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPBitAnd

a & b - Bitwise and

Example:
  > SELECT 3 & 5 FROM src LIMIT 1;
  1

* (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPMultiply

a * b - Multiplies a by b


+ (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPPlus

a + b - Returns a+b


- (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPNegative

- a - Returns -a


/ (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPDivide

a / b - Divide a by b

Example:
  > SELECT 3 / 2 FROM src LIMIT 1;
  1.5

< (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan

a < b - Returns TRUE if a is less than b


<= (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan

a <= b - Returns TRUE if a is not greater than b


<> (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual

a <> b - Returns TRUE if a is not equal to b


= (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual

a = b - Returns TRUE if a equals b and false otherwise


> (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan

a > b - Returns TRUE if a is greater than b


>= (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan

a >= b - Returns TRUE if a is not smaller than b


E

org.apache.hadoop.hive.ql.udf.UDFE

E() - returns E

Example:
 > SELECT E() FROM src LIMIT 1;
 2.718281828459045

PI

org.apache.hadoop.hive.ql.udf.UDFPI

PI() - returns pi

Example:
 > SELECT PI() FROM src LIMIT 1;
 3.14159...

UDFDegrees

org.apache.hadoop.hive.ql.udf.UDFDegrees

UDFDegrees(x) - Converts radians to degrees

Example:
  > SELECT UDFDegrees(30) FROM src LIMIT 1;
  -1

^ (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPBitXor

a ^ b - Bitwise exclusive or

Example:
  > SELECT 3 ^ 5 FROM src LIMIT 1;
  2

abs (builtin)

org.apache.hadoop.hive.ql.udf.UDFAbs

abs(x) - returns the absolute value of x

Example:
  > SELECT abs(0) FROM src LIMIT 1;
  0
  > SELECT abs(-5) FROM src LIMIT 1;
  5

acos (builtin)

org.apache.hadoop.hive.ql.udf.UDFAcos

acos(x) - returns the arc cosine of x if -1<=x<=1 or NULL otherwise

Example:
  > SELECT acos(1) FROM src LIMIT 1;
  0
  > SELECT acos(2) FROM src LIMIT 1;
  NULL

and (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd

a and b - Logical and


array (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray

array(n0, n1...) - Creates an array with the given elements


array_contains (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains

array_contains(array, value) - Returns TRUE if the array contains value.

Example:
  > SELECT array_contains(array(1, 2, 3), 2) FROM src LIMIT 1;
  true

ascii (builtin)

org.apache.hadoop.hive.ql.udf.UDFAscii

ascii(str) - returns the numeric value of the first character of str

Returns 0 if str is empty or NULL if str is NULL
Example:
  > SELECT ascii('222') FROM src LIMIT 1;  50
  > SELECT ascii(2) FROM src LIMIT 1;
  50

asin (builtin)

org.apache.hadoop.hive.ql.udf.UDFAsin

asin(x) - returns the arc sine of x if -1<=x<=1 or NULL otherwise

Example:
  > SELECT asin(0) FROM src LIMIT 1;
  0
  > SELECT asin(2) FROM src LIMIT 1;
  NULL

assert_true (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFAssertTrue

assert_true(condition) - Throw an exception if 'condition' is not true.

Example:
   > SELECT assert_true(x >= 0) FROM src LIMIT 1;
  NULL

atan (builtin)

org.apache.hadoop.hive.ql.udf.UDFAtan

atan(x) - returns the atan (arctan) of x (x is in radians)

Example:
   > SELECT atan(0) FROM src LIMIT 1;
  0

avg (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage

avg(x) - Returns the mean of a set of numbers


bin (builtin)

org.apache.hadoop.hive.ql.udf.UDFBin

bin(n) - returns n in binary

n is a BIGINT. Returns NULL if n is NULL.
Example:
  > SELECT bin(13) FROM src LIMIT 1
  '1101'

binary

org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary

binary(a) - cast a to binary

Currently only string or binary can be cast into binary

ceil (builtin)

org.apache.hadoop.hive.ql.udf.UDFCeil

ceil(x) - Find the smallest integer not smaller than x

Example:
  > SELECT ceil(-0.1) FROM src LIMIT 1;
  0
  > SELECT ceil(5) FROM src LIMIT 1;
  5

ceiling (builtin)

org.apache.hadoop.hive.ql.udf.UDFCeil

ceiling(x) - Find the smallest integer not smaller than x

Example:
  > SELECT ceiling(-0.1) FROM src LIMIT 1;
  0
  > SELECT ceiling(5) FROM src LIMIT 1;
  5

coalesce (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce

coalesce(a1, a2, ...) - Returns the first non-null argument

Example:
  > SELECT coalesce(NULL, 1, NULL) FROM src LIMIT 1;
  1

collect_set (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet

collect_set(x) - Returns a set of objects with duplicate elements eliminated


concat (builtin)

org.apache.hadoop.hive.ql.udf.UDFConcat

concat(str1, str2, ... strN) - returns the concatenation of str1, str2, ... strN or concat(bin1, bin2, ... binN) - returns the concatenation of bytes in binary data bin1, bin2, ... binN

Returns NULL if any argument is NULL.
Example:
  > SELECT concat('abc', 'def') FROM src LIMIT 1;
  'abcdef'

concat_ws (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcatWS

concat_ws(separator, str1, str2, ...) - returns the concatenation of the strings separated by the separator.

Example:
  > SELECT concat_ws('ce', 'fa', 'book') FROM src LIMIT 1;
  'facebook'

context_ngrams (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFContextNGrams

context_ngrams(expr, array<string1, string2, ...>, k, pf) estimates the top-k most frequent n-grams that fit into the specified context. The second parameter specifies a string of words that specify the positions of the n-gram elements, with a null value standing in for a 'blank' that must be filled by an n-gram element.

The primary expression must be an array of strings, or an array of arrays of strings, such as the return type of the sentences() UDF. The second parameter specifies the context -- for example, array("i", "love", null) -- which would estimate the top 'k' words that follow the phrase "i love" in the primary expression. The optional fourth parameter 'pf' controls the memory used by the heuristic. Larger values will yield better accuracy, but use more memory. Example usage:
  SELECT context_ngrams(sentences(lower(review)), array("i", "love", null, null), 10) FROM movies
would attempt to determine the 10 most common two-word phrases that follow "i love" in a database of free-form natural language movie reviews.

conv (builtin)

org.apache.hadoop.hive.ql.udf.UDFConv

conv(num, from_base, to_base) - convert num from from_base to to_base

If to_base is negative, treat num as a signed integer,otherwise, treat it as an unsigned integer.
Example:
  > SELECT conv('100', 2, 10) FROM src LIMIT 1;
  '4'
  > SELECT conv(-10, 16, -10) FROM src LIMIT 1;
  '16'

corr (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCorrelation

corr(x,y) - Returns the Pearson coefficient of correlation
between a set of number pairs

The function takes as arguments any pair of numeric types and returns a double.
Any pair with a NULL is ignored. If the function is applied to an empty set or
a singleton set, NULL will be returned. Otherwise, it computes the following:
   COVAR_POP(x,y)/(STDDEV_POP(x)*STDDEV_POP(y))
where neither x nor y is null,
COVAR_POP is the population covariance,
and STDDEV_POP is the population standard deviation.

cos (builtin)

org.apache.hadoop.hive.ql.udf.UDFCos

cos(x) - returns the cosine of x (x is in radians)

Example:
   > SELECT cos(0) FROM src LIMIT 1;
  1

count (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount

count(*) - Returns the total number of retrieved rows, including rows containing NULL values.
count(expr) - Returns the number of rows for which the supplied expression is non-NULL.
count(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-NULL.


covar_pop (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovariance

covar_pop(x,y) - Returns the population covariance of a set of number pairs

The function takes as arguments any pair of numeric types and returns a double.
Any pair with a NULL is ignored. If the function is applied to an empty set, NULL
will be returned. Otherwise, it computes the following:
   (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/COUNT(x,y)
where neither x nor y is null.

covar_samp (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovarianceSample

covar_samp(x,y) - Returns the sample covariance of a set of number pairs

The function takes as arguments any pair of numeric types and returns a double.
Any pair with a NULL is ignored. If the function is applied to an empty set, NULL
will be returned. Otherwise, it computes the following:
   (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/(COUNT(x,y)-1)
where neither x nor y is null.

covariance

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovariance

covariance(x,y) - Returns the population covariance of a set of number pairs

The function takes as arguments any pair of numeric types and returns a double.
Any pair with a NULL is ignored. If the function is applied to an empty set, NULL
will be returned. Otherwise, it computes the following:
   (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/COUNT(x,y)
where neither x nor y is null.

create_union (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnion

create_union(tag, obj1, obj2, obj3, ...) - Creates a union with the object for given tag

Example:
  > SELECT create_union(1, 1, "one") FROM src LIMIT 1;
  one

date_add (builtin)

org.apache.hadoop.hive.ql.udf.UDFDateAdd

date_add(start_date, num_days) - Returns the date that is num_days after start_date.

start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. num_days is a number. The time part of start_date is ignored.
Example:
   > SELECT date_add('2009-30-07', 1) FROM src LIMIT 1;
  '2009-31-07'

date_sub (builtin)

org.apache.hadoop.hive.ql.udf.UDFDateSub

date_sub(start_date, num_days) - Returns the date that is num_days before start_date.

start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. num_days is a number. The time part of start_date is ignored.
Example:
   > SELECT date_sub('2009-30-07', 1) FROM src LIMIT 1;
  '2009-29-07'

datediff (builtin)

org.apache.hadoop.hive.ql.udf.UDFDateDiff

datediff(date1, date2) - Returns the number of days between date1 and date2

date1 and date2 are strings in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. The time parts are ignored.If date1 is earlier than date2, the result is negative.
Example:
   > SELECT datediff('2009-30-07', '2009-31-07') FROM src LIMIT 1;
  1

day (builtin)

org.apache.hadoop.hive.ql.udf.UDFDayOfMonth

day(date) - Returns the date of the month of date

date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'.
Example:
   > SELECT day('2009-30-07', 1) FROM src LIMIT 1;
  30

dayofmonth (builtin)

org.apache.hadoop.hive.ql.udf.UDFDayOfMonth

dayofmonth(date) - Returns the date of the month of date

date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'.
Example:
   > SELECT dayofmonth('2009-30-07', 1) FROM src LIMIT 1;
  30

dboutput

org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput

dboutput(jdbcstring,username,password,preparedstatement,[arguments]) - sends data to a jdbc driver

argument 0 is the JDBC connection string
argument 1 is the user name
argument 2 is the password
argument 3 is an SQL query to be used in the PreparedStatement
argument (4-n) The remaining arguments must be primitive and are passed to the PreparedStatement object

div (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPLongDivide

a div b - Divide a by b rounded to the long integer

Example:
  > SELECT 3 div 2 FROM src LIMIT 1;
  1

elt (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt

elt(n, str1, str2, ...) - returns the n-th string

Example:
  > SELECT elt(1, 'face', 'book') FROM src LIMIT 1;
  'face'

ewah_bitmap (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEWAHBitmap

ewah_bitmap(expr) - Returns an EWAH-compressed bitmap representation of a column.


ewah_bitmap_and (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapAnd

ewah_bitmap_and(b1, b2) - Return an EWAH-compressed bitmap that is the bitwise AND of two bitmaps.


ewah_bitmap_empty (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapEmpty

ewah_bitmap_empty(bitmap) - Predicate that tests whether an EWAH-compressed bitmap is all zeros


ewah_bitmap_or (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapOr

ewah_bitmap_or(b1, b2) - Return an EWAH-compressed bitmap that is the bitwise OR of two bitmaps.


example_max

org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax

example_max(expr) - Returns the maximum value of expr


example_min

org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMin

example_min(expr) - Returns the minimum value of expr


exp (builtin)

org.apache.hadoop.hive.ql.udf.UDFExp

exp(x) - Returns e to the power of x

Example:
   > SELECT exp(0) FROM src LIMIT 1;
  1

explode (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode

explode(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns


explode2

org.apache.hadoop.hive.contrib.udtf.example.GenericUDTFExplode2

explode2(a) - like explode, but outputs two identical columns (for testing purposes)


field (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFField

field(str, str1, str2, ...) - returns the index of str in the str1,str2,... list or 0 if not found

All primitive types are supported, arguments are compared using str.equals(x). If str is NULL, the return value is 0.

find_in_set (builtin)

org.apache.hadoop.hive.ql.udf.UDFFindInSet

find_in_set(str,str_array) - Returns the first occurrence of str in str_array where str_array is a comma-delimited string. Returns null if either argument is null. Returns 0 if the first argument has any commas.

Example:
  > SELECT find_in_set('ab','abc,b,ab,c,def') FROM src LIMIT 1;
  3
  > SELECT * FROM src1 WHERE NOT find_in_set(key,'311,128,345,956')=0;
  311  val_311
  128

floor (builtin)

org.apache.hadoop.hive.ql.udf.UDFFloor

floor(x) - Find the largest integer not greater than x

Example:
  > SELECT floor(-0.1) FROM src LIMIT 1;
  -1
  > SELECT floor(5) FROM src LIMIT 1;
  5

from_unixtime (builtin)

org.apache.hadoop.hive.ql.udf.UDFFromUnixTime

from_unixtime(unix_time, format) - returns unix_time in the specified format

Example:
  > SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') FROM src LIMIT 1;
  '1970-01-01 00:00:00'

get_json_object (builtin)

org.apache.hadoop.hive.ql.udf.UDFJson

get_json_object(json_txt, path) - Extract a json object from path

Extract json object from a json string based on json path specified, and return json string of the extracted json object. It will return null if the input json string is invalid.
A limited version of JSONPath supported:
  $   : Root object
  .   : Child operator
  []  : Subscript operator for array
  *   : Wildcard for []
Syntax not supported that's worth noticing:
  ''  : Zero length string as key
  ..  : Recursive descent
  @   : Current object/element
  ()  : Script expression
  ?() : Filter (script) expression.
  [,] : Union operator
  [start:end:step] : array slice operator

hash (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash

hash(a1, a2, ...) - Returns a hash value of the arguments


hex (builtin)

org.apache.hadoop.hive.ql.udf.UDFHex

hex(n or str) - Convert the argument to hexadecimal

If the argument is a string, returns two hex digits for each character in the string.
If the argument is a number, returns the hexadecimal representation.
Example:
  > SELECT hex(17) FROM src LIMIT 1;
  'H1'
  > SELECT hex('Facebook') FROM src LIMIT 1;
  '46616365626F6F6B'

histogram_numeric (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFHistogramNumeric

histogram_numeric(expr, nb) - Computes a histogram on numeric 'expr' using nb bins.

Example:
> SELECT histogram_numeric(val, 3) FROM src;
[{"x":100,"y":14.0},{"x":200,"y":22.0},{"x":290.5,"y":11.0}]
The return value is an array of (x,y) pairs representing the centers of the histogram's bins. As the value of 'nb' is increased, the histogram approximationgets finer-grained, but may yield artifacts around outliers. In practice, 20-40 histogram bins appear to work well, with more bins being required for skewed or smaller datasets. Note that this function creates a histogram with non-uniform bin widths. It offers no guarantees in terms of the mean-squared-error of the histogram, but in practice is comparable to the histograms produced by the R/S-Plusstatistical computing packages.

hour (builtin)

org.apache.hadoop.hive.ql.udf.UDFHour

hour(date) - Returns the hour of date

date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'.
Example:
   > SELECT hour('2009-07-30 12:58:59') FROM src LIMIT 1;
  12
  > SELECT hour('12:58:59') FROM src LIMIT 1;
  12

in (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn

test in(val1, val2...) - returns true if test equals any valN


in_file (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFInFile

in_file(str, filename) - Returns true if str appears in the file


index (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex

index(a, n) - Returns the n-th element of a


instr (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFInstr

instr(str, substr) - Returns the index of the first occurance of substr in str

Example:
  > SELECT instr('Facebook', 'boo') FROM src LIMIT 1;
  5

isnotnull (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull

isnotnull a - Returns true if a is not NULL and false otherwise


isnull (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull

isnull a - Returns true if a is NULL and false otherwise


json_tuple (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDTFJSONTuple

json_tuple(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. All the input parameters and output column types are string.


lcase (builtin)

org.apache.hadoop.hive.ql.udf.UDFLower

lcase(str) - Returns str with all characters changed to lowercase

Example:
  > SELECT lcase('Facebook') FROM src LIMIT 1;
  'facebook'

length (builtin)

org.apache.hadoop.hive.ql.udf.UDFLength

length(str | binary) - Returns the length of str or number of bytes in binary data

Example:
  > SELECT length('Facebook') FROM src LIMIT 1;
  8

like (builtin)

org.apache.hadoop.hive.ql.udf.UDFLike

like(str, pattern) - Checks if str matches pattern

Example:
  > SELECT a.* FROM srcpart a WHERE a.hr like '%2' LIMIT 1;
  27      val_27  2008-04-08      12

ln (builtin)

org.apache.hadoop.hive.ql.udf.UDFLn

ln(x) - Returns the natural logarithm of x

Example:
  > SELECT ln(1) FROM src LIMIT 1;
  0

locate (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate

locate(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos

Example:
  > SELECT locate('bar', 'foobarbar', 5) FROM src LIMIT 1;
  7

log (builtin)

org.apache.hadoop.hive.ql.udf.UDFLog

log([b], x) - Returns the logarithm of x with base b

Example:
  > SELECT log(13, 13) FROM src LIMIT 1;
  1

log10 (builtin)

org.apache.hadoop.hive.ql.udf.UDFLog10

log10(x) - Returns the logarithm of x with base 10

Example:
  > SELECT log10(10) FROM src LIMIT 1;
  1

log2 (builtin)

org.apache.hadoop.hive.ql.udf.UDFLog2

log2(x) - Returns the logarithm of x with base 2

Example:
  > SELECT log2(2) FROM src LIMIT 1;
  1

lower (builtin)

org.apache.hadoop.hive.ql.udf.UDFLower

lower(str) - Returns str with all characters changed to lowercase

Example:
  > SELECT lower('Facebook') FROM src LIMIT 1;
  'facebook'

lpad (builtin)

org.apache.hadoop.hive.ql.udf.UDFLpad

lpad(str, len, pad) - Returns str, left-padded with pad to a length of len

If str is longer than len, the return value is shortened to len characters.
Example:
  > SELECT lpad('hi', 5, '??') FROM src LIMIT 1;
  '???hi'  > SELECT lpad('hi', 1, '??') FROM src LIMIT 1;
  'h'

ltrim (builtin)

org.apache.hadoop.hive.ql.udf.UDFLTrim

ltrim(str) - Removes the leading space characters from str

Example:
  > SELECT ltrim('   facebook') FROM src LIMIT 1;
  'facebook'

map (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap

map(key0, value0, key1, value1...) - Creates a map with the given key/value pairs


map_keys (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFMapKeys

map_keys(map) - Returns an unordered array containing the keys of the input map.


map_values (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFMapValues

map_values(map) - Returns an unordered array containing the values of the input map.


max (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax

max(expr) - Returns the maximum value of expr


min (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin

min(expr) - Returns the minimum value of expr


minute (builtin)

org.apache.hadoop.hive.ql.udf.UDFMinute

minute(date) - Returns the minute of date

date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'.
Example:
   > SELECT minute('2009-07-30 12:58:59') FROM src LIMIT 1;
  58
  > SELECT minute('12:58:59') FROM src LIMIT 1;
  58

month (builtin)

org.apache.hadoop.hive.ql.udf.UDFMonth

month(date) - Returns the month of date

Example:
  > SELECT month('2009-30-07') FROM src LIMIT 1;
  7

named_struct (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFNamedStruct

named_struct(name1, val1, name2, val2, ...) - Creates a struct with the given field names and values


ngrams (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFnGrams

ngrams(expr, n, k, pf) - Estimates the top-k n-grams in rows that consist of sequences of strings, represented as arrays of strings, or arrays of arrays of strings. 'pf' is an optional precision factor that controls memory usage.

The parameter 'n' specifies what type of n-grams are being estimated. Unigrams are n = 1, and bigrams are n = 2. Generally, n will not be greater than about 5. The 'k' parameter specifies how many of the highest-frequency n-grams will be returned by the UDAF. The optional precision factor 'pf' specifies how much memory to use for estimation; more memory will give more accurate frequency counts, but could crash the JVM. The default value is 20, which internally maintains 20*k n-grams, but only returns the k highest frequency ones. The output is an array of structs with the top-k n-grams. It might be convenient to explode() the output of this UDAF.

not (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot

not a - Logical not


or (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr

a or b - Logical or


parse_url (builtin)

org.apache.hadoop.hive.ql.udf.UDFParseUrl

parse_url(url, partToExtract[, key]) - extracts a part from a URL

Parts: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, USERINFO
key specifies which query to extract
Example:
  > SELECT parse_url('http://facebook.com/path/p1.php?query=1', 'HOST') FROM src LIMIT 1;
  'facebook.com'
  > SELECT parse_url('http://facebook.com/path/p1.php?query=1', 'QUERY') FROM src LIMIT 1;
  'query=1'
  > SELECT parse_url('http://facebook.com/path/p1.php?query=1', 'QUERY', 'query') FROM src LIMIT 1;
  '1'

parse_url_tuple (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDTFParseUrlTuple

parse_url_tuple(url, partname1, partname2, ..., partnameN) - extracts N (N>=1) parts from a URL.
It takes a URL and one or multiple partnames, and returns a tuple. All the input parameters and output column types are string.

Partname: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, USERINFO, QUERY:<KEY_NAME>
Note: Partnames are case-sensitive, and should not contain unnecessary white spaces.
Example:
  > SELECT b.* FROM src LATERAL VIEW parse_url_tuple(fullurl, 'HOST', 'PATH', 'QUERY', 'QUERY:id') b as host, path, query, query_id LIMIT 1;
  > SELECT parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE',  'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from src a;

percentile (builtin)

org.apache.hadoop.hive.ql.udf.UDAFPercentile

percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array


percentile_approx (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileApprox

percentile_approx(expr, pc, [nb]) - For very large data, computes an approximate percentile value from a histogram, using the optional argument [nb] as the number of histogram bins to use. A higher value of nb results in a more accurate approximation, at the cost of higher memory usage.

'expr' can be any numeric column, including doubles and floats, and 'pc' is either a single double/float with a requested percentile, or an array of double/float with multiple percentiles. If 'nb' is not specified, the default approximation is done with 10,000 histogram bins, which means that if there are 10,000 or fewer unique values in 'expr', you can expect an exact result. The percentile() function always computes an exact percentile and can run out of memory if there are too many unique values in a column, which necessitates this function.
Example (three percentiles requested using a finer histogram approximation):
> SELECT percentile_approx(val, array(0.5, 0.95, 0.98), 100000) FROM somedata;
[0.05,1.64,2.26]

pmod (builtin)

org.apache.hadoop.hive.ql.udf.UDFPosMod

a pmod b - Compute the positive modulo


positive (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPPositive

positive a - Returns a


pow (builtin)

org.apache.hadoop.hive.ql.udf.UDFPower

pow(x1, x2) - raise x1 to the power of x2

Example:
  > SELECT pow(2, 3) FROM src LIMIT 1;
  8

power (builtin)

org.apache.hadoop.hive.ql.udf.UDFPower

power(x1, x2) - raise x1 to the power of x2

Example:
  > SELECT power(2, 3) FROM src LIMIT 1;
  8

radians (builtin)

org.apache.hadoop.hive.ql.udf.UDFRadians

radians(x) - Converts degrees to radians

Example:
  > SELECT radians(90) FROM src LIMIT 1;
  1.5707963267949mo

rand (builtin)

org.apache.hadoop.hive.ql.udf.UDFRand

rand([seed]) - Returns a pseudorandom number between 0 and 1


reflect (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFReflect

reflect(class,method[,arg1[,arg2..]]) calls method with reflection

Use this UDF to call Java methods by matching the argument signature

regexp (builtin)

org.apache.hadoop.hive.ql.udf.UDFRegExp

str regexp regexp - Returns true if str matches regexp and false otherwise

Example:
  > SELECT 'fb' regexp '.*' FROM src LIMIT 1;
  true

regexp_extract (builtin)

org.apache.hadoop.hive.ql.udf.UDFRegExpExtract

regexp_extract(str, regexp[, idx]) - extracts a group that matches regexp

Example:
  > SELECT regexp_extract('100-200', '(\d+)-(\d+)', 1) FROM src LIMIT 1;
  '100'

regexp_replace (builtin)

org.apache.hadoop.hive.ql.udf.UDFRegExpReplace

regexp_replace(str, regexp, rep) - replace all substrings of str that match regexp with rep

Example:
  > SELECT regexp_replace('100-200', '(\d+)', 'num') FROM src LIMIT 1;
  'num-num'

repeat (builtin)

org.apache.hadoop.hive.ql.udf.UDFRepeat

repeat(str, n) - repeat str n times

Example:
  > SELECT repeat('123', 2) FROM src LIMIT 1;
  '123123'

reverse (builtin)

org.apache.hadoop.hive.ql.udf.UDFReverse

reverse(str) - reverse str

Example:
  > SELECT reverse('Facebook') FROM src LIMIT 1;
  'koobecaF'

rlike (builtin)

org.apache.hadoop.hive.ql.udf.UDFRegExp

str rlike regexp - Returns true if str matches regexp and false otherwise

Example:
  > SELECT 'fb' rlike '.*' FROM src LIMIT 1;
  true

rot13

org.apache.hive.pdktest.Rot13

rot13(str) - Returns str with all characters transposed via rot13

Example:
  > SELECT rot13('Facebook') FROM src LIMIT 1;
  'Snprobbx'

round (builtin)

org.apache.hadoop.hive.ql.udf.UDFRound

round(x[, d]) - round x to d decimal places

Example:
  > SELECT round(12.3456, 1) FROM src LIMIT 1;
  12.3'

row_sequence

org.apache.hadoop.hive.contrib.udf.UDFRowSequence

row_sequence() - Returns a generated row sequence number starting from 1


rpad (builtin)

org.apache.hadoop.hive.ql.udf.UDFRpad

rpad(str, len, pad) - Returns str, right-padded with pad to a length of len

If str is longer than len, the return value is shortened to len characters.
Example:
  > SELECT rpad('hi', 5, '??') FROM src LIMIT 1;
  'hi???'  > SELECT rpad('hi', 1, '??') FROM src LIMIT 1;
  'h'

rtrim (builtin)

org.apache.hadoop.hive.ql.udf.UDFRTrim

rtrim(str) - Removes the trailing space characters from str

Example:
  > SELECT rtrim('facebook   ') FROM src LIMIT 1;
  'facebook'

second (builtin)

org.apache.hadoop.hive.ql.udf.UDFSecond

second(date) - Returns the second of date

date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'.
Example:
   > SELECT second('2009-07-30 12:58:59') FROM src LIMIT 1;
  59
  > SELECT second('12:58:59') FROM src LIMIT 1;
  59

sentences (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFSentences

sentences(str, lang, country) - Splits str into arrays of sentences, where each sentence is an array of words. The 'lang' and'country' arguments are optional, and if omitted, the default locale is used.

Example:
  > SELECT sentences('Hello there! I am a UDF.') FROM src LIMIT 1;
  [ ["Hello", "there"], ["I", "am", "a", "UDF"] ]
  > SELECT sentences(review, language) FROM movies;
Unnecessary punctuation, such as periods and commas in English, is automatically stripped. If specified, 'lang' should be a two-letter ISO-639 language code (such as 'en'), and 'country' should be a two-letter ISO-3166 code (such as 'us'). Not all country and language codes are fully supported, and if an unsupported code is specified, a default locale is used to process that string.

sign (builtin)

org.apache.hadoop.hive.ql.udf.UDFSign

sign(x) - returns the sign of x )

Example:
   > SELECT sign(40) FROM src LIMIT 1;
  1

sin (builtin)

org.apache.hadoop.hive.ql.udf.UDFSin

sin(x) - returns the sine of x (x is in radians)

Example:
   > SELECT sin(0) FROM src LIMIT 1;
  0

size (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFSize

size(a) - Returns the size of a


space (builtin)

org.apache.hadoop.hive.ql.udf.UDFSpace

space(n) - returns n spaces

Example:
   > SELECT space(2) FROM src LIMIT 1;
  '  '

split (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFSplit

split(str, regex) - Splits str around occurances that match regex

Example:
  > SELECT split('oneAtwoBthreeC', '[ABC]') FROM src LIMIT 1;
  ["one", "two", "three"]

sqrt (builtin)

org.apache.hadoop.hive.ql.udf.UDFSqrt

sqrt(x) - returns the square root of x

Example:
   > SELECT sqrt(4) FROM src LIMIT 1;
  2

stack (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDTFStack

stack(n, cols...) - turns k columns into n rows of size k/n each


std (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStd

std(x) - Returns the standard deviation of a set of numbers


stddev (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStd

stddev(x) - Returns the standard deviation of a set of numbers


stddev_pop (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStd

stddev_pop(x) - Returns the standard deviation of a set of numbers


stddev_samp (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStdSample

stddev_samp(x) - Returns the sample standard deviation of a set of numbers


str_to_map (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFStringToMap

str_to_map(text, delimiter1, delimiter2) - Creates a map by parsing text

Split text into key-value pairs using two delimiters. The first delimiter seperates pairs, and the second delimiter sperates key and value. If only one parameter is given, default delimiters are used: ',' as delimiter1 and '=' as delimiter2.

struct (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct

struct(col1, col2, col3, ...) - Creates a struct with the given field values


substr (builtin)

org.apache.hadoop.hive.ql.udf.UDFSubstr

substr(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstr(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len

pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str.
Example:
   > SELECT substr('Facebook', 5) FROM src LIMIT 1;
  'book'
  > SELECT substr('Facebook', -5) FROM src LIMIT 1;
  'ebook'
  > SELECT substr('Facebook', 5, 1) FROM src LIMIT 1;
  'b'

substring (builtin)

org.apache.hadoop.hive.ql.udf.UDFSubstr

substring(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstring(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len

pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str.
Example:
   > SELECT substring('Facebook', 5) FROM src LIMIT 1;
  'book'
  > SELECT substring('Facebook', -5) FROM src LIMIT 1;
  'ebook'
  > SELECT substring('Facebook', 5, 1) FROM src LIMIT 1;
  'b'

sum (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum

sum(x) - Returns the sum of a set of numbers


tan (builtin)

org.apache.hadoop.hive.ql.udf.UDFTan

tan(x) - returns the tangent of x (x is in radians)

Example:
   > SELECT tan(0) FROM src LIMIT 1;
  1

to_date (builtin)

org.apache.hadoop.hive.ql.udf.UDFDate

to_date(expr) - Extracts the date part of the date or datetime expression expr

Example:
   > SELECT to_date('2009-30-07 04:17:52') FROM src LIMIT 1;
  '2009-30-07'

trim (builtin)

org.apache.hadoop.hive.ql.udf.UDFTrim

trim(str) - Removes the leading and trailing space characters from str

Example:
  > SELECT trim('   facebook  ') FROM src LIMIT 1;
  'facebook'

ucase (builtin)

org.apache.hadoop.hive.ql.udf.UDFUpper

ucase(str) - Returns str with all characters changed to uppercase

Example:
  > SELECT ucase('Facebook') FROM src LIMIT 1;
  'FACEBOOK'

unhex (builtin)

org.apache.hadoop.hive.ql.udf.UDFUnhex

unhex(str) - Converts hexadecimal argument to string

Performs the inverse operation of HEX(str). That is, it interprets
each pair of hexadecimal digits in the argument as a number and
converts it to the character represented by the number. The
resulting characters are returned as a binary string.

Example:
> SELECT UNHEX('4D7953514C') from src limit 1;
'MySQL'
> SELECT UNHEX(HEX('string')) from src limit 1;
'string'
> SELECT HEX(UNHEX('1267')) from src limit 1;
'1267'

The characters in the argument string must be legal hexadecimal
digits: '0' .. '9', 'A' .. 'F', 'a' .. 'f'. If UNHEX() encounters
any nonhexadecimal digits in the argument, it returns NULL. Also,
if there are an odd number of characters a leading 0 is appended.

union_map

org.apache.hive.builtins.UDAFUnionMap

union_map(col) - aggregate given maps into a single map

Aggregate maps, returns as a HashMap.

unix_timestamp (builtin)

org.apache.hadoop.hive.ql.udf.UDFUnixTimeStamp

unix_timestamp([date[, pattern]]) - Returns the UNIX timestamp

Converts the current or specified time to number of seconds since 1970-01-01.

upper (builtin)

org.apache.hadoop.hive.ql.udf.UDFUpper

upper(str) - Returns str with all characters changed to uppercase

Example:
  > SELECT upper('Facebook') FROM src LIMIT 1;
  'FACEBOOK'

var_pop (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance

var_pop(x) - Returns the variance of a set of numbers


var_samp (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVarianceSample

var_samp(x) - Returns the sample variance of a set of numbers


variance (builtin)

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance

variance(x) - Returns the variance of a set of numbers


xpath (builtin)

org.apache.hadoop.hive.ql.udf.xml.GenericUDFXPath

xpath(xml, xpath) - Returns a string array of values within xml nodes that match the xpath expression

Example:
  > SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>', 'a/text()') FROM src LIMIT 1
  []
  > SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>', 'a/b/text()') FROM src LIMIT 1
  ["b1","b2","b3"]
  > SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>', 'a/c/text()') FROM src LIMIT 1
  ["c1","c2"]

xpath_boolean (builtin)

org.apache.hadoop.hive.ql.udf.xml.UDFXPathBoolean

xpath_boolean(xml, xpath) - Evaluates a boolean xpath expression

Example:
  > SELECT xpath_boolean('<a><b>1</b></a>','a/b') FROM src LIMIT 1;
  true
  > SELECT xpath_boolean('<a><b>1</b></a>','a/b = 2') FROM src LIMIT 1;
  false

xpath_double (builtin)

org.apache.hadoop.hive.ql.udf.xml.UDFXPathDouble

xpath_double(xml, xpath) - Returns a double value that matches the xpath expression

Example:
  > SELECT xpath_double('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1;
  3.0

xpath_float (builtin)

org.apache.hadoop.hive.ql.udf.xml.UDFXPathFloat

xpath_float(xml, xpath) - Returns a float value that matches the xpath expression

Example:
  > SELECT xpath_float('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1;
  3.0

xpath_int (builtin)

org.apache.hadoop.hive.ql.udf.xml.UDFXPathInteger

xpath_int(xml, xpath) - Returns an integer value that matches the xpath expression

Example:
  > SELECT xpath_int('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1;
  3

xpath_long (builtin)

org.apache.hadoop.hive.ql.udf.xml.UDFXPathLong

xpath_long(xml, xpath) - Returns a long value that matches the xpath expression

Example:
  > SELECT xpath_long('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1;
  3

xpath_number (builtin)

org.apache.hadoop.hive.ql.udf.xml.UDFXPathDouble

xpath_number(xml, xpath) - Returns a double value that matches the xpath expression

Example:
  > SELECT xpath_number('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1;
  3.0

xpath_short (builtin)

org.apache.hadoop.hive.ql.udf.xml.UDFXPathShort

xpath_short(xml, xpath) - Returns a short value that matches the xpath expression

Example:
  > SELECT xpath_short('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1;
  3

xpath_string (builtin)

org.apache.hadoop.hive.ql.udf.xml.UDFXPathString

xpath_string(xml, xpath) - Returns the text contents of the first xml node that matches the xpath expression

Example:
  > SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') FROM src LIMIT 1;
  'cc'
  > SELECT xpath_string('<a><b>b1</b><b>b2</b></a>','a/b') FROM src LIMIT 1;
  'b1'
  > SELECT xpath_string('<a><b>b1</b><b>b2</b></a>','a/b[2]') FROM src LIMIT 1;
  'b2'
  > SELECT xpath_string('<a><b>b1</b><b>b2</b></a>','a') FROM src LIMIT 1;
  'b1b2'

year (builtin)

org.apache.hadoop.hive.ql.udf.UDFYear

year(date) - Returns the year of date

date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'.
Example:
   > SELECT year('2009-30-07', 1) FROM src LIMIT 1;
  2009

yearweek

org.apache.hadoop.hive.ql.udf.UDFWeekOfYear

yearweek(date) - Returns the week of the year of the given date. A week is considered to start on a Monday and week 1 is the first week with >3 days.

Examples:
  > SELECT yearweek('2008-02-20') FROM src LIMIT 1;
  8
  > SELECT yearweek('1980-12-31 12:59:59') FROM src LIMIT 1;
  1

| (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPBitOr

a | b - Bitwise or

Example:
  > SELECT 3 | 5 FROM src LIMIT 1;
  7

~ (builtin)

org.apache.hadoop.hive.ql.udf.UDFOPBitNot

~ n - Bitwise not

Example:
  > SELECT ~ 0 FROM src LIMIT 1;
  -1