org.apache.hadoop.hive.ql.udf.UDFOPMod
a % b - Returns the remainder when dividing a by b
org.apache.hadoop.hive.ql.udf.UDFOPBitAnd
a & b - Bitwise and
Example: > SELECT 3 & 5 FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.UDFOPMultiply
a * b - Multiplies a by b
org.apache.hadoop.hive.ql.udf.UDFOPPlus
a + b - Returns a+b
org.apache.hadoop.hive.ql.udf.UDFOPNegative
- a - Returns -a
org.apache.hadoop.hive.ql.udf.UDFOPDivide
a / b - Divide a by b
Example: > SELECT 3 / 2 FROM src LIMIT 1; 1.5
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan
a < b - Returns TRUE if a is less than b
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan
a <= b - Returns TRUE if a is not greater than b
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual
a <> b - Returns TRUE if a is not equal to b
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual
a = b - Returns TRUE if a equals b and false otherwise
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan
a > b - Returns TRUE if a is greater than b
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan
a >= b - Returns TRUE if a is not smaller than b
org.apache.hadoop.hive.ql.udf.UDFE
E() - returns E
Example: > SELECT E() FROM src LIMIT 1; 2.718281828459045
org.apache.hadoop.hive.ql.udf.UDFPI
PI() - returns pi
Example: > SELECT PI() FROM src LIMIT 1; 3.14159...
org.apache.hadoop.hive.ql.udf.UDFDegrees
UDFDegrees(x) - Converts radians to degrees
Example: > SELECT UDFDegrees(30) FROM src LIMIT 1; -1
org.apache.hadoop.hive.ql.udf.UDFOPBitXor
a ^ b - Bitwise exclusive or
Example: > SELECT 3 ^ 5 FROM src LIMIT 1; 2
org.apache.hadoop.hive.ql.udf.UDFAbs
abs(x) - returns the absolute value of x
Example: > SELECT abs(0) FROM src LIMIT 1; 0 > SELECT abs(-5) FROM src LIMIT 1; 5
org.apache.hadoop.hive.ql.udf.UDFAcos
acos(x) - returns the arc cosine of x if -1<=x<=1 or NULL otherwise
Example: > SELECT acos(1) FROM src LIMIT 1; 0 > SELECT acos(2) FROM src LIMIT 1; NULL
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd
a and b - Logical and
org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray
array(n0, n1...) - Creates an array with the given elements
org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains
array_contains(array, value) - Returns TRUE if the array contains value.
Example: > SELECT array_contains(array(1, 2, 3), 2) FROM src LIMIT 1; true
org.apache.hadoop.hive.ql.udf.UDFAscii
ascii(str) - returns the numeric value of the first character of str
Returns 0 if str is empty or NULL if str is NULL Example: > SELECT ascii('222') FROM src LIMIT 1; 50 > SELECT ascii(2) FROM src LIMIT 1; 50
org.apache.hadoop.hive.ql.udf.UDFAsin
asin(x) - returns the arc sine of x if -1<=x<=1 or NULL otherwise
Example: > SELECT asin(0) FROM src LIMIT 1; 0 > SELECT asin(2) FROM src LIMIT 1; NULL
org.apache.hadoop.hive.ql.udf.generic.GenericUDFAssertTrue
assert_true(condition) - Throw an exception if 'condition' is not true.
Example: > SELECT assert_true(x >= 0) FROM src LIMIT 1; NULL
org.apache.hadoop.hive.ql.udf.UDFAtan
atan(x) - returns the atan (arctan) of x (x is in radians)
Example: > SELECT atan(0) FROM src LIMIT 1; 0
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage
avg(x) - Returns the mean of a set of numbers
org.apache.hadoop.hive.ql.udf.UDFBin
bin(n) - returns n in binary
n is a BIGINT. Returns NULL if n is NULL. Example: > SELECT bin(13) FROM src LIMIT 1 '1101'
org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary
binary(a) - cast a to binary
Currently only string or binary can be cast into binary
org.apache.hadoop.hive.ql.udf.UDFCeil
ceil(x) - Find the smallest integer not smaller than x
Example: > SELECT ceil(-0.1) FROM src LIMIT 1; 0 > SELECT ceil(5) FROM src LIMIT 1; 5
org.apache.hadoop.hive.ql.udf.UDFCeil
ceiling(x) - Find the smallest integer not smaller than x
Example: > SELECT ceiling(-0.1) FROM src LIMIT 1; 0 > SELECT ceiling(5) FROM src LIMIT 1; 5
org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce
coalesce(a1, a2, ...) - Returns the first non-null argument
Example: > SELECT coalesce(NULL, 1, NULL) FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet
collect_set(x) - Returns a set of objects with duplicate elements eliminated
org.apache.hadoop.hive.ql.udf.UDFConcat
concat(str1, str2, ... strN) - returns the concatenation of str1, str2, ... strN or concat(bin1, bin2, ... binN) - returns the concatenation of bytes in binary data bin1, bin2, ... binN
Returns NULL if any argument is NULL. Example: > SELECT concat('abc', 'def') FROM src LIMIT 1; 'abcdef'
org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcatWS
concat_ws(separator, str1, str2, ...) - returns the concatenation of the strings separated by the separator.
Example: > SELECT concat_ws('ce', 'fa', 'book') FROM src LIMIT 1; 'facebook'
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFContextNGrams
context_ngrams(expr, array<string1, string2, ...>, k, pf) estimates the top-k most frequent n-grams that fit into the specified context. The second parameter specifies a string of words that specify the positions of the n-gram elements, with a null value standing in for a 'blank' that must be filled by an n-gram element.
The primary expression must be an array of strings, or an array of arrays of strings, such as the return type of the sentences() UDF. The second parameter specifies the context -- for example, array("i", "love", null) -- which would estimate the top 'k' words that follow the phrase "i love" in the primary expression. The optional fourth parameter 'pf' controls the memory used by the heuristic. Larger values will yield better accuracy, but use more memory. Example usage: SELECT context_ngrams(sentences(lower(review)), array("i", "love", null, null), 10) FROM movies would attempt to determine the 10 most common two-word phrases that follow "i love" in a database of free-form natural language movie reviews.
org.apache.hadoop.hive.ql.udf.UDFConv
conv(num, from_base, to_base) - convert num from from_base to to_base
If to_base is negative, treat num as a signed integer,otherwise, treat it as an unsigned integer. Example: > SELECT conv('100', 2, 10) FROM src LIMIT 1; '4' > SELECT conv(-10, 16, -10) FROM src LIMIT 1; '16'
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCorrelation
corr(x,y) - Returns the Pearson coefficient of correlation
between a set of number pairs
The function takes as arguments any pair of numeric types and returns a double. Any pair with a NULL is ignored. If the function is applied to an empty set or a singleton set, NULL will be returned. Otherwise, it computes the following: COVAR_POP(x,y)/(STDDEV_POP(x)*STDDEV_POP(y)) where neither x nor y is null, COVAR_POP is the population covariance, and STDDEV_POP is the population standard deviation.
org.apache.hadoop.hive.ql.udf.UDFCos
cos(x) - returns the cosine of x (x is in radians)
Example: > SELECT cos(0) FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount
count(*) - Returns the total number of retrieved rows, including rows containing NULL values.
count(expr) - Returns the number of rows for which the supplied expression is non-NULL.
count(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-NULL.
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovariance
covar_pop(x,y) - Returns the population covariance of a set of number pairs
The function takes as arguments any pair of numeric types and returns a double. Any pair with a NULL is ignored. If the function is applied to an empty set, NULL will be returned. Otherwise, it computes the following: (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/COUNT(x,y) where neither x nor y is null.
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovarianceSample
covar_samp(x,y) - Returns the sample covariance of a set of number pairs
The function takes as arguments any pair of numeric types and returns a double. Any pair with a NULL is ignored. If the function is applied to an empty set, NULL will be returned. Otherwise, it computes the following: (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/(COUNT(x,y)-1) where neither x nor y is null.
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovariance
covariance(x,y) - Returns the population covariance of a set of number pairs
The function takes as arguments any pair of numeric types and returns a double. Any pair with a NULL is ignored. If the function is applied to an empty set, NULL will be returned. Otherwise, it computes the following: (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/COUNT(x,y) where neither x nor y is null.
org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnion
create_union(tag, obj1, obj2, obj3, ...) - Creates a union with the object for given tag
Example: > SELECT create_union(1, 1, "one") FROM src LIMIT 1; one
org.apache.hadoop.hive.ql.udf.UDFDateAdd
date_add(start_date, num_days) - Returns the date that is num_days after start_date.
start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. num_days is a number. The time part of start_date is ignored. Example: > SELECT date_add('2009-30-07', 1) FROM src LIMIT 1; '2009-31-07'
org.apache.hadoop.hive.ql.udf.UDFDateSub
date_sub(start_date, num_days) - Returns the date that is num_days before start_date.
start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. num_days is a number. The time part of start_date is ignored. Example: > SELECT date_sub('2009-30-07', 1) FROM src LIMIT 1; '2009-29-07'
org.apache.hadoop.hive.ql.udf.UDFDateDiff
datediff(date1, date2) - Returns the number of days between date1 and date2
date1 and date2 are strings in the format 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. The time parts are ignored.If date1 is earlier than date2, the result is negative. Example: > SELECT datediff('2009-30-07', '2009-31-07') FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.UDFDayOfMonth
day(date) - Returns the date of the month of date
date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. Example: > SELECT day('2009-30-07', 1) FROM src LIMIT 1; 30
org.apache.hadoop.hive.ql.udf.UDFDayOfMonth
dayofmonth(date) - Returns the date of the month of date
date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. Example: > SELECT dayofmonth('2009-30-07', 1) FROM src LIMIT 1; 30
org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput
dboutput(jdbcstring,username,password,preparedstatement,[arguments]) - sends data to a jdbc driver
argument 0 is the JDBC connection string argument 1 is the user name argument 2 is the password argument 3 is an SQL query to be used in the PreparedStatement argument (4-n) The remaining arguments must be primitive and are passed to the PreparedStatement object
org.apache.hadoop.hive.ql.udf.UDFOPLongDivide
a div b - Divide a by b rounded to the long integer
Example: > SELECT 3 div 2 FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt
elt(n, str1, str2, ...) - returns the n-th string
Example: > SELECT elt(1, 'face', 'book') FROM src LIMIT 1; 'face'
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEWAHBitmap
ewah_bitmap(expr) - Returns an EWAH-compressed bitmap representation of a column.
org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapAnd
ewah_bitmap_and(b1, b2) - Return an EWAH-compressed bitmap that is the bitwise AND of two bitmaps.
org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapEmpty
ewah_bitmap_empty(bitmap) - Predicate that tests whether an EWAH-compressed bitmap is all zeros
org.apache.hadoop.hive.ql.udf.generic.GenericUDFEWAHBitmapOr
ewah_bitmap_or(b1, b2) - Return an EWAH-compressed bitmap that is the bitwise OR of two bitmaps.
org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax
example_max(expr) - Returns the maximum value of expr
org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMin
example_min(expr) - Returns the minimum value of expr
org.apache.hadoop.hive.ql.udf.UDFExp
exp(x) - Returns e to the power of x
Example: > SELECT exp(0) FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode
explode(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns
org.apache.hadoop.hive.contrib.udtf.example.GenericUDTFExplode2
explode2(a) - like explode, but outputs two identical columns (for testing purposes)
org.apache.hadoop.hive.ql.udf.generic.GenericUDFField
field(str, str1, str2, ...) - returns the index of str in the str1,str2,... list or 0 if not found
All primitive types are supported, arguments are compared using str.equals(x). If str is NULL, the return value is 0.
org.apache.hadoop.hive.ql.udf.UDFFindInSet
find_in_set(str,str_array) - Returns the first occurrence of str in str_array where str_array is a comma-delimited string. Returns null if either argument is null. Returns 0 if the first argument has any commas.
Example: > SELECT find_in_set('ab','abc,b,ab,c,def') FROM src LIMIT 1; 3 > SELECT * FROM src1 WHERE NOT find_in_set(key,'311,128,345,956')=0; 311 val_311 128
org.apache.hadoop.hive.ql.udf.UDFFloor
floor(x) - Find the largest integer not greater than x
Example: > SELECT floor(-0.1) FROM src LIMIT 1; -1 > SELECT floor(5) FROM src LIMIT 1; 5
org.apache.hadoop.hive.ql.udf.UDFFromUnixTime
from_unixtime(unix_time, format) - returns unix_time in the specified format
Example: > SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') FROM src LIMIT 1; '1970-01-01 00:00:00'
org.apache.hadoop.hive.ql.udf.UDFJson
get_json_object(json_txt, path) - Extract a json object from path
Extract json object from a json string based on json path specified, and return json string of the extracted json object. It will return null if the input json string is invalid. A limited version of JSONPath supported: $ : Root object . : Child operator [] : Subscript operator for array * : Wildcard for [] Syntax not supported that's worth noticing: '' : Zero length string as key .. : Recursive descent @ : Current object/element () : Script expression ?() : Filter (script) expression. [,] : Union operator [start:end:step] : array slice operator
org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash
hash(a1, a2, ...) - Returns a hash value of the arguments
org.apache.hadoop.hive.ql.udf.UDFHex
hex(n or str) - Convert the argument to hexadecimal
If the argument is a string, returns two hex digits for each character in the string. If the argument is a number, returns the hexadecimal representation. Example: > SELECT hex(17) FROM src LIMIT 1; 'H1' > SELECT hex('Facebook') FROM src LIMIT 1; '46616365626F6F6B'
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFHistogramNumeric
histogram_numeric(expr, nb) - Computes a histogram on numeric 'expr' using nb bins.
Example: > SELECT histogram_numeric(val, 3) FROM src; [{"x":100,"y":14.0},{"x":200,"y":22.0},{"x":290.5,"y":11.0}] The return value is an array of (x,y) pairs representing the centers of the histogram's bins. As the value of 'nb' is increased, the histogram approximationgets finer-grained, but may yield artifacts around outliers. In practice, 20-40 histogram bins appear to work well, with more bins being required for skewed or smaller datasets. Note that this function creates a histogram with non-uniform bin widths. It offers no guarantees in terms of the mean-squared-error of the histogram, but in practice is comparable to the histograms produced by the R/S-Plusstatistical computing packages.
org.apache.hadoop.hive.ql.udf.UDFHour
hour(date) - Returns the hour of date
date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. Example: > SELECT hour('2009-07-30 12:58:59') FROM src LIMIT 1; 12 > SELECT hour('12:58:59') FROM src LIMIT 1; 12
org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn
test in(val1, val2...) - returns true if test equals any valN
org.apache.hadoop.hive.ql.udf.generic.GenericUDFInFile
in_file(str, filename) - Returns true if str appears in the file
org.apache.hadoop.hive.ql.udf.generic.GenericUDFIndex
index(a, n) - Returns the n-th element of a
org.apache.hadoop.hive.ql.udf.generic.GenericUDFInstr
instr(str, substr) - Returns the index of the first occurance of substr in str
Example: > SELECT instr('Facebook', 'boo') FROM src LIMIT 1; 5
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull
isnotnull a - Returns true if a is not NULL and false otherwise
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull
isnull a - Returns true if a is NULL and false otherwise
org.apache.hadoop.hive.ql.udf.generic.GenericUDTFJSONTuple
json_tuple(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. All the input parameters and output column types are string.
org.apache.hadoop.hive.ql.udf.UDFLower
lcase(str) - Returns str with all characters changed to lowercase
Example: > SELECT lcase('Facebook') FROM src LIMIT 1; 'facebook'
org.apache.hadoop.hive.ql.udf.UDFLength
length(str | binary) - Returns the length of str or number of bytes in binary data
Example: > SELECT length('Facebook') FROM src LIMIT 1; 8
org.apache.hadoop.hive.ql.udf.UDFLike
like(str, pattern) - Checks if str matches pattern
Example: > SELECT a.* FROM srcpart a WHERE a.hr like '%2' LIMIT 1; 27 val_27 2008-04-08 12
org.apache.hadoop.hive.ql.udf.UDFLn
ln(x) - Returns the natural logarithm of x
Example: > SELECT ln(1) FROM src LIMIT 1; 0
org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate
locate(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos
Example: > SELECT locate('bar', 'foobarbar', 5) FROM src LIMIT 1; 7
org.apache.hadoop.hive.ql.udf.UDFLog
log([b], x) - Returns the logarithm of x with base b
Example: > SELECT log(13, 13) FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.UDFLog10
log10(x) - Returns the logarithm of x with base 10
Example: > SELECT log10(10) FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.UDFLog2
log2(x) - Returns the logarithm of x with base 2
Example: > SELECT log2(2) FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.UDFLower
lower(str) - Returns str with all characters changed to lowercase
Example: > SELECT lower('Facebook') FROM src LIMIT 1; 'facebook'
org.apache.hadoop.hive.ql.udf.UDFLpad
lpad(str, len, pad) - Returns str, left-padded with pad to a length of len
If str is longer than len, the return value is shortened to len characters. Example: > SELECT lpad('hi', 5, '??') FROM src LIMIT 1; '???hi' > SELECT lpad('hi', 1, '??') FROM src LIMIT 1; 'h'
org.apache.hadoop.hive.ql.udf.UDFLTrim
ltrim(str) - Removes the leading space characters from str
Example: > SELECT ltrim(' facebook') FROM src LIMIT 1; 'facebook'
org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap
map(key0, value0, key1, value1...) - Creates a map with the given key/value pairs
org.apache.hadoop.hive.ql.udf.generic.GenericUDFMapKeys
map_keys(map) - Returns an unordered array containing the keys of the input map.
org.apache.hadoop.hive.ql.udf.generic.GenericUDFMapValues
map_values(map) - Returns an unordered array containing the values of the input map.
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax
max(expr) - Returns the maximum value of expr
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin
min(expr) - Returns the minimum value of expr
org.apache.hadoop.hive.ql.udf.UDFMinute
minute(date) - Returns the minute of date
date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. Example: > SELECT minute('2009-07-30 12:58:59') FROM src LIMIT 1; 58 > SELECT minute('12:58:59') FROM src LIMIT 1; 58
org.apache.hadoop.hive.ql.udf.UDFMonth
month(date) - Returns the month of date
Example: > SELECT month('2009-30-07') FROM src LIMIT 1; 7
org.apache.hadoop.hive.ql.udf.generic.GenericUDFNamedStruct
named_struct(name1, val1, name2, val2, ...) - Creates a struct with the given field names and values
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFnGrams
ngrams(expr, n, k, pf) - Estimates the top-k n-grams in rows that consist of sequences of strings, represented as arrays of strings, or arrays of arrays of strings. 'pf' is an optional precision factor that controls memory usage.
The parameter 'n' specifies what type of n-grams are being estimated. Unigrams are n = 1, and bigrams are n = 2. Generally, n will not be greater than about 5. The 'k' parameter specifies how many of the highest-frequency n-grams will be returned by the UDAF. The optional precision factor 'pf' specifies how much memory to use for estimation; more memory will give more accurate frequency counts, but could crash the JVM. The default value is 20, which internally maintains 20*k n-grams, but only returns the k highest frequency ones. The output is an array of structs with the top-k n-grams. It might be convenient to explode() the output of this UDAF.
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot
not a - Logical not
org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr
a or b - Logical or
org.apache.hadoop.hive.ql.udf.UDFParseUrl
parse_url(url, partToExtract[, key]) - extracts a part from a URL
Parts: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, USERINFO key specifies which query to extract Example: > SELECT parse_url('http://facebook.com/path/p1.php?query=1', 'HOST') FROM src LIMIT 1; 'facebook.com' > SELECT parse_url('http://facebook.com/path/p1.php?query=1', 'QUERY') FROM src LIMIT 1; 'query=1' > SELECT parse_url('http://facebook.com/path/p1.php?query=1', 'QUERY', 'query') FROM src LIMIT 1; '1'
org.apache.hadoop.hive.ql.udf.generic.GenericUDTFParseUrlTuple
parse_url_tuple(url, partname1, partname2, ..., partnameN) - extracts N (N>=1) parts from a URL.
It takes a URL and one or multiple partnames, and returns a tuple. All the input parameters and output column types are string.
Partname: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, USERINFO, QUERY:<KEY_NAME> Note: Partnames are case-sensitive, and should not contain unnecessary white spaces. Example: > SELECT b.* FROM src LATERAL VIEW parse_url_tuple(fullurl, 'HOST', 'PATH', 'QUERY', 'QUERY:id') b as host, path, query, query_id LIMIT 1; > SELECT parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from src a;
org.apache.hadoop.hive.ql.udf.UDAFPercentile
percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileApprox
percentile_approx(expr, pc, [nb]) - For very large data, computes an approximate percentile value from a histogram, using the optional argument [nb] as the number of histogram bins to use. A higher value of nb results in a more accurate approximation, at the cost of higher memory usage.
'expr' can be any numeric column, including doubles and floats, and 'pc' is either a single double/float with a requested percentile, or an array of double/float with multiple percentiles. If 'nb' is not specified, the default approximation is done with 10,000 histogram bins, which means that if there are 10,000 or fewer unique values in 'expr', you can expect an exact result. The percentile() function always computes an exact percentile and can run out of memory if there are too many unique values in a column, which necessitates this function. Example (three percentiles requested using a finer histogram approximation): > SELECT percentile_approx(val, array(0.5, 0.95, 0.98), 100000) FROM somedata; [0.05,1.64,2.26]
org.apache.hadoop.hive.ql.udf.UDFPosMod
a pmod b - Compute the positive modulo
org.apache.hadoop.hive.ql.udf.UDFOPPositive
positive a - Returns a
org.apache.hadoop.hive.ql.udf.UDFPower
pow(x1, x2) - raise x1 to the power of x2
Example: > SELECT pow(2, 3) FROM src LIMIT 1; 8
org.apache.hadoop.hive.ql.udf.UDFPower
power(x1, x2) - raise x1 to the power of x2
Example: > SELECT power(2, 3) FROM src LIMIT 1; 8
org.apache.hadoop.hive.ql.udf.UDFRadians
radians(x) - Converts degrees to radians
Example: > SELECT radians(90) FROM src LIMIT 1; 1.5707963267949mo
org.apache.hadoop.hive.ql.udf.UDFRand
rand([seed]) - Returns a pseudorandom number between 0 and 1
org.apache.hadoop.hive.ql.udf.generic.GenericUDFReflect
reflect(class,method[,arg1[,arg2..]]) calls method with reflection
Use this UDF to call Java methods by matching the argument signature
org.apache.hadoop.hive.ql.udf.UDFRegExp
str regexp regexp - Returns true if str matches regexp and false otherwise
Example: > SELECT 'fb' regexp '.*' FROM src LIMIT 1; true
org.apache.hadoop.hive.ql.udf.UDFRegExpExtract
regexp_extract(str, regexp[, idx]) - extracts a group that matches regexp
Example: > SELECT regexp_extract('100-200', '(\d+)-(\d+)', 1) FROM src LIMIT 1; '100'
org.apache.hadoop.hive.ql.udf.UDFRegExpReplace
regexp_replace(str, regexp, rep) - replace all substrings of str that match regexp with rep
Example: > SELECT regexp_replace('100-200', '(\d+)', 'num') FROM src LIMIT 1; 'num-num'
org.apache.hadoop.hive.ql.udf.UDFRepeat
repeat(str, n) - repeat str n times
Example: > SELECT repeat('123', 2) FROM src LIMIT 1; '123123'
org.apache.hadoop.hive.ql.udf.UDFReverse
reverse(str) - reverse str
Example: > SELECT reverse('Facebook') FROM src LIMIT 1; 'koobecaF'
org.apache.hadoop.hive.ql.udf.UDFRegExp
str rlike regexp - Returns true if str matches regexp and false otherwise
Example: > SELECT 'fb' rlike '.*' FROM src LIMIT 1; true
org.apache.hive.pdktest.Rot13
rot13(str) - Returns str with all characters transposed via rot13
Example: > SELECT rot13('Facebook') FROM src LIMIT 1; 'Snprobbx'
org.apache.hadoop.hive.ql.udf.UDFRound
round(x[, d]) - round x to d decimal places
Example: > SELECT round(12.3456, 1) FROM src LIMIT 1; 12.3'
org.apache.hadoop.hive.contrib.udf.UDFRowSequence
row_sequence() - Returns a generated row sequence number starting from 1
org.apache.hadoop.hive.ql.udf.UDFRpad
rpad(str, len, pad) - Returns str, right-padded with pad to a length of len
If str is longer than len, the return value is shortened to len characters. Example: > SELECT rpad('hi', 5, '??') FROM src LIMIT 1; 'hi???' > SELECT rpad('hi', 1, '??') FROM src LIMIT 1; 'h'
org.apache.hadoop.hive.ql.udf.UDFRTrim
rtrim(str) - Removes the trailing space characters from str
Example: > SELECT rtrim('facebook ') FROM src LIMIT 1; 'facebook'
org.apache.hadoop.hive.ql.udf.UDFSecond
second(date) - Returns the second of date
date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. Example: > SELECT second('2009-07-30 12:58:59') FROM src LIMIT 1; 59 > SELECT second('12:58:59') FROM src LIMIT 1; 59
org.apache.hadoop.hive.ql.udf.generic.GenericUDFSentences
sentences(str, lang, country) - Splits str into arrays of sentences, where each sentence is an array of words. The 'lang' and'country' arguments are optional, and if omitted, the default locale is used.
Example: > SELECT sentences('Hello there! I am a UDF.') FROM src LIMIT 1; [ ["Hello", "there"], ["I", "am", "a", "UDF"] ] > SELECT sentences(review, language) FROM movies; Unnecessary punctuation, such as periods and commas in English, is automatically stripped. If specified, 'lang' should be a two-letter ISO-639 language code (such as 'en'), and 'country' should be a two-letter ISO-3166 code (such as 'us'). Not all country and language codes are fully supported, and if an unsupported code is specified, a default locale is used to process that string.
org.apache.hadoop.hive.ql.udf.UDFSign
sign(x) - returns the sign of x )
Example: > SELECT sign(40) FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.UDFSin
sin(x) - returns the sine of x (x is in radians)
Example: > SELECT sin(0) FROM src LIMIT 1; 0
org.apache.hadoop.hive.ql.udf.generic.GenericUDFSize
size(a) - Returns the size of a
org.apache.hadoop.hive.ql.udf.UDFSpace
space(n) - returns n spaces
Example: > SELECT space(2) FROM src LIMIT 1; ' '
org.apache.hadoop.hive.ql.udf.generic.GenericUDFSplit
split(str, regex) - Splits str around occurances that match regex
Example: > SELECT split('oneAtwoBthreeC', '[ABC]') FROM src LIMIT 1; ["one", "two", "three"]
org.apache.hadoop.hive.ql.udf.UDFSqrt
sqrt(x) - returns the square root of x
Example: > SELECT sqrt(4) FROM src LIMIT 1; 2
org.apache.hadoop.hive.ql.udf.generic.GenericUDTFStack
stack(n, cols...) - turns k columns into n rows of size k/n each
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStd
std(x) - Returns the standard deviation of a set of numbers
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStd
stddev(x) - Returns the standard deviation of a set of numbers
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStd
stddev_pop(x) - Returns the standard deviation of a set of numbers
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStdSample
stddev_samp(x) - Returns the sample standard deviation of a set of numbers
org.apache.hadoop.hive.ql.udf.generic.GenericUDFStringToMap
str_to_map(text, delimiter1, delimiter2) - Creates a map by parsing text
Split text into key-value pairs using two delimiters. The first delimiter seperates pairs, and the second delimiter sperates key and value. If only one parameter is given, default delimiters are used: ',' as delimiter1 and '=' as delimiter2.
org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct
struct(col1, col2, col3, ...) - Creates a struct with the given field values
org.apache.hadoop.hive.ql.udf.UDFSubstr
substr(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstr(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len
pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str. Example: > SELECT substr('Facebook', 5) FROM src LIMIT 1; 'book' > SELECT substr('Facebook', -5) FROM src LIMIT 1; 'ebook' > SELECT substr('Facebook', 5, 1) FROM src LIMIT 1; 'b'
org.apache.hadoop.hive.ql.udf.UDFSubstr
substring(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstring(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len
pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str. Example: > SELECT substring('Facebook', 5) FROM src LIMIT 1; 'book' > SELECT substring('Facebook', -5) FROM src LIMIT 1; 'ebook' > SELECT substring('Facebook', 5, 1) FROM src LIMIT 1; 'b'
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum
sum(x) - Returns the sum of a set of numbers
org.apache.hadoop.hive.ql.udf.UDFTan
tan(x) - returns the tangent of x (x is in radians)
Example: > SELECT tan(0) FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.UDFDate
to_date(expr) - Extracts the date part of the date or datetime expression expr
Example: > SELECT to_date('2009-30-07 04:17:52') FROM src LIMIT 1; '2009-30-07'
org.apache.hadoop.hive.ql.udf.UDFTrim
trim(str) - Removes the leading and trailing space characters from str
Example: > SELECT trim(' facebook ') FROM src LIMIT 1; 'facebook'
org.apache.hadoop.hive.ql.udf.UDFUpper
ucase(str) - Returns str with all characters changed to uppercase
Example: > SELECT ucase('Facebook') FROM src LIMIT 1; 'FACEBOOK'
org.apache.hadoop.hive.ql.udf.UDFUnhex
unhex(str) - Converts hexadecimal argument to string
Performs the inverse operation of HEX(str). That is, it interprets each pair of hexadecimal digits in the argument as a number and converts it to the character represented by the number. The resulting characters are returned as a binary string. Example: > SELECT UNHEX('4D7953514C') from src limit 1; 'MySQL' > SELECT UNHEX(HEX('string')) from src limit 1; 'string' > SELECT HEX(UNHEX('1267')) from src limit 1; '1267' The characters in the argument string must be legal hexadecimal digits: '0' .. '9', 'A' .. 'F', 'a' .. 'f'. If UNHEX() encounters any nonhexadecimal digits in the argument, it returns NULL. Also, if there are an odd number of characters a leading 0 is appended.
org.apache.hive.builtins.UDAFUnionMap
union_map(col) - aggregate given maps into a single map
Aggregate maps, returns as a HashMap.
org.apache.hadoop.hive.ql.udf.UDFUnixTimeStamp
unix_timestamp([date[, pattern]]) - Returns the UNIX timestamp
Converts the current or specified time to number of seconds since 1970-01-01.
org.apache.hadoop.hive.ql.udf.UDFUpper
upper(str) - Returns str with all characters changed to uppercase
Example: > SELECT upper('Facebook') FROM src LIMIT 1; 'FACEBOOK'
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance
var_pop(x) - Returns the variance of a set of numbers
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVarianceSample
var_samp(x) - Returns the sample variance of a set of numbers
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance
variance(x) - Returns the variance of a set of numbers
org.apache.hadoop.hive.ql.udf.xml.GenericUDFXPath
xpath(xml, xpath) - Returns a string array of values within xml nodes that match the xpath expression
Example: > SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>', 'a/text()') FROM src LIMIT 1 [] > SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>', 'a/b/text()') FROM src LIMIT 1 ["b1","b2","b3"] > SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>', 'a/c/text()') FROM src LIMIT 1 ["c1","c2"]
org.apache.hadoop.hive.ql.udf.xml.UDFXPathBoolean
xpath_boolean(xml, xpath) - Evaluates a boolean xpath expression
Example: > SELECT xpath_boolean('<a><b>1</b></a>','a/b') FROM src LIMIT 1; true > SELECT xpath_boolean('<a><b>1</b></a>','a/b = 2') FROM src LIMIT 1; false
org.apache.hadoop.hive.ql.udf.xml.UDFXPathDouble
xpath_double(xml, xpath) - Returns a double value that matches the xpath expression
Example: > SELECT xpath_double('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1; 3.0
org.apache.hadoop.hive.ql.udf.xml.UDFXPathFloat
xpath_float(xml, xpath) - Returns a float value that matches the xpath expression
Example: > SELECT xpath_float('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1; 3.0
org.apache.hadoop.hive.ql.udf.xml.UDFXPathInteger
xpath_int(xml, xpath) - Returns an integer value that matches the xpath expression
Example: > SELECT xpath_int('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1; 3
org.apache.hadoop.hive.ql.udf.xml.UDFXPathLong
xpath_long(xml, xpath) - Returns a long value that matches the xpath expression
Example: > SELECT xpath_long('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1; 3
org.apache.hadoop.hive.ql.udf.xml.UDFXPathDouble
xpath_number(xml, xpath) - Returns a double value that matches the xpath expression
Example: > SELECT xpath_number('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1; 3.0
org.apache.hadoop.hive.ql.udf.xml.UDFXPathShort
xpath_short(xml, xpath) - Returns a short value that matches the xpath expression
Example: > SELECT xpath_short('<a><b>1</b><b>2</b></a>','sum(a/b)') FROM src LIMIT 1; 3
org.apache.hadoop.hive.ql.udf.xml.UDFXPathString
xpath_string(xml, xpath) - Returns the text contents of the first xml node that matches the xpath expression
Example: > SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') FROM src LIMIT 1; 'cc' > SELECT xpath_string('<a><b>b1</b><b>b2</b></a>','a/b') FROM src LIMIT 1; 'b1' > SELECT xpath_string('<a><b>b1</b><b>b2</b></a>','a/b[2]') FROM src LIMIT 1; 'b2' > SELECT xpath_string('<a><b>b1</b><b>b2</b></a>','a') FROM src LIMIT 1; 'b1b2'
org.apache.hadoop.hive.ql.udf.UDFYear
year(date) - Returns the year of date
date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. Example: > SELECT year('2009-30-07', 1) FROM src LIMIT 1; 2009
org.apache.hadoop.hive.ql.udf.UDFWeekOfYear
yearweek(date) - Returns the week of the year of the given date. A week is considered to start on a Monday and week 1 is the first week with >3 days.
Examples: > SELECT yearweek('2008-02-20') FROM src LIMIT 1; 8 > SELECT yearweek('1980-12-31 12:59:59') FROM src LIMIT 1; 1
org.apache.hadoop.hive.ql.udf.UDFOPBitOr
a | b - Bitwise or
Example: > SELECT 3 | 5 FROM src LIMIT 1; 7
org.apache.hadoop.hive.ql.udf.UDFOPBitNot
~ n - Bitwise not
Example: > SELECT ~ 0 FROM src LIMIT 1; -1