Difference between revisions of "Knowledge-Aware-Search-AQL"
(→Annotation Query Language) |
(→Annotation Query Language) |
||
Line 1: | Line 1: | ||
Note that complete details on the Annotation Query Language (AQL) are available on the [http://pic.dhe.ibm.com/infocenter/bigins/v1r2/index.jsp?topic=%2Fcom.ibm.swg.im.infosphere.biginsights.doc%2Fdoc%2Fbiginsights_aqlref_con_aql-overview.html IBM website]. We summarize some of the queries used specifically in our work here. | Note that complete details on the Annotation Query Language (AQL) are available on the [http://pic.dhe.ibm.com/infocenter/bigins/v1r2/index.jsp?topic=%2Fcom.ibm.swg.im.infosphere.biginsights.doc%2Fdoc%2Fbiginsights_aqlref_con_aql-overview.html IBM website]. We summarize some of the queries used specifically in our work here. | ||
=Annotation Query Language= | =Annotation Query Language= | ||
+ | <font color="red">Note that for the Evaluation, all queries were run on THE SAME SITE for Google, Hakia, DuckDuckGo and our System</font> | ||
'''create dictionary''' Buprenorphine_dict '''as''' | '''create dictionary''' Buprenorphine_dict '''as''' | ||
( | ( |
Revision as of 03:48, 23 May 2013
Note that complete details on the Annotation Query Language (AQL) are available on the IBM website. We summarize some of the queries used specifically in our work here.
Annotation Query Language
Note that for the Evaluation, all queries were run on THE SAME SITE for Google, Hakia, DuckDuckGo and our System
-- Surface forms (brand names, generic name and slang) used to spot
-- mentions of buprenorphine in text.
create dictionary Buprenorphine_dict as
(
  'Buprel',
  'Buprenex',
  'Buprenorphine',
  'Buprenorphine analgesic',
  'Buprenorphine opioid dependence',
  'Probuphine',
  'Subbies',
  'Suboxone',
  'Suboxone film',
  'Suboxone tablet',
  'Subs',
  'Subutex',
  'Temgesic',
  'film',
  'films',
  'strip',
  'strips',
  'sub',
  'tecs',
  'tex',
  'Zubsolv'
);
-- Every Buprenorphine_dict hit in the document text, exposed as the
-- span column `buprenorphine`.
create view Buprenorphine_view as
  extract dictionaries 'Buprenorphine_dict'
    on Src.text as buprenorphine
  from Document Src;
-- Demonstrative pronouns.
create dictionary demonstrative_pronoun_dict as
(
  'this', 'that',
  'these', 'those'
);

-- Personal pronouns (subject and object forms).
create dictionary personal_pronoun_dict as
(
  'i', 'me',
  'you',
  'she', 'her',
  'he', 'him',
  'it',
  'we', 'us',
  'they', 'them'
);
-- Possessive pronouns and possessive determiners.
-- The original list contained 'your' twice; the duplicate entry has
-- been removed (the set of matched words is unchanged).
create dictionary possessive_pronoun_dict as
(
  'my', 'mine',
  'your', 'yours',
  'our', 'ours',
  'his',
  'her', 'hers',
  'its',
  'their', 'theirs'
);
-- Reflexive pronouns.
create dictionary reflexive_pronoun_dict as
(
  'myself', 'ourselves',
  'yourself', 'yourselves',
  'himself', 'herself',
  'itself', 'themselves'
);

-- Relative pronouns.
create dictionary relative_pronoun_dict as
(
  'that', 'which',
  'who', 'whom', 'whose',
  'whichever', 'whoever', 'whomever'
);

-- Interrogative pronouns.
create dictionary interrogative_pronoun_dict as
(
  'what',
  'who', 'whom',
  'which', 'whose'
);

-- Indefinite pronouns (includes the two-token entry 'no one').
create dictionary indefinite_pronoun_dict as
(
  'anybody', 'anyone', 'anything',
  'each', 'either',
  'everybody', 'everyone', 'everything',
  'neither', 'nobody', 'no one', 'nothing', 'one',
  'somebody', 'someone', 'something',
  'both', 'few', 'many', 'several',
  'all', 'any', 'most', 'none', 'some'
);
-- Any pronoun from the seven pronoun dictionaries, exposed as the span
-- column `match`.
create view Pronounview as
  extract dictionaries
        'personal_pronoun_dict'
    and 'relative_pronoun_dict'
    and 'demonstrative_pronoun_dict'
    and 'indefinite_pronoun_dict'
    and 'reflexive_pronoun_dict'
    and 'interrogative_pronoun_dict'
    and 'possessive_pronoun_dict'
    on Src.text as match
  from Document Src;
The following production DrugPronounview detects an ENTITY followed by a PRONOUN (e.g. bupe I took.)
-- ENTITY followed by a PRONOUN: a drug mention with a pronoun starting
-- 0 to 4 tokens after it; the result spans from the drug to the pronoun.
--
-- Fix: the view names in the from list were written as single-quoted
-- string literals ('Drug_view', 'Pronounview'); they are now plain view
-- identifiers.
-- NOTE(review): Drug_view (with a `drug` column) is not defined in this
-- section - presumably declared elsewhere; confirm.
create view DrugPronounview as
  select CombineSpans(D.drug, P.match) as match
  from Drug_view D, Pronounview P
  where FollowsTok(D.drug, P.match, 0, 4);
The following Nonterminal NUMERIC_AMOUNT detects numeric amounts (e.g. 10, 7/7, 80-90, 0.002, 2.5)
-- Numeric amounts: one to three digits, an optional '.', an optional
-- separator (one of / \ - =) and any further digits.
create view NUMERIC_AMOUNT as
  extract regex /\d{1,3}\.?(\/|\\|-|=)?\d*/
    on Src.text as numberunit
  from Document Src;
The following Nonterminal detects a unit, same as Unit_dict
-- The single token to the right of each numeric amount, kept only when
-- that token contains a Unit_dict entry and the amount itself matches
-- the numeric pattern below.
create view OnlyUnitView as
  select RightContextTok(Amt.numberunit, 1) as unit
  from NUMERIC_AMOUNT Amt
  where ContainsDict('Unit_dict', RightContextTok(Amt.numberunit, 1))
    and ContainsRegex(/\d{1,3}\.?(-|\s|\(|=|\/|\\)?\d*/, Amt.numberunit);
The following production NUMERIC_AMOUNT_UNIT detects an amount followed by a unit (e.g. 300mg, 1/2CC, 37.5mg, 80-90mg)
-- A numeric amount joined with the unit that follows it within 0 to 1
-- tokens; the result spans from the amount through the unit.
create view NUMERIC_AMOUNT_UNIT as
  select CombineSpans(Amt.numberunit, Un.unit) as match
  from NUMERIC_AMOUNT Amt, OnlyUnitView Un
  where FollowsTok(Amt.numberunit, Un.unit, 0, 1);
-- Amounts written out as words.
-- Fix: the list only had the misspelling 'ninty', so the correctly
-- spelled 'ninety' was never matched. 'ninety' is added; 'ninty' is kept
-- so that text using the misspelling still matches as before.
create dictionary wordedAmount_dict as
(
  'one', 'once', 'two', 'twice',
  'three', 'thrice', 'four', 'five',
  'six', 'seven', 'eight', 'nine',
  'ten', 'eleven', 'twelve', 'thirteen',
  'fourteen', 'fifteen', 'sixteen',
  'seventeen', 'eighteen', 'nineteen',
  'twenty', 'thirty', 'forty', 'fifty',
  'sixty', 'seventy', 'eighty', 'ninety', 'ninty',
  'hundred'
);
The following Nonterminal detects a WordedAmount, same as wordedAmount_dict
-- Every wordedAmount_dict hit in the document text, as column `match`.
create view WordedAmount as
  extract dictionary 'wordedAmount_dict'
    on Src.text as match
  from Document Src;
-- Units of measure: volume, weight and countable dose forms, each with
-- abbreviations, plurals and hyphenated / British spellings.
create dictionary Unit_dict as
(
  'cc', 'ccs', 'cubic c', 'cubic cs',
  'cubic centimeter', 'cubic centimeters',
  'cubic centimetres', 'cubic centimetre',
  'cubic centi-meter', 'cubic centi-meters',
  'cubic centi-metre', 'cubic centi-metres',
  'bag', 'bags',
  'lb', 'lbs', 'pound', 'pounds',
  'g', 'gs', 'gm', 'gms', 'gram', 'grams',
  'mg', 'mgs', 'milligram', 'milligrams', 'milli-gram', 'milli-grams',
  'kg', 'kgs', 'kilogram', 'kilograms', 'kilo-gram', 'kilo-grams',
  'ug', 'ugs', 'mcg', 'mcgs',
  'microgram', 'micrograms', 'micro-gram', 'micro-grams',
  'litre', 'litres', 'liter', 'liters',
  'ml', 'mls',
  'millilitre', 'millilitres', 'milliliter', 'milliliters',
  'milli-litre', 'milli-litres', 'milli-liter', 'milli-liters',
  'mcl', 'mcls',
  'microlitre', 'microlitres', 'microliter', 'microliters',
  'micro-litre', 'micro-litres', 'micro-liter', 'micro-liters',
  'oz', 'ozs', 'ounce', 'ounces',
  'tablet', 'tablets', 'tab', 'tabs'
);
-- Every Unit_dict hit in the document text, as column `unit`.
create view Unit_view as
  extract dictionaries 'Unit_dict'
    on Src.text as unit
  from Document Src;
The following production WORDED_NUMERIC_AMOUNT_UNIT detects a dosage expressed as a word followed by a unit (e.g. four mg, six milligrams)
-- A worded amount joined with a unit that follows it within 0 to 1
-- tokens; the result spans from the amount word through the unit.
create view WORDED_NUMERIC_AMOUNT_UNIT as
  select CombineSpans(Wd.match, Un.unit) as match
  from WordedAmount Wd, Unit_view Un
  where FollowsTok(Wd.match, Un.unit, 0, 1);
The following Nonterminal DosageView combines all dosage expressions (e.g. 37.5mg, 80-90mg, four mg, six milligrams)
-- All dosage expressions: numeric-amount+unit and worded-amount+unit
-- matches concatenated (union all, duplicates are kept).
create view DosageView as
  (select Num.match from NUMERIC_AMOUNT_UNIT Num)
  union all
  (select Wd.match from WORDED_NUMERIC_AMOUNT_UNIT Wd);
-- A drug+pronoun match joined with a numeric dosage that starts 0 to 4
-- tokens after it; the result spans from the drug through the dosage.
-- NOTE(review): this joins against NUMERIC_AMOUNT_UNIT only, so worded
-- dosages (WORDED_NUMERIC_AMOUNT_UNIT / DosageView) are not picked up
-- here - confirm that is intended.
create view DrugPronounDosageView as
  select CombineSpans(DrugPro.match, Dose.match) as match
  from DrugPronounview DrugPro, NUMERIC_AMOUNT_UNIT Dose
  where FollowsTok(DrugPro.match, Dose.match, 0, 4);
-- Words that place a period in the past.
-- Fix: the entry for "previous" was written as ' previous' with a stray
-- leading space inside the quotes; it is now the bare word.
create dictionary pastperiodDeterminer as
(
  'ago',
  'prior',
  'previous',
  'since',
  'before',
  'last',
  'past'
);
-- Words that qualify a present / ongoing or approximate period.
create dictionary presentperiodDeterminer as
(
  'now',
  'about', 'around',
  'several', 'couple',
  'every', 'all', 'this'
);

-- Words that place a period in the future.
create dictionary futureperiodDeterminer as
(
  'next',
  'later',
  'after'
);
-- Hour words and abbreviations.
create dictionary hourtimeIndicator as
(
  'hour', 'hours',
  'hrs', 'hr'
);

-- Minute words and abbreviations.
create dictionary minutetimeIndicator as
(
  'minute', 'minutes',
  'mins', 'min'
);

-- Second words and abbreviations.
create dictionary secondtimeIndicator as
(
  'second', 'seconds',
  'sec', 'secs'
);
-- Decade words.
create dictionary decadedurationIndicator as
(
  'decade', 'decades'
);

-- Year words and abbreviations.
create dictionary yeardurationIndicator as
(
  'year', 'years',
  'yrs', 'yr',
  'annum'
);

-- Month words and abbreviations.
create dictionary monthdurationIndicator as
(
  'month', 'months',
  'mnth', 'mths', 'mo'
);

-- Week words and abbreviations.
create dictionary weekdurationIndicator as
(
  'wk', 'week',
  'weeks', 'wks'
);

-- Day and part-of-day words, including informal spellings.
create dictionary daydurationIndicator as
(
  'day', 'days',
  'night', 'nights', 'nite', 'nites',
  'morning', 'mornings', 'mornin',
  'evening', 'evenings',
  'afternoon', 'noon'
);
The following Nonterminal detects a PAST_PERIOD_DETERMINER, same as pastperiodDeterminer
-- Every pastperiodDeterminer hit in the document text, as column `match`.
create view PAST_PERIOD_DETERMINER as
  extract dictionary 'pastperiodDeterminer'
    on Src.text as match
  from Document Src;
The following Nonterminal detects a PRESENT_PERIOD_DETERMINER, same as presentperiodDeterminer
-- Every presentperiodDeterminer hit in the document text, as column `match`.
create view PRESENT_PERIOD_DETERMINER as
  extract dictionary 'presentperiodDeterminer'
    on Src.text as match
  from Document Src;
The following Nonterminal detects a FUTURE_PERIOD_DETERMINER, same as futureperiodDeterminer
-- Every futureperiodDeterminer hit in the document text, as column `match`.
create view FUTURE_PERIOD_DETERMINER as
  extract dictionary 'futureperiodDeterminer'
    on Src.text as match
  from Document Src;
The following Nonterminal PERIOD_DETERMINER combines all period expressions
-- Past, present and future period determiners extracted in a single
-- pass over the document text, as column `match`.
create view PERIOD_DETERMINER as
  extract dictionaries
        'pastperiodDeterminer'
    and 'presentperiodDeterminer'
    and 'futureperiodDeterminer'
    on Src.text as match
  from Document Src;
The following Nonterminal TIME_INTERVAL combines all time expressions
-- Hour, minute and second indicators extracted from the document text,
-- as column `match`.
create view TIME_INTERVAL as
  extract dictionaries
        'hourtimeIndicator'
    and 'minutetimeIndicator'
    and 'secondtimeIndicator'
    on Src.text as match
  from Document Src;
The following Nonterminal DURATION_INDICATOR combines all duration expressions
-- Decade, year, month, week and day indicators extracted from the
-- document text, as column `match`.
create view DURATION_INDICATOR as
  extract dictionaries
        'decadedurationIndicator'
    and 'yeardurationIndicator'
    and 'monthdurationIndicator'
    and 'weekdurationIndicator'
    and 'daydurationIndicator'
    on Src.text as match
  from Document Src;
The following production DURATION_PAST_PERIOD detects a Duration identifier followed by a time when (e.g. years ago, weeks prior)
-- Duration indicator followed within 0 to 1 tokens by a past-period
-- determiner; the result spans both.
create view DURATION_PAST_PERIOD as
  select CombineSpans(Dur.match, Past.match) as match
  from DURATION_INDICATOR Dur, PAST_PERIOD_DETERMINER Past
  where FollowsTok(Dur.match, Past.match, 0, 1);
The following production DURATION_PRESENT_PERIOD detects a Duration identifier followed by a time when
-- Duration indicator followed within 0 to 1 tokens by a present-period
-- determiner; the result spans both.
create view DURATION_PRESENT_PERIOD as
  select CombineSpans(Dur.match, Pres.match) as match
  from DURATION_INDICATOR Dur, PRESENT_PERIOD_DETERMINER Pres
  where FollowsTok(Dur.match, Pres.match, 0, 1);
The following production DURATION_FUTURE_PERIOD detects a Duration identifier followed by a time when (e.g. weeks later, days after)
-- Duration indicator followed within 0 to 1 tokens by a future-period
-- determiner; the result spans both.
create view DURATION_FUTURE_PERIOD as
  select CombineSpans(Dur.match, Fut.match) as match
  from DURATION_INDICATOR Dur, FUTURE_PERIOD_DETERMINER Fut
  where FollowsTok(Dur.match, Fut.match, 0, 1);
The following production PAST_PERIOD_DURATION detects a time when, followed by a Duration (e.g. last year, previous day)
-- Past-period determiner followed within 0 to 1 tokens by a duration
-- indicator; the result spans both.
create view PAST_PERIOD_DURATION as
  select CombineSpans(Past.match, Dur.match) as match
  from PAST_PERIOD_DETERMINER Past, DURATION_INDICATOR Dur
  where FollowsTok(Past.match, Dur.match, 0, 1);
The following production PRESENT_PERIOD_DURATION detects a time when, followed by a Duration (e.g. about a year, around a month)
-- Present-period determiner followed within 0 to 1 tokens by a duration
-- indicator; the result spans both.
create view PRESENT_PERIOD_DURATION as
  select CombineSpans(Pres.match, Dur.match) as match
  from PRESENT_PERIOD_DETERMINER Pres, DURATION_INDICATOR Dur
  where FollowsTok(Pres.match, Dur.match, 0, 1);
The following production FUTURE_PERIOD_DURATION detects a time when, followed by a Duration (e.g. later years, next day)
-- Future-period determiner followed within 0 to 1 tokens by a duration
-- indicator; the result spans both.
create view FUTURE_PERIOD_DURATION as
  select CombineSpans(Fut.match, Dur.match) as match
  from FUTURE_PERIOD_DETERMINER Fut, DURATION_INDICATOR Dur
  where FollowsTok(Fut.match, Dur.match, 0, 1);
The following production TIME_PAST_PERIOD detects a Time indicator followed by a time when (e.g. hours ago, minutes before)
-- Time indicator followed within 0 to 1 tokens by a past-period
-- determiner; the result spans both.
create view TIME_PAST_PERIOD as
  select CombineSpans(Tm.match, Past.match) as match
  from TIME_INTERVAL Tm, PAST_PERIOD_DETERMINER Past
  where FollowsTok(Tm.match, Past.match, 0, 1);
The following production TIME_PRESENT_PERIOD detects a Time indicator followed by a time when (e.g. hours now)
-- Time indicator followed within 0 to 1 tokens by a present-period
-- determiner; the result spans both.
create view TIME_PRESENT_PERIOD as
  select CombineSpans(Tm.match, Pres.match) as match
  from TIME_INTERVAL Tm, PRESENT_PERIOD_DETERMINER Pres
  where FollowsTok(Tm.match, Pres.match, 0, 1);
The following production TIME_FUTURE_PERIOD detects a Time indicator followed by a time when (e.g. hours later, minutes after)
-- Time indicator followed within 0 to 1 tokens by a future-period
-- determiner; the result spans both.
create view TIME_FUTURE_PERIOD as
  select CombineSpans(Tm.match, Fut.match) as match
  from TIME_INTERVAL Tm, FUTURE_PERIOD_DETERMINER Fut
  where FollowsTok(Tm.match, Fut.match, 0, 1);
The following production PAST_PERIOD_TIME detects a time when followed by a Time indicator (e.g. last hour, last minutes)
-- Past-period determiner followed within 0 to 1 tokens by a time
-- indicator; the result spans both.
create view PAST_PERIOD_TIME as
  select CombineSpans(Past.match, Tm.match) as match
  from PAST_PERIOD_DETERMINER Past, TIME_INTERVAL Tm
  where FollowsTok(Past.match, Tm.match, 0, 1);
The following production PRESENT_PERIOD_TIME detects a time when followed by a Time indicator (e.g. several hours, couple of minutes)
-- Present-period determiner followed within 0 to 1 tokens by a time
-- indicator; the result spans both.
create view PRESENT_PERIOD_TIME as
  select CombineSpans(Pres.match, Tm.match) as match
  from PRESENT_PERIOD_DETERMINER Pres, TIME_INTERVAL Tm
  where FollowsTok(Pres.match, Tm.match, 0, 1);
The following production FUTURE_PERIOD_TIME detects a time when followed by a Time indicator (e.g. next hour, after a minute)
-- Future-period determiner followed within 0 to 1 tokens by a time
-- indicator; the result spans both.
create view FUTURE_PERIOD_TIME as
  select CombineSpans(Fut.match, Tm.match) as match
  from FUTURE_PERIOD_DETERMINER Fut, TIME_INTERVAL Tm
  where FollowsTok(Fut.match, Tm.match, 0, 1);
The following production AMOUNT_TIME_PAST_PERIOD detects an amount associated with a time period (e.g. 5 minutes ago)
-- Amount followed within 0 to 1 tokens by a TIME_PAST_PERIOD match;
-- the result spans both.
-- NOTE(review): the AMOUNT view is not defined in this section; confirm
-- it exists and exposes a `match` column.
create view AMOUNT_TIME_PAST_PERIOD as
  select CombineSpans(Amt.match, TmPast.match) as match
  from AMOUNT Amt, TIME_PAST_PERIOD TmPast
  where FollowsTok(Amt.match, TmPast.match, 0, 1);
The following production AMOUNT_TIME_PRESENT_PERIOD detects an amount associated with a time period (e.g. 10 hours now)
-- Amount followed within 0 to 1 tokens by a TIME_PRESENT_PERIOD match;
-- the result spans both.
-- NOTE(review): the AMOUNT view is not defined in this section; confirm
-- it exists and exposes a `match` column.
create view AMOUNT_TIME_PRESENT_PERIOD as
  select CombineSpans(Amt.match, TmPres.match) as match
  from AMOUNT Amt, TIME_PRESENT_PERIOD TmPres
  where FollowsTok(Amt.match, TmPres.match, 0, 1);
The following production AMOUNT_TIME_FUTURE_PERIOD detects an amount associated with a time period (e.g. 5 minutes later)
-- Amount followed within 0 to 1 tokens by a TIME_FUTURE_PERIOD match;
-- the result spans both.
-- NOTE(review): the AMOUNT view is not defined in this section; confirm
-- it exists and exposes a `match` column.
create view AMOUNT_TIME_FUTURE_PERIOD as
  select CombineSpans(Amt.match, TmFut.match) as match
  from AMOUNT Amt, TIME_FUTURE_PERIOD TmFut
  where FollowsTok(Amt.match, TmFut.match, 0, 1);
The following production AMOUNT_TIME detects an amount associated with a time indicator (e.g. 15 seconds)
-- Amount followed within 0 to 1 tokens by a time indicator; the result
-- spans both.
-- NOTE(review): the AMOUNT view is not defined in this section; confirm
-- it exists and exposes a `match` column.
create view AMOUNT_TIME as
  select CombineSpans(Amt.match, Tm.match) as match
  from AMOUNT Amt, TIME_INTERVAL Tm
  where FollowsTok(Amt.match, Tm.match, 0, 1);
The following production PAST_PERIOD_AMOUNT_TIME detects an amount associated with a time period (e.g. last 2 hours, past 2 minutes)
-- Past-period determiner followed within 0 to 1 tokens by an
-- AMOUNT_TIME match; the result spans both.
create view PAST_PERIOD_AMOUNT_TIME as
  select CombineSpans(Past.match, AmtTm.match) as match
  from PAST_PERIOD_DETERMINER Past, AMOUNT_TIME AmtTm
  where FollowsTok(Past.match, AmtTm.match, 0, 1);
The following production PRESENT_PERIOD_AMOUNT_TIME detects an amount associated with a time period (e.g. around 2 hours)
-- Present-period determiner followed within 0 to 1 tokens by an
-- AMOUNT_TIME match; the result spans both.
create view PRESENT_PERIOD_AMOUNT_TIME as
  select CombineSpans(Pres.match, AmtTm.match) as match
  from PRESENT_PERIOD_DETERMINER Pres, AMOUNT_TIME AmtTm
  where FollowsTok(Pres.match, AmtTm.match, 0, 1);
The following production FUTURE_PERIOD_AMOUNT_TIME detects an amount associated with a time period (e.g. next 15 seconds, after 2 minutes)
-- Future-period determiner followed within 0 to 1 tokens by an
-- AMOUNT_TIME match; the result spans both.
create view FUTURE_PERIOD_AMOUNT_TIME as
  select CombineSpans(Fut.match, AmtTm.match) as match
  from FUTURE_PERIOD_DETERMINER Fut, AMOUNT_TIME AmtTm
  where FollowsTok(Fut.match, AmtTm.match, 0, 1);
The following production AMOUNT_DURATION_PAST_PERIOD detects an amount associated with an interval period (e.g. 5 years ago)
-- Amount followed within 0 to 1 tokens by a DURATION_PAST_PERIOD match;
-- the result spans both.
-- NOTE(review): the AMOUNT view is not defined in this section; confirm
-- it exists and exposes a `match` column.
create view AMOUNT_DURATION_PAST_PERIOD as
  select CombineSpans(Amt.match, DurPast.match) as match
  from AMOUNT Amt, DURATION_PAST_PERIOD DurPast
  where FollowsTok(Amt.match, DurPast.match, 0, 1);
The following production AMOUNT_DURATION_PRESENT_PERIOD detects an amount associated with an interval period (e.g. 5 years now)
-- Amount followed within 0 to 1 tokens by a DURATION_PRESENT_PERIOD
-- match; the result spans both.
-- NOTE(review): the AMOUNT view is not defined in this section; confirm
-- it exists and exposes a `match` column.
create view AMOUNT_DURATION_PRESENT_PERIOD as
  select CombineSpans(Amt.match, DurPres.match) as match
  from AMOUNT Amt, DURATION_PRESENT_PERIOD DurPres
  where FollowsTok(Amt.match, DurPres.match, 0, 1);
The following production AMOUNT_DURATION_FUTURE_PERIOD detects an amount associated with an interval period (e.g. 5 years later, 9 months after)
-- Amount followed within 0 to 1 tokens by a DURATION_FUTURE_PERIOD
-- match; the result spans both.
-- NOTE(review): the AMOUNT view is not defined in this section; confirm
-- it exists and exposes a `match` column.
create view AMOUNT_DURATION_FUTURE_PERIOD as
  select CombineSpans(Amt.match, DurFut.match) as match
  from AMOUNT Amt, DURATION_FUTURE_PERIOD DurFut
  where FollowsTok(Amt.match, DurFut.match, 0, 1);
The following production AMOUNT_DURATION detects an amount associated with an interval indicator (e.g. 15 months)
-- Amount followed within 0 to 1 tokens by a duration indicator; the
-- result spans both.
-- NOTE(review): the AMOUNT view is not defined in this section; confirm
-- it exists and exposes a `match` column.
create view AMOUNT_DURATION as
  select CombineSpans(Amt.match, Dur.match) as match
  from AMOUNT Amt, DURATION_INDICATOR Dur
  where FollowsTok(Amt.match, Dur.match, 0, 1);
The following production PAST_PERIOD_AMOUNT_DURATION detects an amount associated with a time period (e.g. last 15 weeks, last 2 years)
-- Past-period determiner followed within 0 to 1 tokens by an
-- AMOUNT_DURATION match; the result spans both.
create view PAST_PERIOD_AMOUNT_DURATION as
  select CombineSpans(Past.match, AmtDur.match) as match
  from PAST_PERIOD_DETERMINER Past, AMOUNT_DURATION AmtDur
  where FollowsTok(Past.match, AmtDur.match, 0, 1);
The following production PRESENT_PERIOD_AMOUNT_DURATION detects an amount associated with a time period (e.g. about 3 months, around five years)
-- Present-period determiner followed within 0 to 1 tokens by an
-- AMOUNT_DURATION match; the result spans both.
create view PRESENT_PERIOD_AMOUNT_DURATION as
  select CombineSpans(Pres.match, AmtDur.match) as match
  from PRESENT_PERIOD_DETERMINER Pres, AMOUNT_DURATION AmtDur
  where FollowsTok(Pres.match, AmtDur.match, 0, 1);
The following production FUTURE_PERIOD_AMOUNT_DURATION detects an amount associated with a time period (e.g. next 15 weeks, after 2 years)
-- Future-period determiner followed within 0 to 1 tokens by an
-- AMOUNT_DURATION match; the result spans both.
create view FUTURE_PERIOD_AMOUNT_DURATION as
  select CombineSpans(Fut.match, AmtDur.match) as match
  from FUTURE_PERIOD_DETERMINER Fut, AMOUNT_DURATION AmtDur
  where FollowsTok(Fut.match, AmtDur.match, 0, 1);
The following Nonterminal DURATION_PERIOD combines all duration expressions followed by a past, present or future period
-- Concatenation (union all) of the three "duration then period" views.
-- Each source view has the single column `match`, selected explicitly.
create view DURATION_PERIOD as
  (select Past.match from DURATION_PAST_PERIOD Past)
  union all
  (select Pres.match from DURATION_PRESENT_PERIOD Pres)
  union all
  (select Fut.match from DURATION_FUTURE_PERIOD Fut);
The following Nonterminal PERIOD_DURATION combines all period expressions of past, present and future followed by a duration
-- Concatenation (union all) of the three "period then duration" views.
-- Each source view has the single column `match`, selected explicitly.
create view PERIOD_DURATION as
  (select Past.match from PAST_PERIOD_DURATION Past)
  union all
  (select Pres.match from PRESENT_PERIOD_DURATION Pres)
  union all
  (select Fut.match from FUTURE_PERIOD_DURATION Fut);
The following Nonterminal TIME_PERIOD combines all time expressions followed by past, present or future period expression
-- Concatenation (union all) of the three "time then period" views.
-- Each source view has the single column `match`, selected explicitly.
create view TIME_PERIOD as
  (select Past.match from TIME_PAST_PERIOD Past)
  union all
  (select Pres.match from TIME_PRESENT_PERIOD Pres)
  union all
  (select Fut.match from TIME_FUTURE_PERIOD Fut);
The following Nonterminal PERIOD_TIME combines all period expressions of past, present or future followed by time expressions
-- Concatenation (union all) of the three "period then time" views.
-- Each source view has the single column `match`, selected explicitly.
create view PERIOD_TIME as
  (select Past.match from PAST_PERIOD_TIME Past)
  union all
  (select Pres.match from PRESENT_PERIOD_TIME Pres)
  union all
  (select Fut.match from FUTURE_PERIOD_TIME Fut);
The following Nonterminal AMOUNT_TIME_PERIOD combines all expressions of an amount followed with a TIME_PERIOD
-- Concatenation (union all) of the three "amount + time + period" views.
-- Each source view has the single column `match`, selected explicitly.
create view AMOUNT_TIME_PERIOD as
  (select Past.match from AMOUNT_TIME_PAST_PERIOD Past)
  union all
  (select Pres.match from AMOUNT_TIME_PRESENT_PERIOD Pres)
  union all
  (select Fut.match from AMOUNT_TIME_FUTURE_PERIOD Fut);
The following Nonterminal PERIOD_AMOUNT_TIME combines all expressions of a period followed by an AMOUNT_TIME
-- Concatenation (union all) of the three "period + amount + time" views.
-- Each source view has the single column `match`, selected explicitly.
create view PERIOD_AMOUNT_TIME as
  (select Past.match from PAST_PERIOD_AMOUNT_TIME Past)
  union all
  (select Pres.match from PRESENT_PERIOD_AMOUNT_TIME Pres)
  union all
  (select Fut.match from FUTURE_PERIOD_AMOUNT_TIME Fut);
The following Nonterminal AMOUNT_DURATION_PERIOD combines all expressions of an amount followed with a DURATION_PERIOD
-- Concatenation (union all) of the three "amount + duration + period"
-- views. Each source view has the single column `match`, selected
-- explicitly.
create view AMOUNT_DURATION_PERIOD as
  (select Past.match from AMOUNT_DURATION_PAST_PERIOD Past)
  union all
  (select Pres.match from AMOUNT_DURATION_PRESENT_PERIOD Pres)
  union all
  (select Fut.match from AMOUNT_DURATION_FUTURE_PERIOD Fut);
The following Nonterminal PERIOD_AMOUNT_DURATION combines all expressions of a period followed by an AMOUNT_DURATION
-- Concatenation (union all) of the three "period + amount + duration"
-- views. Each source view has the single column `match`, selected
-- explicitly.
create view PERIOD_AMOUNT_DURATION as
  (select Past.match from PAST_PERIOD_AMOUNT_DURATION Past)
  union all
  (select Pres.match from PRESENT_PERIOD_AMOUNT_DURATION Pres)
  union all
  (select Fut.match from FUTURE_PERIOD_AMOUNT_DURATION Fut);
The following Nonterminal Intervalview combines all Interval expressions of past, present and future
-- All interval expressions: the ten amount / time / duration / period
-- combination views concatenated with union all (duplicates are kept).
-- Each source view has the single column `match`, selected explicitly.
create view Intervalview as
  (select AmtDur.match from AMOUNT_DURATION AmtDur)
  union all
  (select AmtTm.match from AMOUNT_TIME AmtTm)
  union all
  (select DurPer.match from DURATION_PERIOD DurPer)
  union all
  (select PerDur.match from PERIOD_DURATION PerDur)
  union all
  (select TmPer.match from TIME_PERIOD TmPer)
  union all
  (select PerTm.match from PERIOD_TIME PerTm)
  union all
  (select AmtTmPer.match from AMOUNT_TIME_PERIOD AmtTmPer)
  union all
  (select PerAmtTm.match from PERIOD_AMOUNT_TIME PerAmtTm)
  union all
  (select AmtDurPer.match from AMOUNT_DURATION_PERIOD AmtDurPer)
  union all
  (select PerAmtDur.match from PERIOD_AMOUNT_DURATION PerAmtDur);
The following production DrugPronounDosageIntervalView detects an ENTITY followed by a PRONOUN followed by a DOSAGE and then an INTERVAL (e.g. bupe I took 16mg a day, subs i had 80-90mg every day.)
-- A drug+pronoun+dosage match joined with an interval expression that
-- starts 0 to 4 tokens after it; the result spans from the drug through
-- the interval.
create view DrugPronounDosageIntervalView as
  select CombineSpans(Dose.match, Intv.match) as match
  from DrugPronounDosageView Dose, Intervalview Intv
  where FollowsTok(Dose.match, Intv.match, 0, 4);